6.18-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 3 May 2026 12:45:19 +0000 (14:45 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 3 May 2026 12:45:19 +0000 (14:45 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 May 2026 12:45:19 +0000 (14:45 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 May 2026 12:45:19 +0000 (14:45 +0200)
diff --git a/queue-6.18/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch b/queue-6.18/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch

new file mode 100644 (file)

index 0000000..409e5e1
--- /dev/null
+++ b/queue-6.18/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch
@@ -0,0 +1,57 @@
+From d5325810814ee995debfa0b6c4a22e0391598bef Mon Sep 17 00:00:00 2001
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+Date: Fri, 20 Mar 2026 08:30:30 +0100
+Subject: arm64: dts: ti: am62-verdin: Enable pullup for eMMC data pins
+
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+
+commit d5325810814ee995debfa0b6c4a22e0391598bef upstream.
+
+Verdin AM62 board does not have external pullups on eMMC DAT1-DAT7 pins.
+Enable internal pullups on DAT1-DAT7 considering:
+
+ - without a host-side pullup, these lines rely solely on the eMMC
+   device's internal pullup (R_int, 10kohm-150kohm per JEDEC), which may
+   exceed the recommended 50kohm max for 1.8V VCCQ
+ - JEDEC JESD84-B51 Table 200 requires host-side pullups (R_DAT,
+   10kohm-100kohm) on all data lines to prevent bus floating
+
+Fixes: 316b80246b16 ("arm64: dts: ti: add verdin am62")
+Cc: stable@vger.kernel.org
+Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
+Link: https://patch.msgid.link/20260320073032.10427-1-francesco@dolcini.it
+Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi |   20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
+@@ -572,16 +572,16 @@
+       /* On-module eMMC */
+       pinctrl_sdhci0: main-mmc0-default-pins {
+               pinctrl-single,pins = <
+-                      AM62X_IOPAD(0x220, PIN_INPUT, 0) /*  (Y3) MMC0_CMD  */
+-                      AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK  */
+-                      AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */
+-                      AM62X_IOPAD(0x210, PIN_INPUT, 0) /* (AA1) MMC0_DAT1 */
+-                      AM62X_IOPAD(0x20c, PIN_INPUT, 0) /* (AA3) MMC0_DAT2 */
+-                      AM62X_IOPAD(0x208, PIN_INPUT, 0) /*  (Y4) MMC0_DAT3 */
+-                      AM62X_IOPAD(0x204, PIN_INPUT, 0) /* (AB2) MMC0_DAT4 */
+-                      AM62X_IOPAD(0x200, PIN_INPUT, 0) /* (AC1) MMC0_DAT5 */
+-                      AM62X_IOPAD(0x1fc, PIN_INPUT, 0) /* (AD2) MMC0_DAT6 */
+-                      AM62X_IOPAD(0x1f8, PIN_INPUT, 0) /* (AC2) MMC0_DAT7 */
++                      AM62X_IOPAD(0x220, PIN_INPUT,        0) /*  (Y3) MMC0_CMD  */
++                      AM62X_IOPAD(0x218, PIN_INPUT,        0) /* (AB1) MMC0_CLK  */
++                      AM62X_IOPAD(0x214, PIN_INPUT,        0) /* (AA2) MMC0_DAT0 */
++                      AM62X_IOPAD(0x210, PIN_INPUT_PULLUP, 0) /* (AA1) MMC0_DAT1 */
++                      AM62X_IOPAD(0x20c, PIN_INPUT_PULLUP, 0) /* (AA3) MMC0_DAT2 */
++                      AM62X_IOPAD(0x208, PIN_INPUT_PULLUP, 0) /*  (Y4) MMC0_DAT3 */
++                      AM62X_IOPAD(0x204, PIN_INPUT_PULLUP, 0) /* (AB2) MMC0_DAT4 */
++                      AM62X_IOPAD(0x200, PIN_INPUT_PULLUP, 0) /* (AC1) MMC0_DAT5 */
++                      AM62X_IOPAD(0x1fc, PIN_INPUT_PULLUP, 0) /* (AD2) MMC0_DAT6 */
++                      AM62X_IOPAD(0x1f8, PIN_INPUT_PULLUP, 0) /* (AC2) MMC0_DAT7 */
+               >;
+       };
+ 
diff --git a/queue-6.18/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch b/queue-6.18/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch

new file mode 100644 (file)

index 0000000..bc3570e
--- /dev/null
+++ b/queue-6.18/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch
@@ -0,0 +1,55 @@
+From 95aed2af87ec43fa7624cc81dd13d37824ad4972 Mon Sep 17 00:00:00 2001
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Date: Wed, 1 Apr 2026 10:31:11 +0100
+Subject: crypto: qat - fix IRQ cleanup on 6xxx probe failure
+
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+
+commit 95aed2af87ec43fa7624cc81dd13d37824ad4972 upstream.
+
+When adf_dev_up() partially completes and then fails, the IRQ
+handlers registered during adf_isr_resource_alloc() are not detached
+before the MSI-X vectors are released.
+
+Since the device is enabled with pcim_enable_device(), calling
+pci_alloc_irq_vectors() internally registers pcim_msi_release() as a
+devres action. On probe failure, devres runs pcim_msi_release() which
+calls pci_free_irq_vectors(), tearing down the MSI-X vectors while IRQ
+handlers (for example 'qat0-bundle0') are still attached. This causes
+remove_proc_entry() warnings:
+
+    [   22.163964] remove_proc_entry: removing non-empty directory 'irq/143', leaking at least 'qat0-bundle0'
+
+Moving the devm_add_action_or_reset() before adf_dev_up() does not solve
+the problem since devres runs in LIFO order and pcim_msi_release(),
+registered later inside adf_dev_up(), would still fire before
+adf_device_down().
+
+Fix by calling adf_dev_down() explicitly when adf_dev_up() fails, to
+properly free IRQ handlers before devres releases the MSI-X vectors.
+
+Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
+Reviewed-by: Laurent M Coquerel <laurent.m.coquerel@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/intel/qat/qat_6xxx/adf_drv.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
++++ b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
+@@ -182,8 +182,10 @@ static int adf_probe(struct pci_dev *pde
+               return ret;
+ 
+       ret = adf_dev_up(accel_dev, true);
+-      if (ret)
++      if (ret) {
++              adf_dev_down(accel_dev);
+               return ret;
++      }
+ 
+       ret = devm_add_action_or_reset(dev, adf_device_down, accel_dev);
+       if (ret)
diff --git a/queue-6.18/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch b/queue-6.18/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch

new file mode 100644 (file)

index 0000000..56a906b
--- /dev/null
+++ b/queue-6.18/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch
@@ -0,0 +1,357 @@
+From 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 Mon Sep 17 00:00:00 2001
+From: Paul Louvel <paul.louvel@bootlin.com>
+Date: Mon, 30 Mar 2026 12:28:18 +0200
+Subject: crypto: talitos - fix SEC1 32k ahash request limitation
+
+From: Paul Louvel <paul.louvel@bootlin.com>
+
+commit 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 upstream.
+
+Since commit c662b043cdca ("crypto: af_alg/hash: Support
+MSG_SPLICE_PAGES"), the crypto core may pass large scatterlists spanning
+multiple pages to drivers supporting ahash operations. As a result, a
+driver can now receive large ahash requests.
+
+The SEC1 engine has a limitation where a single descriptor cannot
+process more than 32k of data. The current implementation attempts to
+handle the entire request within a single descriptor, which leads to
+failures raised by the driver:
+
+  "length exceeds h/w max limit"
+
+Address this limitation by splitting large ahash requests into multiple
+descriptors, each respecting the 32k hardware limit. This allows
+processing arbitrarily large requests.
+
+Cc: stable@vger.kernel.org
+Fixes: c662b043cdca ("crypto: af_alg/hash: Support MSG_SPLICE_PAGES")
+Signed-off-by: Paul Louvel <paul.louvel@bootlin.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/talitos.c |  216 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 147 insertions(+), 69 deletions(-)
+
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -12,6 +12,7 @@
+  * All rights reserved.
+  */
+ 
++#include <linux/workqueue.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
+@@ -870,10 +871,18 @@ struct talitos_ahash_req_ctx {
+       unsigned int swinit;
+       unsigned int first;
+       unsigned int last;
++      unsigned int last_request;
+       unsigned int to_hash_later;
+       unsigned int nbuf;
+       struct scatterlist bufsl[2];
+       struct scatterlist *psrc;
++
++      struct scatterlist request_bufsl[2];
++      struct ahash_request *areq;
++      struct scatterlist *request_sl;
++      unsigned int remaining_ahash_request_bytes;
++      unsigned int current_ahash_request_bytes;
++      struct work_struct sec1_ahash_process_remaining;
+ };
+ 
+ struct talitos_export_state {
+@@ -1759,7 +1768,20 @@ static void ahash_done(struct device *de
+ 
+       kfree(edesc);
+ 
+-      ahash_request_complete(areq, err);
++      if (err) {
++              ahash_request_complete(areq, err);
++              return;
++      }
++
++      req_ctx->remaining_ahash_request_bytes -=
++              req_ctx->current_ahash_request_bytes;
++
++      if (!req_ctx->remaining_ahash_request_bytes) {
++              ahash_request_complete(areq, 0);
++              return;
++      }
++
++      schedule_work(&req_ctx->sec1_ahash_process_remaining);
+ }
+ 
+ /*
+@@ -1925,60 +1947,7 @@ static struct talitos_edesc *ahash_edesc
+                                  nbytes, 0, 0, 0, areq->base.flags, false);
+ }
+ 
+-static int ahash_init(struct ahash_request *areq)
+-{
+-      struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+-      struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+-      struct device *dev = ctx->dev;
+-      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-      unsigned int size;
+-      dma_addr_t dma;
+-
+-      /* Initialize the context */
+-      req_ctx->buf_idx = 0;
+-      req_ctx->nbuf = 0;
+-      req_ctx->first = 1; /* first indicates h/w must init its context */
+-      req_ctx->swinit = 0; /* assume h/w init of context */
+-      size =  (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+-                      ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+-                      : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+-      req_ctx->hw_context_size = size;
+-
+-      dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+-                           DMA_TO_DEVICE);
+-      dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
+-
+-      return 0;
+-}
+-
+-/*
+- * on h/w without explicit sha224 support, we initialize h/w context
+- * manually with sha224 constants, and tell it to run sha256.
+- */
+-static int ahash_init_sha224_swinit(struct ahash_request *areq)
+-{
+-      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-
+-      req_ctx->hw_context[0] = SHA224_H0;
+-      req_ctx->hw_context[1] = SHA224_H1;
+-      req_ctx->hw_context[2] = SHA224_H2;
+-      req_ctx->hw_context[3] = SHA224_H3;
+-      req_ctx->hw_context[4] = SHA224_H4;
+-      req_ctx->hw_context[5] = SHA224_H5;
+-      req_ctx->hw_context[6] = SHA224_H6;
+-      req_ctx->hw_context[7] = SHA224_H7;
+-
+-      /* init 64-bit count */
+-      req_ctx->hw_context[8] = 0;
+-      req_ctx->hw_context[9] = 0;
+-
+-      ahash_init(areq);
+-      req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
+-
+-      return 0;
+-}
+-
+-static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
++static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes)
+ {
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+@@ -1997,12 +1966,12 @@ static int ahash_process_req(struct ahas
+ 
+       if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
+               /* Buffer up to one whole block */
+-              nents = sg_nents_for_len(areq->src, nbytes);
++              nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+               if (nents < 0) {
+                       dev_err(dev, "Invalid number of src SG.\n");
+                       return nents;
+               }
+-              sg_copy_to_buffer(areq->src, nents,
++              sg_copy_to_buffer(req_ctx->request_sl, nents,
+                                 ctx_buf + req_ctx->nbuf, nbytes);
+               req_ctx->nbuf += nbytes;
+               return 0;
+@@ -2029,7 +1998,7 @@ static int ahash_process_req(struct ahas
+               sg_init_table(req_ctx->bufsl, nsg);
+               sg_set_buf(req_ctx->bufsl, ctx_buf, req_ctx->nbuf);
+               if (nsg > 1)
+-                      sg_chain(req_ctx->bufsl, 2, areq->src);
++                      sg_chain(req_ctx->bufsl, 2, req_ctx->request_sl);
+               req_ctx->psrc = req_ctx->bufsl;
+       } else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
+               int offset;
+@@ -2038,26 +2007,26 @@ static int ahash_process_req(struct ahas
+                       offset = blocksize - req_ctx->nbuf;
+               else
+                       offset = nbytes_to_hash - req_ctx->nbuf;
+-              nents = sg_nents_for_len(areq->src, offset);
++              nents = sg_nents_for_len(req_ctx->request_sl, offset);
+               if (nents < 0) {
+                       dev_err(dev, "Invalid number of src SG.\n");
+                       return nents;
+               }
+-              sg_copy_to_buffer(areq->src, nents,
++              sg_copy_to_buffer(req_ctx->request_sl, nents,
+                                 ctx_buf + req_ctx->nbuf, offset);
+               req_ctx->nbuf += offset;
+-              req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, areq->src,
++              req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, req_ctx->request_sl,
+                                                offset);
+       } else
+-              req_ctx->psrc = areq->src;
++              req_ctx->psrc = req_ctx->request_sl;
+ 
+       if (to_hash_later) {
+-              nents = sg_nents_for_len(areq->src, nbytes);
++              nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+               if (nents < 0) {
+                       dev_err(dev, "Invalid number of src SG.\n");
+                       return nents;
+               }
+-              sg_pcopy_to_buffer(areq->src, nents,
++              sg_pcopy_to_buffer(req_ctx->request_sl, nents,
+                                  req_ctx->buf[(req_ctx->buf_idx + 1) & 1],
+                                     to_hash_later,
+                                     nbytes - to_hash_later);
+@@ -2065,7 +2034,7 @@ static int ahash_process_req(struct ahas
+       req_ctx->to_hash_later = to_hash_later;
+ 
+       /* Allocate extended descriptor */
+-      edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
++      edesc = ahash_edesc_alloc(req_ctx->areq, nbytes_to_hash);
+       if (IS_ERR(edesc))
+               return PTR_ERR(edesc);
+ 
+@@ -2087,14 +2056,123 @@ static int ahash_process_req(struct ahas
+       if (ctx->keylen && (req_ctx->first || req_ctx->last))
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+ 
+-      return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
++      return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done);
+ }
+ 
+-static int ahash_update(struct ahash_request *areq)
++static void sec1_ahash_process_remaining(struct work_struct *work)
+ {
++      struct talitos_ahash_req_ctx *req_ctx =
++              container_of(work, struct talitos_ahash_req_ctx,
++                           sec1_ahash_process_remaining);
++      int err = 0;
++
++      req_ctx->request_sl = scatterwalk_ffwd(req_ctx->request_bufsl,
++                                             req_ctx->request_sl, TALITOS1_MAX_DATA_LEN);
++
++      if (req_ctx->remaining_ahash_request_bytes > TALITOS1_MAX_DATA_LEN)
++              req_ctx->current_ahash_request_bytes = TALITOS1_MAX_DATA_LEN;
++      else {
++              req_ctx->current_ahash_request_bytes =
++                      req_ctx->remaining_ahash_request_bytes;
++
++              if (req_ctx->last_request)
++                      req_ctx->last = 1;
++      }
++
++      err = ahash_process_req_one(req_ctx->areq,
++                                  req_ctx->current_ahash_request_bytes);
++
++      if (err != -EINPROGRESS)
++              ahash_request_complete(req_ctx->areq, err);
++}
++
++static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
++{
++      struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++      struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
++      struct device *dev = ctx->dev;
++      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++      struct talitos_private *priv = dev_get_drvdata(dev);
++      bool is_sec1 = has_ftr_sec1(priv);
++
++      req_ctx->areq = areq;
++      req_ctx->request_sl = areq->src;
++      req_ctx->remaining_ahash_request_bytes = nbytes;
++
++      if (is_sec1) {
++              if (nbytes > TALITOS1_MAX_DATA_LEN)
++                      nbytes = TALITOS1_MAX_DATA_LEN;
++              else if (req_ctx->last_request)
++                      req_ctx->last = 1;
++      }
++
++      req_ctx->current_ahash_request_bytes = nbytes;
++
++      return ahash_process_req_one(req_ctx->areq,
++                                   req_ctx->current_ahash_request_bytes);
++}
++
++static int ahash_init(struct ahash_request *areq)
++{
++      struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++      struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
++      struct device *dev = ctx->dev;
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++      unsigned int size;
++      dma_addr_t dma;
+ 
++      /* Initialize the context */
++      req_ctx->buf_idx = 0;
++      req_ctx->nbuf = 0;
++      req_ctx->first = 1; /* first indicates h/w must init its context */
++      req_ctx->swinit = 0; /* assume h/w init of context */
++      size =  (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
++                      ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
++                      : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
++      req_ctx->hw_context_size = size;
++      req_ctx->last_request = 0;
+       req_ctx->last = 0;
++      INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining);
++
++      dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
++                           DMA_TO_DEVICE);
++      dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
++
++      return 0;
++}
++
++/*
++ * on h/w without explicit sha224 support, we initialize h/w context
++ * manually with sha224 constants, and tell it to run sha256.
++ */
++static int ahash_init_sha224_swinit(struct ahash_request *areq)
++{
++      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++
++      req_ctx->hw_context[0] = SHA224_H0;
++      req_ctx->hw_context[1] = SHA224_H1;
++      req_ctx->hw_context[2] = SHA224_H2;
++      req_ctx->hw_context[3] = SHA224_H3;
++      req_ctx->hw_context[4] = SHA224_H4;
++      req_ctx->hw_context[5] = SHA224_H5;
++      req_ctx->hw_context[6] = SHA224_H6;
++      req_ctx->hw_context[7] = SHA224_H7;
++
++      /* init 64-bit count */
++      req_ctx->hw_context[8] = 0;
++      req_ctx->hw_context[9] = 0;
++
++      ahash_init(areq);
++      req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
++
++      return 0;
++}
++
++static int ahash_update(struct ahash_request *areq)
++{
++      struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++
++      req_ctx->last_request = 0;
+ 
+       return ahash_process_req(areq, areq->nbytes);
+ }
+@@ -2103,7 +2181,7 @@ static int ahash_final(struct ahash_requ
+ {
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+ 
+-      req_ctx->last = 1;
++      req_ctx->last_request = 1;
+ 
+       return ahash_process_req(areq, 0);
+ }
+@@ -2112,7 +2190,7 @@ static int ahash_finup(struct ahash_requ
+ {
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+ 
+-      req_ctx->last = 1;
++      req_ctx->last_request = 1;
+ 
+       return ahash_process_req(areq, areq->nbytes);
+ }
diff --git a/queue-6.18/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch b/queue-6.18/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch

new file mode 100644 (file)

index 0000000..7d26de3
--- /dev/null
+++ b/queue-6.18/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch
@@ -0,0 +1,199 @@
+From a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae Mon Sep 17 00:00:00 2001
+From: Paul Louvel <paul.louvel@bootlin.com>
+Date: Mon, 30 Mar 2026 12:28:19 +0200
+Subject: crypto: talitos - rename first/last to first_desc/last_desc
+
+From: Paul Louvel <paul.louvel@bootlin.com>
+
+commit a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae upstream.
+
+Previous commit introduces a new last_request variable in the context
+structure.
+
+Rename the existing first/last member variables in the context
+structure to first_desc/last_desc to improve readability.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paul Louvel <paul.louvel@bootlin.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/talitos.c |   46 +++++++++++++++++++++++-----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -869,8 +869,8 @@ struct talitos_ahash_req_ctx {
+       u8 buf[2][HASH_MAX_BLOCK_SIZE];
+       int buf_idx;
+       unsigned int swinit;
+-      unsigned int first;
+-      unsigned int last;
++      unsigned int first_desc;
++      unsigned int last_desc;
+       unsigned int last_request;
+       unsigned int to_hash_later;
+       unsigned int nbuf;
+@@ -889,8 +889,8 @@ struct talitos_export_state {
+       u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
+       u8 buf[HASH_MAX_BLOCK_SIZE];
+       unsigned int swinit;
+-      unsigned int first;
+-      unsigned int last;
++      unsigned int first_desc;
++      unsigned int last_desc;
+       unsigned int to_hash_later;
+       unsigned int nbuf;
+ };
+@@ -1722,7 +1722,7 @@ static void common_nonsnoop_hash_unmap(s
+       if (desc->next_desc &&
+           desc->ptr[5].ptr != desc2->ptr[5].ptr)
+               unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               memcpy(areq->result, req_ctx->hw_context,
+                      crypto_ahash_digestsize(tfm));
+ 
+@@ -1759,7 +1759,7 @@ static void ahash_done(struct device *de
+                container_of(desc, struct talitos_edesc, desc);
+       struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+ 
+-      if (!req_ctx->last && req_ctx->to_hash_later) {
++      if (!req_ctx->last_desc && req_ctx->to_hash_later) {
+               /* Position any partial block for next update/final/finup */
+               req_ctx->buf_idx = (req_ctx->buf_idx + 1) & 1;
+               req_ctx->nbuf = req_ctx->to_hash_later;
+@@ -1825,7 +1825,7 @@ static int common_nonsnoop_hash(struct t
+       /* first DWORD empty */
+ 
+       /* hash context in */
+-      if (!req_ctx->first || req_ctx->swinit) {
++      if (!req_ctx->first_desc || req_ctx->swinit) {
+               map_single_talitos_ptr_nosync(dev, &desc->ptr[1],
+                                             req_ctx->hw_context_size,
+                                             req_ctx->hw_context,
+@@ -1833,7 +1833,7 @@ static int common_nonsnoop_hash(struct t
+               req_ctx->swinit = 0;
+       }
+       /* Indicate next op is not the first. */
+-      req_ctx->first = 0;
++      req_ctx->first_desc = 0;
+ 
+       /* HMAC key */
+       if (ctx->keylen)
+@@ -1866,7 +1866,7 @@ static int common_nonsnoop_hash(struct t
+       /* fifth DWORD empty */
+ 
+       /* hash/HMAC out -or- hash context out */
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               map_single_talitos_ptr(dev, &desc->ptr[5],
+                                      crypto_ahash_digestsize(tfm),
+                                      req_ctx->hw_context, DMA_FROM_DEVICE);
+@@ -1908,7 +1908,7 @@ static int common_nonsnoop_hash(struct t
+               if (sg_count > 1)
+                       sync_needed = true;
+               copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
+-              if (req_ctx->last)
++              if (req_ctx->last_desc)
+                       map_single_talitos_ptr_nosync(dev, &desc->ptr[5],
+                                                     req_ctx->hw_context_size,
+                                                     req_ctx->hw_context,
+@@ -1964,7 +1964,7 @@ static int ahash_process_req_one(struct
+       bool is_sec1 = has_ftr_sec1(priv);
+       u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
+ 
+-      if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
++      if (!req_ctx->last_desc && (nbytes + req_ctx->nbuf <= blocksize)) {
+               /* Buffer up to one whole block */
+               nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+               if (nents < 0) {
+@@ -1981,7 +1981,7 @@ static int ahash_process_req_one(struct
+       nbytes_to_hash = nbytes + req_ctx->nbuf;
+       to_hash_later = nbytes_to_hash & (blocksize - 1);
+ 
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               to_hash_later = 0;
+       else if (to_hash_later)
+               /* There is a partial block. Hash the full block(s) now */
+@@ -2041,19 +2041,19 @@ static int ahash_process_req_one(struct
+       edesc->desc.hdr = ctx->desc_hdr_template;
+ 
+       /* On last one, request SEC to pad; otherwise continue */
+-      if (req_ctx->last)
++      if (req_ctx->last_desc)
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD;
+       else
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT;
+ 
+       /* request SEC to INIT hash. */
+-      if (req_ctx->first && !req_ctx->swinit)
++      if (req_ctx->first_desc && !req_ctx->swinit)
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT;
+ 
+       /* When the tfm context has a keylen, it's an HMAC.
+        * A first or last (ie. not middle) descriptor must request HMAC.
+        */
+-      if (ctx->keylen && (req_ctx->first || req_ctx->last))
++      if (ctx->keylen && (req_ctx->first_desc || req_ctx->last_desc))
+               edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+ 
+       return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done);
+@@ -2076,7 +2076,7 @@ static void sec1_ahash_process_remaining
+                       req_ctx->remaining_ahash_request_bytes;
+ 
+               if (req_ctx->last_request)
+-                      req_ctx->last = 1;
++                      req_ctx->last_desc = 1;
+       }
+ 
+       err = ahash_process_req_one(req_ctx->areq,
+@@ -2103,7 +2103,7 @@ static int ahash_process_req(struct ahas
+               if (nbytes > TALITOS1_MAX_DATA_LEN)
+                       nbytes = TALITOS1_MAX_DATA_LEN;
+               else if (req_ctx->last_request)
+-                      req_ctx->last = 1;
++                      req_ctx->last_desc = 1;
+       }
+ 
+       req_ctx->current_ahash_request_bytes = nbytes;
+@@ -2124,14 +2124,14 @@ static int ahash_init(struct ahash_reque
+       /* Initialize the context */
+       req_ctx->buf_idx = 0;
+       req_ctx->nbuf = 0;
+-      req_ctx->first = 1; /* first indicates h/w must init its context */
++      req_ctx->first_desc = 1; /* first_desc indicates h/w must init its context */
+       req_ctx->swinit = 0; /* assume h/w init of context */
+       size =  (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+                       ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+                       : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+       req_ctx->hw_context_size = size;
+       req_ctx->last_request = 0;
+-      req_ctx->last = 0;
++      req_ctx->last_desc = 0;
+       INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining);
+ 
+       dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+@@ -2224,8 +2224,8 @@ static int ahash_export(struct ahash_req
+              req_ctx->hw_context_size);
+       memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf);
+       export->swinit = req_ctx->swinit;
+-      export->first = req_ctx->first;
+-      export->last = req_ctx->last;
++      export->first_desc = req_ctx->first_desc;
++      export->last_desc = req_ctx->last_desc;
+       export->to_hash_later = req_ctx->to_hash_later;
+       export->nbuf = req_ctx->nbuf;
+ 
+@@ -2250,8 +2250,8 @@ static int ahash_import(struct ahash_req
+       memcpy(req_ctx->hw_context, export->hw_context, size);
+       memcpy(req_ctx->buf[0], export->buf, export->nbuf);
+       req_ctx->swinit = export->swinit;
+-      req_ctx->first = export->first;
+-      req_ctx->last = export->last;
++      req_ctx->first_desc = export->first_desc;
++      req_ctx->last_desc = export->last_desc;
+       req_ctx->to_hash_later = export->to_hash_later;
+       req_ctx->nbuf = export->nbuf;
+ 
diff --git a/queue-6.18/firmware-google-framebuffer-do-not-unregister-platform-device.patch b/queue-6.18/firmware-google-framebuffer-do-not-unregister-platform-device.patch

new file mode 100644 (file)

index 0000000..bb50115
--- /dev/null
+++ b/queue-6.18/firmware-google-framebuffer-do-not-unregister-platform-device.patch
@@ -0,0 +1,69 @@
+From 5cd28bd28c8ce426b56ce4230dbd17537181d5ad Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann <tzimmermann@suse.de>
+Date: Tue, 17 Feb 2026 16:56:11 +0100
+Subject: firmware: google: framebuffer: Do not unregister platform device
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+commit 5cd28bd28c8ce426b56ce4230dbd17537181d5ad upstream.
+
+The native driver takes over the framebuffer aperture by removing the
+system-framebuffer platform device. Afterwards the pointer in drvdata
+is dangling. Remove the entire logic around drvdata and let the kernel's
+aperture helpers handle this. The platform device depends on the native
+hardware device instead of the coreboot device anyway.
+
+When commit 851b4c14532d ("firmware: coreboot: Add coreboot framebuffer
+driver") added the coreboot framebuffer code, the kernel did not support
+device-based aperture management. Instead native drivers only removed
+the conflicting fbdev device. At that point, unregistering the framebuffer
+device most likely worked correctly. It was definitely broken after
+commit d9702b2a2171 ("fbdev/simplefb: Do not use struct
+fb_info.apertures"). So take this commit for the Fixes tag. Earlier
+releases might work depending on the native hardware driver.
+
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Fixes: d9702b2a2171 ("fbdev/simplefb: Do not use struct fb_info.apertures")
+Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
+Acked-by: Julius Werner <jwerner@chromium.org>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: Javier Martinez Canillas <javierm@redhat.com>
+Cc: Hans de Goede <hansg@kernel.org>
+Cc: linux-fbdev@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v6.3+
+Link: https://patch.msgid.link/20260217155836.96267-2-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/google/framebuffer-coreboot.c |   10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/drivers/firmware/google/framebuffer-coreboot.c
++++ b/drivers/firmware/google/framebuffer-coreboot.c
+@@ -81,19 +81,10 @@ static int framebuffer_probe(struct core
+                                                sizeof(pdata));
+       if (IS_ERR(pdev))
+               pr_warn("coreboot: could not register framebuffer\n");
+-      else
+-              dev_set_drvdata(&dev->dev, pdev);
+ 
+       return PTR_ERR_OR_ZERO(pdev);
+ }
+ 
+-static void framebuffer_remove(struct coreboot_device *dev)
+-{
+-      struct platform_device *pdev = dev_get_drvdata(&dev->dev);
+-
+-      platform_device_unregister(pdev);
+-}
+-
+ static const struct coreboot_device_id framebuffer_ids[] = {
+       { .tag = CB_TAG_FRAMEBUFFER },
+       { /* sentinel */ }
+@@ -102,7 +93,6 @@ MODULE_DEVICE_TABLE(coreboot, framebuffe
+ 
+ static struct coreboot_driver framebuffer_driver = {
+       .probe = framebuffer_probe,
+-      .remove = framebuffer_remove,
+       .drv = {
+               .name = "framebuffer",
+       },
diff --git a/queue-6.18/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch b/queue-6.18/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch

new file mode 100644 (file)

index 0000000..bf439e9
--- /dev/null
+++ b/queue-6.18/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
@@ -0,0 +1,58 @@
+From 24c73e93d6a756e1b8626bb259d2e07c5b89b370 Mon Sep 17 00:00:00 2001
+From: Sanman Pradhan <psanman@juniper.net>
+Date: Fri, 10 Apr 2026 00:25:55 +0000
+Subject: hwmon: (pt5161l) Fix bugs in pt5161l_read_block_data()
+
+From: Sanman Pradhan <psanman@juniper.net>
+
+commit 24c73e93d6a756e1b8626bb259d2e07c5b89b370 upstream.
+
+Fix two bugs in pt5161l_read_block_data():
+
+1. Buffer overrun: The local buffer rbuf is declared as u8 rbuf[24],
+   but i2c_smbus_read_block_data() can return up to
+   I2C_SMBUS_BLOCK_MAX (32) bytes. The i2c-core copies the data into
+   the caller's buffer before the return value can be checked, so
+   the post-read length validation does not prevent a stack overrun
+   if a device returns more than 24 bytes. Resize the buffer to
+   I2C_SMBUS_BLOCK_MAX.
+
+2. Unexpected positive return on length mismatch: When all three
+   retries are exhausted because the device returns data with an
+   unexpected length, i2c_smbus_read_block_data() returns a positive
+   byte count. The function returns this directly, and callers treat
+   any non-negative return as success, processing stale or incomplete
+   buffer contents. Return -EIO when retries are exhausted with a
+   positive return value, preserving the negative error code on I2C
+   failure.
+
+Fixes: 1b2ca93cd0592 ("hwmon: Add driver for Astera Labs PT5161L retimer")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sanman Pradhan <psanman@juniper.net>
+Link: https://lore.kernel.org/r/20260410002549.424162-1-sanman.pradhan@hpe.com
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwmon/pt5161l.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/hwmon/pt5161l.c
++++ b/drivers/hwmon/pt5161l.c
+@@ -121,7 +121,7 @@ static int pt5161l_read_block_data(struc
+       int ret, tries;
+       u8 remain_len = len;
+       u8 curr_len;
+-      u8 wbuf[16], rbuf[24];
++      u8 wbuf[16], rbuf[I2C_SMBUS_BLOCK_MAX];
+       u8 cmd = 0x08; /* [7]:pec_en, [4:2]:func, [1]:start, [0]:end */
+       u8 config = 0x00; /* [6]:cfg_type, [4:1]:burst_len, [0]:address bit16 */
+ 
+@@ -151,7 +151,7 @@ static int pt5161l_read_block_data(struc
+                               break;
+               }
+               if (tries >= 3)
+-                      return ret;
++                      return ret < 0 ? ret : -EIO;
+ 
+               memcpy(val, rbuf, curr_len);
+               val += curr_len;
diff --git a/queue-6.18/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch b/queue-6.18/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch

new file mode 100644 (file)

index 0000000..e92dcf7
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
@@ -0,0 +1,69 @@
+From 96bd3e76a171a8e21a6387e54e4c420a81968492 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:10 +0000
+Subject: KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 96bd3e76a171a8e21a6387e54e4c420a81968492 upstream.
+
+According to the APM Volume #2, 15.5, Canonicalization and Consistency
+Checks (24593—Rev. 3.42—March 2024), the following condition (among
+others) results in a #VMEXIT with VMEXIT_INVALID (aka SVM_EXIT_ERR):
+
+  EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
+
+In the list of consistency checks done when EFER.LME and CR0.PG are set,
+add a check that CS.L and CS.D are not both set, after the existing
+check that CR4.PAE is set.
+
+This is functionally a nop because the nested VMRUN results in
+SVM_EXIT_ERR in HW, which is forwarded to L1, but KVM makes all
+consistency checks before a VMRUN is actually attempted.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-17-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++++
+ arch/x86/kvm/svm/svm.h    |    1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -377,6 +377,10 @@ static bool __nested_vmcb_check_save(str
+                   CC(!(save->cr0 & X86_CR0_PE)) ||
+                   CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
+                       return false;
++
++              if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) &&
++                     (save->cs.attrib & SVM_SELECTOR_DB_MASK)))
++                      return false;
+       }
+ 
+       /* Note, SVM doesn't have any additional restrictions on CR4. */
+@@ -465,6 +469,8 @@ static void __nested_copy_vmcb_save_to_c
+        * Copy only fields that are validated, as we need them
+        * to avoid TOC/TOU races.
+        */
++      to->cs = from->cs;
++
+       to->efer = from->efer;
+       to->cr0 = from->cr0;
+       to->cr3 = from->cr3;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -142,6 +142,7 @@ struct kvm_vmcb_info {
+ };
+ 
+ struct vmcb_save_area_cached {
++      struct vmcb_seg cs;
+       u64 efer;
+       u64 cr4;
+       u64 cr3;
diff --git a/queue-6.18/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch b/queue-6.18/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch

new file mode 100644 (file)

index 0000000..e11f4af
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
@@ -0,0 +1,49 @@
+From b71138fcc362c67ebe66747bb22cb4e6b4d6a651 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:09 +0000
+Subject: KVM: nSVM: Add missing consistency check for nCR3 validity
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b71138fcc362c67ebe66747bb22cb4e6b4d6a651 upstream.
+
+From the APM Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
+
+  When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
+  following conditions are considered illegal state combinations, in
+  addition to those mentioned in “Canonicalization and Consistency Checks”:
+      • Any MBZ bit of nCR3 is set.
+      • Any G_PAT.PA field has an unsupported type encoding or any
+        reserved field in G_PAT has a nonzero value.
+
+Add the consistency check for nCR3 being a legal GPA with no MBZ bits
+set.  Note, the G_PAT.PA check is being handled separately[*].
+
+Link: https://lore.kernel.org/kvm/20260205214326.1029278-3-jmattson@google.com [*]
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-16-yosry@kernel.org
+[sean: capture everything in CC(), massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -335,6 +335,10 @@ static bool __nested_vmcb_check_controls
+       if (CC(control->asid == 0))
+               return false;
+ 
++      if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
++             !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
++              return false;
++
+       if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+                                          MSRPM_SIZE)))
+               return false;
diff --git a/queue-6.18/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch b/queue-6.18/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch

new file mode 100644 (file)

index 0000000..f01c973
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
@@ -0,0 +1,41 @@
+From 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:59 +0000
+Subject: KVM: nSVM: Always inject a #GP if mapping VMCB12 fails on nested VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 upstream.
+
+nested_svm_vmrun() currently only injects a #GP if kvm_vcpu_map() fails
+with -EINVAL. But it could also fail with -EFAULT if creating a host
+mapping failed. Inject a #GP in all cases, no reason to treat failure
+modes differently.
+
+Fixes: 8c5fbf1a7231 ("KVM/nSVM: Use the new mapping API for mapping guest memory")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-6-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -966,12 +966,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+       }
+ 
+       vmcb12_gpa = svm->vmcb->save.rax;
+-      ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
+-      if (ret == -EINVAL) {
++      if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+-      } else if (ret) {
+-              return kvm_skip_emulated_instruction(vcpu);
+       }
+ 
+       ret = kvm_skip_emulated_instruction(vcpu);
diff --git a/queue-6.18/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch b/queue-6.18/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch

new file mode 100644 (file)

index 0000000..8ae44da
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch
@@ -0,0 +1,65 @@
+From 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 3 Mar 2026 16:22:23 -0800
+Subject: KVM: nSVM: Always intercept VMMCALL when L2 is active
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 upstream.
+
+Always intercept VMMCALL now that KVM properly synthesizes a #UD as
+appropriate, i.e. when L1 doesn't want to intercept VMMCALL, to avoid
+putting L2 into an infinite #UD loop if KVM_X86_QUIRK_FIX_HYPERCALL_INSN
+is enabled.
+
+By letting L2 execute VMMCALL natively and thus #UD, for all intents and
+purposes KVM morphs the VMMCALL intercept into a #UD intercept (KVM always
+intercepts #UD).  When the hypercall quirk is enabled, KVM "emulates"
+VMMCALL in response to the #UD by trying to fixup the opcode to the "right"
+vendor, then restarts the guest, without skipping the VMMCALL.  As a
+result, the guest sees an endless stream of #UDs since it's already
+executing the correct vendor hypercall instruction, i.e. the emulator
+doesn't anticipate that the #UD could be due to lack of interception, as
+opposed to a truly undefined opcode.
+
+Fixes: 0d945bd93511 ("KVM: SVM: Don't allow nested guest to VMMCALL into host")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry@kernel.org>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://patch.msgid.link/20260304002223.1105129-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/hyperv.h |    4 ----
+ arch/x86/kvm/svm/nested.c |    7 -------
+ 2 files changed, 11 deletions(-)
+
+--- a/arch/x86/kvm/svm/hyperv.h
++++ b/arch/x86/kvm/svm/hyperv.h
+@@ -51,10 +51,6 @@ static inline bool nested_svm_is_l2_tlb_
+ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+ #else /* CONFIG_KVM_HYPERV */
+ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+-static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+-{
+-      return false;
+-}
+ static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+ {
+       return false;
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -159,13 +159,6 @@ void recalc_intercepts(struct vcpu_svm *
+                       vmcb_clr_intercept(c, INTERCEPT_VINTR);
+       }
+ 
+-      /*
+-       * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
+-       * flush feature is enabled.
+-       */
+-      if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
+-              vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
+-
+       for (i = 0; i < MAX_INTERCEPT; i++)
+               c->intercepts[i] |= g->intercepts[i];
+ 
diff --git a/queue-6.18/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch b/queue-6.18/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch

new file mode 100644 (file)

index 0000000..c6d1fa6
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch
@@ -0,0 +1,76 @@
+From 8d397582f6b5e9fbcf09781c7c934b4910e94a50 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:47 +0000
+Subject: KVM: nSVM: Always use NextRIP as vmcb02's NextRIP after first L2 VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8d397582f6b5e9fbcf09781c7c934b4910e94a50 upstream.
+
+For guests with NRIPS disabled, L1 does not provide NextRIP when running
+an L2 with an injected soft interrupt, instead it advances the current RIP
+before running it. KVM uses the current RIP as the NextRIP in vmcb02 to
+emulate a CPU without NRIPS.
+
+However, after L2 runs the first time, NextRIP will be updated by the CPU
+and/or KVM, and the current RIP is no longer the correct value to use in
+vmcb02.  Hence, after save/restore, use the current RIP if and only if a
+nested run is pending, otherwise use NextRIP.  Give soft_int_next_rip the
+same treatment, as it's the same logic, just for a narrower use case.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-6-yosry@kernel.org
+[sean: give soft_int_next_rip the same treatment]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -814,24 +814,32 @@ static void nested_vmcb02_prepare_contro
+       vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
+ 
+       /*
+-       * next_rip is consumed on VMRUN as the return address pushed on the
++       * NextRIP is consumed on VMRUN as the return address pushed on the
+        * stack for injected soft exceptions/interrupts.  If nrips is exposed
+-       * to L1, take it verbatim from vmcb12.  If nrips is supported in
+-       * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
+-       * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
+-       * prior to injecting the event).
++       * to L1, take it verbatim from vmcb12.
++       *
++       * If nrips is supported in hardware but not exposed to L1, stuff the
++       * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
++       * responsible for advancing RIP prior to injecting the event). This is
++       * only the case for the first L2 run after VMRUN. After that (e.g.
++       * during save/restore), NextRIP is updated by the CPU and/or KVM, and
++       * the value of the L2 RIP from vmcb12 should not be used.
+        */
+-      if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+-              vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
+-      else if (boot_cpu_has(X86_FEATURE_NRIPS))
+-              vmcb02->control.next_rip    = vmcb12_rip;
++      if (boot_cpu_has(X86_FEATURE_NRIPS)) {
++              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++                  !svm->nested.nested_run_pending)
++                      vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
++              else
++                      vmcb02->control.next_rip    = vmcb12_rip;
++      }
+ 
+       svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+       if (is_evtinj_soft(vmcb02->control.event_inj)) {
+               svm->soft_int_injected = true;
+               svm->soft_int_csbase = vmcb12_csbase;
+               svm->soft_int_old_rip = vmcb12_rip;
+-              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++                  !svm->nested.nested_run_pending)
+                       svm->soft_int_next_rip = svm->nested.ctl.next_rip;
+               else
+                       svm->soft_int_next_rip = vmcb12_rip;
diff --git a/queue-6.18/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch b/queue-6.18/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch

new file mode 100644 (file)

index 0000000..9b906c8
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch
@@ -0,0 +1,65 @@
+From b53ab5167a81537777ac780bbd93d32613aa3bda Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:55 +0000
+Subject: KVM: nSVM: Avoid clearing VMCB_LBR in vmcb12
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b53ab5167a81537777ac780bbd93d32613aa3bda upstream.
+
+svm_copy_lbrs() always marks VMCB_LBR dirty in the destination VMCB.
+However, nested_svm_vmexit() uses it to copy LBRs to vmcb12, and
+clearing clean bits in vmcb12 is not architecturally defined.
+
+Move vmcb_mark_dirty() to callers and drop it for vmcb12.
+
+This also facilitates incoming refactoring that does not pass the entire
+VMCB to svm_copy_lbrs().
+
+Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    7 +++++--
+ arch/x86/kvm/svm/svm.c    |    2 --
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -684,6 +684,7 @@ static void nested_vmcb02_prepare_save(s
+       } else {
+               svm_copy_lbrs(vmcb02, vmcb01);
+       }
++      vmcb_mark_dirty(vmcb02, VMCB_LBR);
+       svm_update_lbrv(&svm->vcpu);
+ }
+ 
+@@ -1188,10 +1189,12 @@ int nested_svm_vmexit(struct vcpu_svm *s
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ 
+       if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
++                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+               svm_copy_lbrs(vmcb12, vmcb02);
+-      else
++      } else {
+               svm_copy_lbrs(vmcb01, vmcb02);
++              vmcb_mark_dirty(vmcb01, VMCB_LBR);
++      }
+ 
+       svm_update_lbrv(vcpu);
+ 
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -811,8 +811,6 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+       to_vmcb->save.br_to             = from_vmcb->save.br_to;
+       to_vmcb->save.last_excp_from    = from_vmcb->save.last_excp_from;
+       to_vmcb->save.last_excp_to      = from_vmcb->save.last_excp_to;
+-
+-      vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+ }
+ 
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
diff --git a/queue-6.18/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch b/queue-6.18/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch

new file mode 100644 (file)

index 0000000..2cd1486
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch
@@ -0,0 +1,69 @@
+From 69b721a86d0dcb026f6db7d111dcde7550442d2e Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:05 +0000
+Subject: KVM: nSVM: Clear EVENTINJ fields in vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 69b721a86d0dcb026f6db7d111dcde7550442d2e upstream.
+
+According to the APM, from the reference of the VMRUN instruction:
+
+  Upon #VMEXIT, the processor performs the following actions in order to
+  return to the host execution context:
+
+  ...
+
+  clear EVENTINJ field in VMCB
+
+KVM already syncs EVENTINJ fields from vmcb02 to cached vmcb12 on every
+L2->L0 #VMEXIT. Since these fields are zeroed by the CPU on #VMEXIT, they
+will mostly be zeroed in vmcb12 on nested #VMEXIT by nested_svm_vmexit().
+
+However, this is not the case when:
+
+  1. Consistency checks fail, as nested_svm_vmexit() is not called.
+  2. Entering guest mode fails before L2 runs (e.g. due to failed load of
+     CR3).
+
+(2) was broken by commit 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB
+controls updated by the processor on every vmexit"), as prior to that
+nested_svm_vmexit() always zeroed EVENTINJ fields.
+
+Explicitly clear the fields in all nested #VMEXIT code paths.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Fixes: 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-12-yosry@kernel.org
+[sean: massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -992,6 +992,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+               vmcb12->control.exit_code_hi = -1u;
+               vmcb12->control.exit_info_1  = 0;
+               vmcb12->control.exit_info_2  = 0;
++              vmcb12->control.event_inj = 0;
++              vmcb12->control.event_inj_err = 0;
+               svm_set_gif(svm, false);
+               goto out;
+       }
+@@ -1137,9 +1139,9 @@ static int nested_svm_vmexit_update_vmcb
+       if (nested_vmcb12_has_lbrv(vcpu))
+               svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+ 
++      vmcb12->control.event_inj         = 0;
++      vmcb12->control.event_inj_err     = 0;
+       vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
+-      vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
+-      vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
+ 
+       trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+                                      vmcb12->control.exit_info_1,
diff --git a/queue-6.18/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch b/queue-6.18/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch

new file mode 100644 (file)

index 0000000..fc34f57
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
@@ -0,0 +1,33 @@
+From f85a6ce06e4a0d49652f57967a649ab09e06287c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:04 +0000
+Subject: KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID)
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit f85a6ce06e4a0d49652f57967a649ab09e06287c upstream.
+
+According to the APM, GIF is set to 0 on any #VMEXIT, including
+an #VMEXIT(INVALID) due to failed consistency checks. Clear GIF on
+consistency check failures.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-11-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -992,6 +992,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+               vmcb12->control.exit_code_hi = -1u;
+               vmcb12->control.exit_info_1  = 0;
+               vmcb12->control.exit_info_2  = 0;
++              svm_set_gif(svm, false);
+               goto out;
+       }
+ 
diff --git a/queue-6.18/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch b/queue-6.18/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch

new file mode 100644 (file)

index 0000000..d10e80b
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
@@ -0,0 +1,64 @@
+From 8998e1d012f3f45d0456f16706682cef04c3c436 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:06 +0000
+Subject: KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8998e1d012f3f45d0456f16706682cef04c3c436 upstream.
+
+KVM clears tracking of L1->L2 injected NMIs (i.e. nmi_l1_to_l2) and soft
+IRQs (i.e. soft_int_injected) on a synthesized #VMEXIT(INVALID) due to
+failed VMRUN. However, they are not explicitly cleared in other
+synthesized #VMEXITs.
+
+soft_int_injected is always cleared after the first VMRUN of L2 when
+completing interrupts, as any re-injection is then tracked by KVM
+(instead of purely in vmcb02).
+
+nmi_l1_to_l2 is not cleared after the first VMRUN if NMI injection
+failed, as KVM still needs to keep track that the NMI originated from L1
+to avoid blocking NMIs for L1. It is only cleared when the NMI injection
+succeeds.
+
+KVM could synthesize a #VMEXIT to L1 before successfully injecting the
+NMI into L2 (e.g. due to a #NPF on L2's NMI handler in L1's NPTs). In
+this case, nmi_l1_to_l2 will remain true, and KVM may not correctly mask
+NMIs and intercept IRET when injecting an NMI into L1.
+
+Clear both nmi_l1_to_l2 and soft_int_injected in nested_svm_vmexit(), i.e.
+for all #VMEXITs except those that occur due to failed consistency checks,
+as those happen before nmi_l1_to_l2 or soft_int_injected are set.
+
+Fixes: 159fc6fa3b7d ("KVM: nSVM: Transparently handle L1 -> L2 NMI re-injection")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-13-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1021,8 +1021,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ 
+ out_exit_err:
+       svm->nested.nested_run_pending = 0;
+-      svm->nmi_l1_to_l2 = false;
+-      svm->soft_int_injected = false;
+ 
+       svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
+       svm->vmcb->control.exit_code_hi = -1u;
+@@ -1279,6 +1277,10 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (rc)
+               return 1;
+ 
++      /* Drop tracking for L1->L2 injected NMIs and soft IRQs */
++      svm->nmi_l1_to_l2 = false;
++      svm->soft_int_injected = false;
++
+       /*
+        * Drop what we picked up for L2 via svm_complete_interrupts() so it
+        * doesn't end up in L1.
diff --git a/queue-6.18/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch b/queue-6.18/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch

new file mode 100644 (file)

index 0000000..084094b
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch
@@ -0,0 +1,139 @@
+From c64bc6ed1764c1b7e3c0017019f743196074092f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 4 Mar 2026 16:06:56 -0800
+Subject: KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c64bc6ed1764c1b7e3c0017019f743196074092f upstream.
+
+In the save+restore path, when restoring nested state, the values of RIP
+and CS base passed into nested_vmcb02_prepare_control() are mostly
+incorrect.  They are both pulled from the vmcb02. For CS base, the value
+is only correct if system regs are restored before nested state. The
+value of RIP is whatever the vCPU had in vmcb02 before restoring nested
+state (zero on a freshly created vCPU).
+
+Instead, take a similar approach to NextRIP, and delay initializing the
+RIP tracking fields until shortly before the vCPU is run, to make sure
+the most up-to-date values of RIP and CS base are used regardless of
+KVM_SET_SREGS, KVM_SET_REGS, and KVM_SET_NESTED_STATE's relative
+ordering.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-8-yosry@kernel.org
+[sean: deal with the svm_cancel_injection() madness]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   17 ++++++++---------
+ arch/x86/kvm/svm/svm.c    |   29 +++++++++++++++++++++++++++++
+ 2 files changed, 37 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -712,9 +712,7 @@ static bool is_evtinj_nmi(u32 evtinj)
+       return type == SVM_EVTINJ_TYPE_NMI;
+ }
+ 
+-static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
+-                                        unsigned long vmcb12_rip,
+-                                        unsigned long vmcb12_csbase)
++static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
+ {
+       u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
+       u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+@@ -826,15 +824,16 @@ static void nested_vmcb02_prepare_contro
+               vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+ 
+       svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
++
++      /*
++       * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1
++       * doesn't have NRIPS) are initialized later, before the vCPU is run.
++       */
+       if (is_evtinj_soft(vmcb02->control.event_inj)) {
+               svm->soft_int_injected = true;
+-              svm->soft_int_csbase = vmcb12_csbase;
+-              svm->soft_int_old_rip = vmcb12_rip;
+               if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+                   !svm->nested.nested_run_pending)
+                       svm->soft_int_next_rip = svm->nested.ctl.next_rip;
+-              else
+-                      svm->soft_int_next_rip = vmcb12_rip;
+       }
+ 
+       /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
+@@ -919,7 +918,7 @@ int enter_svm_guest_mode(struct kvm_vcpu
+       nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
+ 
+       svm_switch_vmcb(svm, &svm->nested.vmcb02);
+-      nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
++      nested_vmcb02_prepare_control(svm);
+       nested_vmcb02_prepare_save(svm, vmcb12);
+ 
+       ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
+@@ -1877,7 +1876,7 @@ static int svm_set_nested_state(struct k
+       nested_copy_vmcb_control_to_cache(svm, ctl);
+ 
+       svm_switch_vmcb(svm, &svm->nested.vmcb02);
+-      nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
++      nested_vmcb02_prepare_control(svm);
+ 
+       /*
+        * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3558,6 +3558,16 @@ static int svm_handle_exit(struct kvm_vc
+       return svm_invoke_exit_handler(vcpu, exit_code);
+ }
+ 
++static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu)
++{
++      struct vcpu_svm *svm = to_svm(vcpu);
++
++      svm->soft_int_csbase = svm->vmcb->save.cs.base;
++      svm->soft_int_old_rip = kvm_rip_read(vcpu);
++      if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++              svm->soft_int_next_rip = kvm_rip_read(vcpu);
++}
++
+ static int pre_svm_run(struct kvm_vcpu *vcpu)
+ {
+       struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+@@ -3680,6 +3690,13 @@ static void svm_fixup_nested_rips(struct
+       if (boot_cpu_has(X86_FEATURE_NRIPS) &&
+           !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+               svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
++
++      /*
++       * Similarly, initialize the soft int metadata here to use the most
++       * up-to-date values of RIP and CS base, regardless of restore order.
++       */
++      if (svm->soft_int_injected)
++              svm_set_nested_run_soft_int_state(vcpu);
+ }
+ 
+ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+@@ -4043,6 +4060,18 @@ static void svm_complete_soft_interrupt(
+       struct vcpu_svm *svm = to_svm(vcpu);
+ 
+       /*
++       * Initialize the soft int fields *before* reading them below if KVM
++       * aborted entry to the guest with a nested VMRUN pending.  To ensure
++       * KVM uses up-to-date values for RIP and CS base across save/restore,
++       * regardless of restore order, KVM waits to set the soft int fields
++       * until VMRUN is imminent.  But when canceling injection, KVM requeues
++       * the soft int and will reinject it via the standard injection flow,
++       * and so KVM needs to grab the state from the pending nested VMRUN.
++       */
++      if (is_guest_mode(vcpu) && svm->nested.nested_run_pending)
++              svm_set_nested_run_soft_int_state(vcpu);
++
++      /*
+        * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's
+        * associated with the original soft exception/interrupt.  next_rip is
+        * cleared on all exits that can occur while vectoring an event, so KVM
diff --git a/queue-6.18/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch b/queue-6.18/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch

new file mode 100644 (file)

index 0000000..9ee8b83
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch
@@ -0,0 +1,117 @@
+From a0592461f39c00b28f552fe842a063a00043eaa8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:48 +0000
+Subject: KVM: nSVM: Delay stuffing L2's current RIP into NextRIP until vCPU run
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit a0592461f39c00b28f552fe842a063a00043eaa8 upstream.
+
+For guests with NRIPS disabled, L1 does not provide NextRIP when running
+an L2 with an injected soft interrupt, instead it advances L2's RIP
+before running it. KVM uses L2's current RIP as the NextRIP in vmcb02 to
+emulate a CPU without NRIPS.
+
+However, in svm_set_nested_state(), the value used for L2's current RIP
+comes from vmcb02, which is just whatever the vCPU had in vmcb02 before
+restoring nested state (zero on a freshly created vCPU). Passing the
+cached RIP value instead (i.e. kvm_rip_read()) would only fix the issue
+if registers are restored before nested state.
+
+Instead, split the logic of setting NextRIP in vmcb02. Handle the
+'normal' case of initializing vmcb02's NextRIP using NextRIP from vmcb12
+(or KVM_GET_NESTED_STATE's payload) in nested_vmcb02_prepare_control().
+Delay the special case of stuffing L2's current RIP into vmcb02's
+NextRIP until shortly before the vCPU is run, to make sure the most
+up-to-date value of RIP is used regardless of KVM_SET_REGS and
+KVM_SET_NESTED_STATE's relative ordering.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-7-yosry@kernel.org
+[sean: use new helper, svm_fixup_nested_rips()]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   25 ++++++++-----------------
+ arch/x86/kvm/svm/svm.c    |   25 +++++++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 17 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -814,24 +814,15 @@ static void nested_vmcb02_prepare_contro
+       vmcb02->control.event_inj_err       = svm->nested.ctl.event_inj_err;
+ 
+       /*
+-       * NextRIP is consumed on VMRUN as the return address pushed on the
+-       * stack for injected soft exceptions/interrupts.  If nrips is exposed
+-       * to L1, take it verbatim from vmcb12.
+-       *
+-       * If nrips is supported in hardware but not exposed to L1, stuff the
+-       * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
+-       * responsible for advancing RIP prior to injecting the event). This is
+-       * only the case for the first L2 run after VMRUN. After that (e.g.
+-       * during save/restore), NextRIP is updated by the CPU and/or KVM, and
+-       * the value of the L2 RIP from vmcb12 should not be used.
++       * If nrips is exposed to L1, take NextRIP as-is.  Otherwise, L1
++       * advances L2's RIP before VMRUN instead of using NextRIP. KVM will
++       * stuff the current RIP as vmcb02's NextRIP before L2 is run.  After
++       * the first run of L2 (e.g. after save+restore), NextRIP is updated by
++       * the CPU and/or KVM and should be used regardless of L1's support.
+        */
+-      if (boot_cpu_has(X86_FEATURE_NRIPS)) {
+-              if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+-                  !svm->nested.nested_run_pending)
+-                      vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
+-              else
+-                      vmcb02->control.next_rip    = vmcb12_rip;
+-      }
++      if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++          !svm->nested.nested_run_pending)
++              vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+ 
+       svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+       if (is_evtinj_soft(vmcb02->control.event_inj)) {
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3661,6 +3661,29 @@ static void svm_inject_irq(struct kvm_vc
+       svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
+ }
+ 
++static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu)
++{
++      struct vcpu_svm *svm = to_svm(vcpu);
++
++      if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending)
++              return;
++
++      /*
++       * If nrips is supported in hardware but not exposed to L1, stuff the
++       * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
++       * responsible for advancing RIP prior to injecting the event). Once L2
++       * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or
++       * KVM, and this is no longer needed.
++       *
++       * This is done here (as opposed to when preparing vmcb02) to use the
++       * most up-to-date value of RIP regardless of the order of restoring
++       * registers and nested state in the vCPU save+restore path.
++       */
++      if (boot_cpu_has(X86_FEATURE_NRIPS) &&
++          !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++              svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
++}
++
+ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+                                    int trig_mode, int vector)
+ {
+@@ -4246,6 +4269,8 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+       }
+       svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ 
++      svm_fixup_nested_rips(vcpu);
++
+       svm_hv_update_vp_id(svm->vmcb, vcpu);
+ 
+       /*
diff --git a/queue-6.18/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch b/queue-6.18/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch

new file mode 100644 (file)

index 0000000..79cc168
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch
@@ -0,0 +1,67 @@
+From e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:08 +0000
+Subject: KVM: nSVM: Drop the non-architectural consistency check for NP_ENABLE
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 upstream.
+
+KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka
+SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs.
+On first glance, it seems like the check should actually be for
+guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the
+host to support NPTs but the guest CPUID to not advertise it.
+
+However, the consistency check is not architectural to begin with. The
+APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor
+that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored
+if X86_FEATURE_NPT is not available for L1, so sanitize it when copying
+from the VMCB12 to KVM's cache.
+
+Apart from the consistency check, NP_ENABLE in VMCB12 is currently
+ignored because the bit is actually copied from VMCB01 to VMCB02, not
+from VMCB12.
+
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-15-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -335,9 +335,6 @@ static bool __nested_vmcb_check_controls
+       if (CC(control->asid == 0))
+               return false;
+ 
+-      if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+-              return false;
+-
+       if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+                                          MSRPM_SIZE)))
+               return false;
+@@ -419,6 +416,11 @@ void __nested_copy_vmcb_control_to_cache
+       for (i = 0; i < MAX_INTERCEPT; i++)
+               to->intercepts[i] = from->intercepts[i];
+ 
++      /* Always clear SVM_NESTED_CTL_NP_ENABLE if the guest cannot use NPTs */
++      to->nested_ctl          = from->nested_ctl;
++      if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT))
++              to->nested_ctl &= ~SVM_NESTED_CTL_NP_ENABLE;
++
+       to->iopm_base_pa        = from->iopm_base_pa;
+       to->msrpm_base_pa       = from->msrpm_base_pa;
+       to->tsc_offset          = from->tsc_offset;
+@@ -432,7 +434,6 @@ void __nested_copy_vmcb_control_to_cache
+       to->exit_info_2         = from->exit_info_2;
+       to->exit_int_info       = from->exit_int_info;
+       to->exit_int_info_err   = from->exit_int_info_err;
+-      to->nested_ctl          = from->nested_ctl;
+       to->event_inj           = from->event_inj;
+       to->event_inj_err       = from->event_inj_err;
+       to->next_rip            = from->next_rip;
diff --git a/queue-6.18/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch b/queue-6.18/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch

new file mode 100644 (file)

index 0000000..36b0380
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
@@ -0,0 +1,42 @@
+From 24f7d36b824b65cf1a2db3db478059187b2a37b0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 24 Feb 2026 22:50:17 +0000
+Subject: KVM: nSVM: Ensure AVIC is inhibited when restoring a vCPU to guest mode
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 24f7d36b824b65cf1a2db3db478059187b2a37b0 upstream.
+
+On nested VMRUN, KVM ensures AVIC is inhibited by requesting
+KVM_REQ_APICV_UPDATE, triggering a check of inhibit reasons, finding
+APICV_INHIBIT_REASON_NESTED, and disabling AVIC.
+
+However, when KVM_SET_NESTED_STATE is performed on a vCPU not in guest
+mode with AVIC enabled, KVM_REQ_APICV_UPDATE is not requested, and AVIC
+is not inhibited.
+
+Request KVM_REQ_APICV_UPDATE in the KVM_SET_NESTED_STATE path if AVIC is
+active, similar to the nested VMRUN path.
+
+Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260224225017.3303870-1-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1896,6 +1896,9 @@ static int svm_set_nested_state(struct k
+ 
+       svm->nested.force_msr_bitmap_recalc = true;
+ 
++      if (kvm_vcpu_apicv_active(vcpu))
++              kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++
+       kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+       ret = 0;
+ out_free:
diff --git a/queue-6.18/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch b/queue-6.18/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch

new file mode 100644 (file)

index 0000000..b088a4b
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
@@ -0,0 +1,42 @@
+From e63fb1379f4b9300a44739964e69549bebbcdca4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 10 Feb 2026 01:08:06 +0000
+Subject: KVM: nSVM: Mark all of vmcb02 dirty when restoring nested state
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit e63fb1379f4b9300a44739964e69549bebbcdca4 upstream.
+
+When restoring a vCPU in guest mode, any state restored before
+KVM_SET_NESTED_STATE (e.g. KVM_SET_SREGS) will mark the corresponding
+dirty bits in vmcb01, as it is the active VMCB before switching to
+vmcb02 in svm_set_nested_state().
+
+Hence, mark all fields in vmcb02 dirty in svm_set_nested_state() to
+capture any previously restored fields.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260210010806.3204289-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1875,6 +1875,12 @@ static int svm_set_nested_state(struct k
+       nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
+ 
+       /*
++       * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
++       * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here.
++       */
++      vmcb_mark_all_dirty(svm->vmcb);
++
++      /*
+        * While the nested guest CR3 is already checked and set by
+        * KVM_SET_SREGS, it was set when nested state was yet loaded,
+        * thus MMU might not be initialized correctly.
diff --git a/queue-6.18/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch b/queue-6.18/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch

new file mode 100644 (file)

index 0000000..2f6759e
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch
@@ -0,0 +1,141 @@
+From c36991c6f8d2ab56ee67aff04e3c357f45cfc76c Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Tue, 3 Mar 2026 16:22:22 -0800
+Subject: KVM: nSVM: Raise #UD if unhandled VMMCALL isn't intercepted by L1
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit c36991c6f8d2ab56ee67aff04e3c357f45cfc76c upstream.
+
+Explicitly synthesize a #UD for VMMCALL if L2 is active, L1 does NOT want
+to intercept VMMCALL, nested_svm_l2_tlb_flush_enabled() is true, and the
+hypercall is something other than one of the supported Hyper-V hypercalls.
+When all of the above conditions are met, KVM will intercept VMMCALL but
+never forward it to L1, i.e. will let L2 make hypercalls as if it were L1.
+
+The TLFS says a whole lot of nothing about this scenario, so go with the
+architectural behavior, which says that VMMCALL #UDs if it's not
+intercepted.
+
+Opportunistically do a 2-for-1 stub trade by stub-ifying the new API
+instead of the helpers it uses.  The last remaining "single" stub will
+soon be dropped as well.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Fixes: 3f4a812edf5c ("KVM: nSVM: hyper-v: Enable L2 TLB flush")
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Link: https://patch.msgid.link/20260228033328.2285047-5-chengkev@google.com
+[sean: rewrite changelog and comment, tag for stable, remove defunct stubs]
+Reviewed-by: Yosry Ahmed <yosry@kernel.org>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://patch.msgid.link/20260304002223.1105129-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.h     |    8 --------
+ arch/x86/kvm/svm/hyperv.h |   11 +++++++++++
+ arch/x86/kvm/svm/nested.c |    4 +---
+ arch/x86/kvm/svm/svm.c    |   19 ++++++++++++++++++-
+ 4 files changed, 30 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.h
++++ b/arch/x86/kvm/hyperv.h
+@@ -305,14 +305,6 @@ static inline bool kvm_hv_has_stimer_pen
+ {
+       return false;
+ }
+-static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+-{
+-      return false;
+-}
+-static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
+-{
+-      return false;
+-}
+ static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
+ {
+       return 0;
+--- a/arch/x86/kvm/svm/hyperv.h
++++ b/arch/x86/kvm/svm/hyperv.h
+@@ -41,6 +41,13 @@ static inline bool nested_svm_l2_tlb_flu
+       return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
+ }
+ 
++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
++{
++      return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
++             nested_svm_l2_tlb_flush_enabled(vcpu) &&
++             kvm_hv_is_tlb_flush_hcall(vcpu);
++}
++
+ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+ #else /* CONFIG_KVM_HYPERV */
+ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+@@ -48,6 +55,10 @@ static inline bool nested_svm_l2_tlb_flu
+ {
+       return false;
+ }
++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
++{
++      return false;
++}
+ static inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {}
+ #endif /* CONFIG_KVM_HYPERV */
+ 
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1672,9 +1672,7 @@ int nested_svm_exit_special(struct vcpu_
+       }
+       case SVM_EXIT_VMMCALL:
+               /* Hyper-V L2 TLB flush hypercall is handled by L0 */
+-              if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+-                  nested_svm_l2_tlb_flush_enabled(vcpu) &&
+-                  kvm_hv_is_tlb_flush_hcall(vcpu))
++              if (nested_svm_is_l2_tlb_flush_hcall(vcpu))
+                       return NESTED_EXIT_HOST;
+               break;
+       default:
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -52,6 +52,7 @@
+ #include "svm.h"
+ #include "svm_ops.h"
+ 
++#include "hyperv.h"
+ #include "kvm_onhyperv.h"
+ #include "svm_onhyperv.h"
+ 
+@@ -3177,6 +3178,22 @@ static int bus_lock_exit(struct kvm_vcpu
+       return 0;
+ }
+ 
++static int vmmcall_interception(struct kvm_vcpu *vcpu)
++{
++      /*
++       * Inject a #UD if L2 is active and the VMMCALL isn't a Hyper-V TLB
++       * hypercall, as VMMCALL #UDs if it's not intercepted, and this path is
++       * reachable if and only if L1 doesn't want to intercept VMMCALL or has
++       * enabled L0 (KVM) handling of Hyper-V L2 TLB flush hypercalls.
++       */
++      if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) {
++              kvm_queue_exception(vcpu, UD_VECTOR);
++              return 1;
++      }
++
++      return kvm_emulate_hypercall(vcpu);
++}
++
+ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+       [SVM_EXIT_READ_CR0]                     = cr_interception,
+       [SVM_EXIT_READ_CR3]                     = cr_interception,
+@@ -3227,7 +3244,7 @@ static int (*const svm_exit_handlers[])(
+       [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
+       [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
+       [SVM_EXIT_VMRUN]                        = vmrun_interception,
+-      [SVM_EXIT_VMMCALL]                      = kvm_emulate_hypercall,
++      [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
+       [SVM_EXIT_VMLOAD]                       = vmload_interception,
+       [SVM_EXIT_VMSAVE]                       = vmsave_interception,
+       [SVM_EXIT_STGI]                         = stgi_interception,
diff --git a/queue-6.18/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch b/queue-6.18/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch

new file mode 100644 (file)

index 0000000..94fa392
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch
@@ -0,0 +1,58 @@
+From 290c8d82023ab0e1d2782d37136541e017174d7c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:00 +0000
+Subject: KVM: nSVM: Refactor checking LBRV enablement in vmcb12 into a helper
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 290c8d82023ab0e1d2782d37136541e017174d7c upstream.
+
+Refactor the vCPU cap and vmcb12 flag checks into a helper. The
+unlikely() annotation is dropped, it's unlikely (huh) to make a
+difference and the CPU will probably predict it better on its own.
+
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-7-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -608,6 +608,12 @@ void nested_vmcb02_compute_g_pat(struct
+       svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
+ }
+ 
++static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu)
++{
++      return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
++              (to_svm(vcpu)->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
++}
++
+ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+ {
+       bool new_vmcb12 = false;
+@@ -673,8 +679,7 @@ static void nested_vmcb02_prepare_save(s
+               vmcb_mark_dirty(vmcb02, VMCB_DR);
+       }
+ 
+-      if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++      if (nested_vmcb12_has_lbrv(vcpu)) {
+               /*
+                * Reserved bits of DEBUGCTL are ignored.  Be consistent with
+                * svm_set_msr's definition of reserved bits.
+@@ -1189,8 +1194,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (!nested_exit_on_intr(svm))
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ 
+-      if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+-                   (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++      if (nested_vmcb12_has_lbrv(vcpu)) {
+               svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+       } else {
+               svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
diff --git a/queue-6.18/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch b/queue-6.18/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch

new file mode 100644 (file)

index 0000000..035e2c7
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch
@@ -0,0 +1,142 @@
+From dcf3648ab71437b504abbfdc4e74622a0f1a56e3 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:01 +0000
+Subject: KVM: nSVM: Refactor writing vmcb12 on nested #VMEXIT as a helper
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit dcf3648ab71437b504abbfdc4e74622a0f1a56e3 upstream.
+
+Move mapping vmcb12 and updating it out of nested_svm_vmexit() into a
+helper, no functional change intended.
+
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-8-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |   77 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 44 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1082,36 +1082,20 @@ void svm_copy_vmloadsave_state(struct vm
+       to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+ }
+ 
+-int nested_svm_vmexit(struct vcpu_svm *svm)
++static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_vcpu *vcpu = &svm->vcpu;
+-      struct vmcb *vmcb01 = svm->vmcb01.ptr;
++      struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+-      struct vmcb *vmcb12;
+       struct kvm_host_map map;
++      struct vmcb *vmcb12;
+       int rc;
+ 
+       rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+-      if (rc) {
+-              if (rc == -EINVAL)
+-                      kvm_inject_gp(vcpu, 0);
+-              return 1;
+-      }
++      if (rc)
++              return rc;
+ 
+       vmcb12 = map.hva;
+ 
+-      /* Exit Guest-Mode */
+-      leave_guest_mode(vcpu);
+-      svm->nested.vmcb12_gpa = 0;
+-      WARN_ON_ONCE(svm->nested.nested_run_pending);
+-
+-      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+-
+-      /* in case we halted in L2 */
+-      kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
+-
+-      /* Give the current vmcb to the guest */
+-
+       vmcb12->save.es     = vmcb02->save.es;
+       vmcb12->save.cs     = vmcb02->save.cs;
+       vmcb12->save.ss     = vmcb02->save.ss;
+@@ -1149,10 +1133,48 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+               vmcb12->control.next_rip  = vmcb02->control.next_rip;
+ 
++      if (nested_vmcb12_has_lbrv(vcpu))
++              svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
++
+       vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
+       vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
+       vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
+ 
++      trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
++                                     vmcb12->control.exit_info_1,
++                                     vmcb12->control.exit_info_2,
++                                     vmcb12->control.exit_int_info,
++                                     vmcb12->control.exit_int_info_err,
++                                     KVM_ISA_SVM);
++
++      kvm_vcpu_unmap(vcpu, &map);
++      return 0;
++}
++
++int nested_svm_vmexit(struct vcpu_svm *svm)
++{
++      struct kvm_vcpu *vcpu = &svm->vcpu;
++      struct vmcb *vmcb01 = svm->vmcb01.ptr;
++      struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
++      int rc;
++
++      rc = nested_svm_vmexit_update_vmcb12(vcpu);
++      if (rc) {
++              if (rc == -EINVAL)
++                      kvm_inject_gp(vcpu, 0);
++              return 1;
++      }
++
++      /* Exit Guest-Mode */
++      leave_guest_mode(vcpu);
++      svm->nested.vmcb12_gpa = 0;
++      WARN_ON_ONCE(svm->nested.nested_run_pending);
++
++      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
++
++      /* in case we halted in L2 */
++      kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
++
+       if (!kvm_pause_in_guest(vcpu->kvm)) {
+               vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+               vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+@@ -1194,9 +1216,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       if (!nested_exit_on_intr(svm))
+               kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+ 
+-      if (nested_vmcb12_has_lbrv(vcpu)) {
+-              svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+-      } else {
++      if (!nested_vmcb12_has_lbrv(vcpu)) {
+               svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+               vmcb_mark_dirty(vmcb01, VMCB_LBR);
+       }
+@@ -1252,15 +1272,6 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       svm->vcpu.arch.dr7 = DR7_FIXED_1;
+       kvm_update_dr7(&svm->vcpu);
+ 
+-      trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+-                                     vmcb12->control.exit_info_1,
+-                                     vmcb12->control.exit_info_2,
+-                                     vmcb12->control.exit_int_info,
+-                                     vmcb12->control.exit_int_info_err,
+-                                     KVM_ISA_SVM);
+-
+-      kvm_vcpu_unmap(vcpu, &map);
+-
+       nested_svm_transition_tlb_flush(vcpu);
+ 
+       nested_svm_uninit_mmu_context(vcpu);
diff --git a/queue-6.18/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-6.18/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch

new file mode 100644 (file)

index 0000000..f4b26ba
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
@@ -0,0 +1,52 @@
+From 03bee264f8ebfd39e0254c98e112d033a7aa9055 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:44 +0000
+Subject: KVM: nSVM: Sync interrupt shadow to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 03bee264f8ebfd39e0254c98e112d033a7aa9055 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+int_state is also written by the CPU, specifically bit 0 (i.e.
+SVM_INTERRUPT_SHADOW_MASK) for nested VMs, but it is not sync'd to
+cached vmcb12. This does not cause a problem if KVM_SET_NESTED_STATE
+precedes KVM_SET_VCPU_EVENTS in the restore path, as an interrupt shadow
+would be correctly restored to vmcb02 (KVM_SET_VCPU_EVENTS overwrites
+what KVM_SET_NESTED_STATE restored in int_state).
+
+However, if KVM_SET_VCPU_EVENTS precedes KVM_SET_NESTED_STATE, an
+interrupt shadow would be restored into vmcb01 instead of vmcb02. This
+would mostly be benign for L1 (delays an interrupt), but not for L2. For
+L2, the vCPU could hang (e.g. if a wakeup interrupt is delivered before
+a HLT that should have been in an interrupt shadow).
+
+Sync int_state to the cached vmcb12 in nested_sync_control_from_vmcb02()
+to avoid this problem. With that, KVM_SET_NESTED_STATE restores the
+correct interrupt shadow state, and if KVM_SET_VCPU_EVENTS follows it
+would overwrite it with the same value.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -487,6 +487,7 @@ void nested_sync_control_from_vmcb02(str
+       u32 mask;
+       svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
+       svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
++      svm->nested.ctl.int_state       = svm->vmcb->control.int_state;
+ 
+       /* Only a few fields of int_ctl are written by the processor.  */
+       mask = V_IRQ_MASK | V_TPR_MASK;
diff --git a/queue-6.18/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch b/queue-6.18/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch

new file mode 100644 (file)

index 0000000..4b6ecae
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
@@ -0,0 +1,55 @@
+From 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:43 +0000
+Subject: KVM: nSVM: Sync NextRIP to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+NextRIP is also written by the CPU (in some cases) after VMRUN, but is
+not sync'd to the cached vmcb12. As a result, it is corrupted after
+save/restore (replaced by the original value written by L1 on nested
+VMRUN). This could cause problems for either KVM (e.g. when injecting a
+soft IRQ) or L1 (e.g. when using NextRIP to advance RIP after emulating
+an instruction).
+
+Fix this by sync'ing NextRIP to the cache after VMRUN of L2, but only
+after completing interrupts (not in nested_sync_control_from_vmcb02()),
+as KVM may update NextRIP (e.g. when re-injecting a soft IRQ).
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4343,6 +4343,16 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+ 
+       svm_complete_interrupts(vcpu);
+ 
++      /*
++       * Update the cache after completing interrupts to get an accurate
++       * NextRIP, e.g. when re-injecting a soft interrupt.
++       *
++       * FIXME: Rework svm_get_nested_state() to not pull data from the
++       *        cache (except for maybe int_ctl).
++       */
++      if (is_guest_mode(vcpu))
++              svm->nested.ctl.next_rip = svm->vmcb->control.next_rip;
++
+       return svm_exit_handlers_fastpath(vcpu);
+ }
+ 
diff --git a/queue-6.18/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch b/queue-6.18/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch

new file mode 100644 (file)

index 0000000..63c648d
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch
@@ -0,0 +1,55 @@
+From 1b30e7551767cb95b3e49bb169c72bbd76b56e05 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:02 +0000
+Subject: KVM: nSVM: Triple fault if mapping VMCB12 fails on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 1b30e7551767cb95b3e49bb169c72bbd76b56e05 upstream.
+
+KVM currently injects a #GP and hopes for the best if mapping VMCB12
+fails on nested #VMEXIT, and only if the failure mode is -EINVAL.
+Mapping the VMCB12 could also fail if creating host mappings fails.
+
+After the #GP is injected, nested_svm_vmexit() bails early, without
+cleaning up (e.g. KVM_REQ_GET_NESTED_STATE_PAGES is set, is_guest_mode()
+is true, etc).
+
+Instead of optionally injecting a #GP, triple fault the guest if mapping
+VMCB12 fails since KVM cannot make a sane recovery. The APM states that
+a #VMEXIT will triple fault if host state is illegal or an exception
+occurs while loading host state, so the behavior is not entirely made
+up.
+
+Do not return early from nested_svm_vmexit(), continue cleaning up the
+vCPU state (e.g. switch back to vmcb01), to handle the failure as
+gracefully as possible.
+
+Fixes: cf74a78b229d ("KVM: SVM: Add VMEXIT handler and intercepts")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-9-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1158,12 +1158,8 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+       int rc;
+ 
+-      rc = nested_svm_vmexit_update_vmcb12(vcpu);
+-      if (rc) {
+-              if (rc == -EINVAL)
+-                      kvm_inject_gp(vcpu, 0);
+-              return 1;
+-      }
++      if (nested_svm_vmexit_update_vmcb12(vcpu))
++              kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ 
+       /* Exit Guest-Mode */
+       leave_guest_mode(vcpu);
diff --git a/queue-6.18/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch b/queue-6.18/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch

new file mode 100644 (file)

index 0000000..f89c888
--- /dev/null
+++ b/queue-6.18/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
@@ -0,0 +1,73 @@
+From 5c247d08bc81bbad4c662dcf5654137a2f8483ec Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 3 Feb 2026 20:10:10 +0000
+Subject: KVM: nSVM: Use vcpu->arch.cr2 when updating vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit 5c247d08bc81bbad4c662dcf5654137a2f8483ec upstream.
+
+KVM currently uses the value of CR2 from vmcb02 to update vmcb12 on
+nested #VMEXIT. This value is incorrect in some cases, causing L1 to run
+L2 with a corrupted CR2. This could lead to segfaults or data corruption
+if L2 is in the middle of handling a #PF and reads a corrupted CR2. Use
+the correct value in vcpu->arch.cr2 instead.
+
+The value in vcpu->arch.cr2 is sync'd to vmcb02 shortly before a VMRUN
+of L2, and sync'd back to vcpu->arch.cr2 shortly after. The values are
+only out-of-sync in two cases: after save+restore, and after a #PF is
+injected into L2. In either case, if a #VMEXIT to L1 is synthesized
+before L2 runs, using the value in vmcb02 would be incorrect.
+
+After save+restore, the value of CR2 is restored by KVM_SET_SREGS into
+vcpu->arch.cr2. It is not reflected in vmcb02 until a VMRUN of L2. Before
+that, it holds whatever was in vmcb02 before restore, which would be
+zero on a new vCPU that never ran nested. If a #VMEXIT to L1 is
+synthesized before L2 ever runs, using vcpu->arch.cr2 to update vmcb12
+is the right thing to do.
+
+The #PF injection case is more nuanced.  Although the APM is a bit
+unclear about when CR2 is written during a #PF, the SDM is more clear:
+
+       Processors update CR2 whenever a page fault is detected. If a
+       second page fault occurs while an earlier page fault is being
+       delivered, the faulting linear address of the second fault will
+       overwrite the contents of CR2 (replacing the previous address).
+       These updates to CR2 occur even if the page fault results in a
+       double fault or occurs during the delivery of a double fault.
+
+KVM injecting the exception surely counts as the #PF being "detected".
+More importantly, when an exception is injected into L2 at the time of a
+synthesized #VMEXIT, KVM updates exit_int_info in vmcb12 accordingly,
+such that an L1 hypervisor can re-inject the exception. If CR2 is not
+written at that point, the L1 hypervisor has no way of correctly
+re-injecting the #PF. Hence, if a #VMEXIT to L1 is synthesized after
+the #PF is injected into L2 but before it actually runs, using
+vcpu->arch.cr2 to update vmcb12 is also the right thing to do.
+
+Note that KVM does _not_ update vcpu->arch.cr2 when a #PF is pending for
+L2, only when it is injected. The distinction is important, because only
+injected (but not intercepted) exceptions are propagated to L1 through
+exit_int_info. It would be incorrect to update CR2 in vmcb12 for a
+pending #PF, as L1 would perceive an updated CR2 value with no #PF.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260203201010.1871056-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1114,7 +1114,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+       vmcb12->save.efer   = svm->vcpu.arch.efer;
+       vmcb12->save.cr0    = kvm_read_cr0(vcpu);
+       vmcb12->save.cr3    = kvm_read_cr3(vcpu);
+-      vmcb12->save.cr2    = vmcb02->save.cr2;
++      vmcb12->save.cr2    = vcpu->arch.cr2;
+       vmcb12->save.cr4    = svm->vcpu.arch.cr4;
+       vmcb12->save.rflags = kvm_get_rflags(vcpu);
+       vmcb12->save.rip    = kvm_rip_read(vcpu);
diff --git a/queue-6.18/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch b/queue-6.18/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch

new file mode 100644 (file)

index 0000000..b62a35c
--- /dev/null
+++ b/queue-6.18/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch
@@ -0,0 +1,133 @@
+From 3700f0788da6acf73b2df56690f4b201aa4aefd2 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:57 +0000
+Subject: KVM: SVM: Add missing save/restore handling of LBR MSRs
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 3700f0788da6acf73b2df56690f4b201aa4aefd2 upstream.
+
+MSR_IA32_DEBUGCTLMSR and LBR MSRs are currently not enumerated by
+KVM_GET_MSR_INDEX_LIST, and LBR MSRs cannot be set with KVM_SET_MSRS. So
+save/restore is completely broken.
+
+Fix it by adding the MSRs to msrs_to_save_base, and allowing writes to
+LBR MSRs from userspace only (as they are read-only MSRs) if LBR
+virtualization is enabled.  Additionally, to correctly restore L1's LBRs
+while L2 is running, make sure the LBRs are copied from the captured
+VMCB01 save area in svm_copy_vmrun_state().
+
+Note, for VMX, this also fixes a flaw where MSR_IA32_DEBUGCTLMSR isn't
+reported as an MSR to save/restore.
+
+Note #2, over-reporting MSR_IA32_LASTxxx on Intel is ok, as KVM already
+handles unsupported reads and writes thanks to commit b5e2fec0ebc3 ("KVM:
+Ignore DEBUGCTL MSRs with no effect") (kvm_do_msr_access() will morph the
+unsupported userspace write into a nop).
+
+Fixes: 24e09cbf480a ("KVM: SVM: enable LBR virtualization")
+Cc: stable@vger.kernel.org
+Reported-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-4-yosry@kernel.org
+[sean: guard with lbrv checks, massage changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    5 +++++
+ arch/x86/kvm/svm/svm.c    |   42 +++++++++++++++++++++++++++++++++++++-----
+ arch/x86/kvm/x86.c        |    3 +++
+ 3 files changed, 45 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1057,6 +1057,11 @@ void svm_copy_vmrun_state(struct vmcb_sa
+               to_save->isst_addr = from_save->isst_addr;
+               to_save->ssp = from_save->ssp;
+       }
++
++      if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) {
++              svm_copy_lbrs(to_save, from_save);
++              to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS;
++      }
+ }
+ 
+ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2712,19 +2712,19 @@ static int svm_get_msr(struct kvm_vcpu *
+               msr_info->data = svm->tsc_aux;
+               break;
+       case MSR_IA32_DEBUGCTLMSR:
+-              msr_info->data = svm->vmcb->save.dbgctl;
++              msr_info->data = lbrv ? svm->vmcb->save.dbgctl : 0;
+               break;
+       case MSR_IA32_LASTBRANCHFROMIP:
+-              msr_info->data = svm->vmcb->save.br_from;
++              msr_info->data = lbrv ? svm->vmcb->save.br_from : 0;
+               break;
+       case MSR_IA32_LASTBRANCHTOIP:
+-              msr_info->data = svm->vmcb->save.br_to;
++              msr_info->data = lbrv ? svm->vmcb->save.br_to : 0;
+               break;
+       case MSR_IA32_LASTINTFROMIP:
+-              msr_info->data = svm->vmcb->save.last_excp_from;
++              msr_info->data = lbrv ? svm->vmcb->save.last_excp_from : 0;
+               break;
+       case MSR_IA32_LASTINTTOIP:
+-              msr_info->data = svm->vmcb->save.last_excp_to;
++              msr_info->data = lbrv ? svm->vmcb->save.last_excp_to : 0;
+               break;
+       case MSR_VM_HSAVE_PA:
+               msr_info->data = svm->nested.hsave_msr;
+@@ -2999,6 +2999,38 @@ static int svm_set_msr(struct kvm_vcpu *
+               vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+               svm_update_lbrv(vcpu);
+               break;
++      case MSR_IA32_LASTBRANCHFROMIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.br_from = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
++      case MSR_IA32_LASTBRANCHTOIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.br_to = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
++      case MSR_IA32_LASTINTFROMIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.last_excp_from = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
++      case MSR_IA32_LASTINTTOIP:
++              if (!lbrv)
++                      return KVM_MSR_RET_UNSUPPORTED;
++              if (!msr->host_initiated)
++                      return 1;
++              svm->vmcb->save.last_excp_to = data;
++              vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++              break;
+       case MSR_VM_HSAVE_PA:
+               /*
+                * Old kernels did not validate the value written to
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -350,6 +350,9 @@ static const u32 msrs_to_save_base[] = {
+       MSR_IA32_U_CET, MSR_IA32_S_CET,
+       MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
+       MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
++      MSR_IA32_DEBUGCTLMSR,
++      MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
++      MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
+ };
+ 
+ static const u32 msrs_to_save_pmu[] = {
diff --git a/queue-6.18/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch b/queue-6.18/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch

new file mode 100644 (file)

index 0000000..020060c
--- /dev/null
+++ b/queue-6.18/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
@@ -0,0 +1,42 @@
+From d5bde6113aed8315a2bfe708730b721be9c2f48b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Feb 2026 15:09:51 -0800
+Subject: KVM: SVM: Explicitly mark vmcb01 dirty after modifying VMCB intercepts
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d5bde6113aed8315a2bfe708730b721be9c2f48b upstream.
+
+When reacting to an intercept update, explicitly mark vmcb01's intercepts
+dirty, as KVM always initially operates on vmcb01, and nested_svm_vmexit()
+isn't guaranteed to mark VMCB_INTERCEPTS as dirty.  I.e. if L2 is active,
+KVM will modify the intercepts for L1, but might not mark them as dirty
+before the next VMRUN of L1.
+
+Fixes: 116a0a23676e ("KVM: SVM: Add clean-bit for intercetps, tsc-offset and pause filter count")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218230958.2877682-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -129,11 +129,13 @@ void recalc_intercepts(struct vcpu_svm *
+       struct vmcb_ctrl_area_cached *g;
+       unsigned int i;
+ 
+-      vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++      vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS);
+ 
+       if (!is_guest_mode(&svm->vcpu))
+               return;
+ 
++      vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++
+       c = &svm->vmcb->control;
+       h = &svm->vmcb01.ptr->control;
+       g = &svm->nested.ctl;
diff --git a/queue-6.18/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch b/queue-6.18/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch

new file mode 100644 (file)

index 0000000..800791e
--- /dev/null
+++ b/queue-6.18/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
@@ -0,0 +1,36 @@
+From d99df02ff427f461102230f9c5b90a6c64ee8e23 Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Sat, 28 Feb 2026 03:33:26 +0000
+Subject: KVM: SVM: Inject #UD for INVLPGA if EFER.SVME=0
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit d99df02ff427f461102230f9c5b90a6c64ee8e23 upstream.
+
+INVLPGA should cause a #UD when EFER.SVME is not set. Add a check to
+properly inject #UD when EFER.SVME=0.
+
+Fixes: ff092385e828 ("KVM: SVM: Implement INVLPGA")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260228033328.2285047-3-chengkev@google.com
+[sean: tag for stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2289,6 +2289,9 @@ static int invlpga_interception(struct k
+       gva_t gva = kvm_rax_read(vcpu);
+       u32 asid = kvm_rcx_read(vcpu);
+ 
++      if (nested_svm_check_permissions(vcpu))
++              return 1;
++
+       /* FIXME: Handle an address size prefix. */
+       if (!is_long_mode(vcpu))
+               gva = (u32)gva;
diff --git a/queue-6.18/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch b/queue-6.18/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch

new file mode 100644 (file)

index 0000000..1b036cb
--- /dev/null
+++ b/queue-6.18/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch
@@ -0,0 +1,94 @@
+From 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:56 +0000
+Subject: KVM: SVM: Switch svm_copy_lbrs() to a macro
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 upstream.
+
+In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area'
+without a containing 'struct vmcb', and later even 'struct
+vmcb_save_area_cached', make it a macro.
+
+Macros are generally not preferred compared to functions, mainly due to
+type-safety. However, in this case it seems like having a simple macro
+copying a few fields is better than copy-pasting the same 5 lines of
+code in different places.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c |    8 ++++----
+ arch/x86/kvm/svm/svm.c    |    9 ---------
+ arch/x86/kvm/svm/svm.h    |   10 +++++++++-
+ 3 files changed, 13 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -679,10 +679,10 @@ static void nested_vmcb02_prepare_save(s
+                * Reserved bits of DEBUGCTL are ignored.  Be consistent with
+                * svm_set_msr's definition of reserved bits.
+                */
+-              svm_copy_lbrs(vmcb02, vmcb12);
++              svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+               vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+       } else {
+-              svm_copy_lbrs(vmcb02, vmcb01);
++              svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
+       }
+       vmcb_mark_dirty(vmcb02, VMCB_LBR);
+       svm_update_lbrv(&svm->vcpu);
+@@ -1189,9 +1189,9 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ 
+       if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+                    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+-              svm_copy_lbrs(vmcb12, vmcb02);
++              svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+       } else {
+-              svm_copy_lbrs(vmcb01, vmcb02);
++              svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+               vmcb_mark_dirty(vmcb01, VMCB_LBR);
+       }
+ 
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -804,15 +804,6 @@ static void svm_recalc_msr_intercepts(st
+        */
+ }
+ 
+-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+-{
+-      to_vmcb->save.dbgctl            = from_vmcb->save.dbgctl;
+-      to_vmcb->save.br_from           = from_vmcb->save.br_from;
+-      to_vmcb->save.br_to             = from_vmcb->save.br_to;
+-      to_vmcb->save.last_excp_from    = from_vmcb->save.last_excp_from;
+-      to_vmcb->save.last_excp_to      = from_vmcb->save.last_excp_to;
+-}
+-
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+       to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -688,8 +688,16 @@ static inline void *svm_vcpu_alloc_msrpm
+       return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
+ }
+ 
++#define svm_copy_lbrs(to, from)                                       \
++do {                                                          \
++      (to)->dbgctl            = (from)->dbgctl;               \
++      (to)->br_from           = (from)->br_from;              \
++      (to)->br_to             = (from)->br_to;                \
++      (to)->last_excp_from    = (from)->last_excp_from;       \
++      (to)->last_excp_to      = (from)->last_excp_to;         \
++} while (0)
++
+ void svm_vcpu_free_msrpm(void *msrpm);
+-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+ void svm_enable_lbrv(struct kvm_vcpu *vcpu);
+ void svm_update_lbrv(struct kvm_vcpu *vcpu);
+ 
diff --git a/queue-6.18/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch b/queue-6.18/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch

new file mode 100644 (file)

index 0000000..3522bef
--- /dev/null
+++ b/queue-6.18/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch
@@ -0,0 +1,175 @@
+From d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 17 Feb 2026 16:54:38 -0800
+Subject: KVM: x86: Defer non-architectural deliver of exception payload to userspace read
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 upstream.
+
+When attempting to play nice with userspace that hasn't enabled
+KVM_CAP_EXCEPTION_PAYLOAD, defer KVM's non-architectural delivery of the
+payload until userspace actually reads relevant vCPU state, and more
+importantly, force delivery of the payload in *all* paths where userspace
+saves relevant vCPU state, not just KVM_GET_VCPU_EVENTS.
+
+Ignoring userspace save/restore for the moment, delivering the payload
+before the exception is injected is wrong regardless of whether L1 or L2
+is running.  To make matters even more confusing, the flaw *currently*
+being papered over by the !is_guest_mode() check isn't even the same bug
+that commit da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF
+delivery") was trying to avoid.
+
+At the time of commit da998b46d244, KVM didn't correctly handle exception
+intercepts, as KVM would wait until VM-Entry into L2 was imminent to check
+if the queued exception should morph to a nested VM-Exit.  I.e. KVM would
+deliver the payload to L2 and then synthesize a VM-Exit into L1.  But the
+payload was only the most blatant issue, e.g. waiting to check exception
+intercepts would also lead to KVM incorrectly escalating a
+should-be-intercepted #PF into a #DF.
+
+That underlying bug was eventually fixed by commit 7709aba8f716 ("KVM: x86:
+Morph pending exceptions to pending VM-Exits at queue time"), but in the
+interim, commit a06230b62b89 ("KVM: x86: Deliver exception payload on
+KVM_GET_VCPU_EVENTS") came along and subtly added another dependency on
+the !is_guest_mode() check.
+
+While not recorded in the changelog, the motivation for deferring the
+!exception_payload_enabled delivery was to fix a flaw where a synthesized
+MTF (Monitor Trap Flag) VM-Exit would drop a pending #DB and clobber DR6.
+On a VM-Exit, VMX CPUs save pending #DB information into the VMCS, which
+is emulated by KVM in nested_vmx_update_pending_dbg() by grabbing the
+payload from the queue/pending exception.  I.e. prematurely delivering the
+payload would cause the pending #DB to not be recorded in the VMCS, and of
+course, clobber L2's DR6 as seen by L1.
+
+Jumping back to save+restore, the quirked behavior of forcing delivery of
+the payload only works if userspace does KVM_GET_VCPU_EVENTS *before*
+CR2 or DR6 is saved, i.e. before KVM_GET_SREGS{,2} and KVM_GET_DEBUGREGS.
+E.g. if userspace does KVM_GET_SREGS before KVM_GET_VCPU_EVENTS, then the
+CR2 saved by userspace won't contain the payload for the exception saved by
+KVM_GET_VCPU_EVENTS.
+
+Deliberately deliver the payload in the store_regs() path, as it's the
+least awful option even though userspace may not be doing save+restore.
+Because if userspace _is_ doing save+restore, it could elide KVM_GET_SREGS
+knowing that SREGS were already saved when the vCPU exited.
+
+Link: https://lore.kernel.org/all/20200207103608.110305-1-oupton@google.com
+Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Tested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218005438.2619063-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |   62 +++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 39 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -861,9 +861,6 @@ static void kvm_multiple_exception(struc
+               vcpu->arch.exception.error_code = error_code;
+               vcpu->arch.exception.has_payload = has_payload;
+               vcpu->arch.exception.payload = payload;
+-              if (!is_guest_mode(vcpu))
+-                      kvm_deliver_exception_payload(vcpu,
+-                                                    &vcpu->arch.exception);
+               return;
+       }
+ 
+@@ -5555,18 +5552,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(st
+       return 0;
+ }
+ 
+-static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+-                                             struct kvm_vcpu_events *events)
++static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu)
+ {
+-      struct kvm_queued_exception *ex;
+-
+-      process_nmi(vcpu);
+-
+-#ifdef CONFIG_KVM_SMM
+-      if (kvm_check_request(KVM_REQ_SMI, vcpu))
+-              process_smi(vcpu);
+-#endif
+-
+       /*
+        * KVM's ABI only allows for one exception to be migrated.  Luckily,
+        * the only time there can be two queued exceptions is if there's a
+@@ -5577,21 +5564,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_
+       if (vcpu->arch.exception_vmexit.pending &&
+           !vcpu->arch.exception.pending &&
+           !vcpu->arch.exception.injected)
+-              ex = &vcpu->arch.exception_vmexit;
+-      else
+-              ex = &vcpu->arch.exception;
++              return &vcpu->arch.exception_vmexit;
++
++      return &vcpu->arch.exception;
++}
++
++static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu)
++{
++      struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
+ 
+       /*
+-       * In guest mode, payload delivery should be deferred if the exception
+-       * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
+-       * intercepts #PF, ditto for DR6 and #DBs.  If the per-VM capability,
+-       * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
+-       * propagate the payload and so it cannot be safely deferred.  Deliver
+-       * the payload if the capability hasn't been requested.
++       * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver
++       * the pending exception payload when userspace saves *any* vCPU state
++       * that interacts with exception payloads to avoid breaking userspace.
++       *
++       * Architecturally, KVM must not deliver an exception payload until the
++       * exception is actually injected, e.g. to avoid losing pending #DB
++       * information (which VMX tracks in the VMCS), and to avoid clobbering
++       * state if the exception is never injected for whatever reason.  But
++       * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or
++       * may not propagate the payload across save+restore, and so KVM can't
++       * safely defer delivery of the payload.
+        */
+       if (!vcpu->kvm->arch.exception_payload_enabled &&
+           ex->pending && ex->has_payload)
+               kvm_deliver_exception_payload(vcpu, ex);
++}
++
++static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
++                                             struct kvm_vcpu_events *events)
++{
++      struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
++
++      process_nmi(vcpu);
++
++#ifdef CONFIG_KVM_SMM
++      if (kvm_check_request(KVM_REQ_SMI, vcpu))
++              process_smi(vcpu);
++#endif
++
++      kvm_handle_exception_payload_quirk(vcpu);
+ 
+       memset(events, 0, sizeof(*events));
+ 
+@@ -5770,6 +5782,8 @@ static int kvm_vcpu_ioctl_x86_get_debugr
+           vcpu->arch.guest_state_protected)
+               return -EINVAL;
+ 
++      kvm_handle_exception_payload_quirk(vcpu);
++
+       memset(dbgregs, 0, sizeof(*dbgregs));
+ 
+       BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
+@@ -12125,6 +12139,8 @@ static void __get_sregs_common(struct kv
+       if (vcpu->arch.guest_state_protected)
+               goto skip_protected_regs;
+ 
++      kvm_handle_exception_payload_quirk(vcpu);
++
+       kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+       kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+       kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
diff --git a/queue-6.18/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch b/queue-6.18/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch

new file mode 100644 (file)

index 0000000..5bab460
--- /dev/null
+++ b/queue-6.18/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch
@@ -0,0 +1,33 @@
+From da773ea3f59032f659bfc4c450ca86e384786168 Mon Sep 17 00:00:00 2001
+From: Tao Cui <cuitao@kylinos.cn>
+Date: Thu, 9 Apr 2026 18:56:36 +0800
+Subject: LoongArch: KVM: Use CSR_CRMD_PLV in kvm_arch_vcpu_in_kernel()
+
+From: Tao Cui <cuitao@kylinos.cn>
+
+commit da773ea3f59032f659bfc4c450ca86e384786168 upstream.
+
+The function reads LOONGARCH_CSR_CRMD but uses CSR_PRMD_PPLV to
+extract the privilege level. While both masks have the same value
+(0x3), CSR_CRMD_PLV is the semantically correct constant for CRMD.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Tao Cui <cuitao@kylinos.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/vcpu.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/vcpu.c
++++ b/arch/loongarch/kvm/vcpu.c
+@@ -375,7 +375,7 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_
+       val = gcsr_read(LOONGARCH_CSR_CRMD);
+       preempt_enable();
+ 
+-      return (val & CSR_PRMD_PPLV) == PLV_KERN;
++      return (val & CSR_CRMD_PLV) == PLV_KERN;
+ }
+ 
+ #ifdef CONFIG_GUEST_PERF_EVENTS
diff --git a/queue-6.18/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch b/queue-6.18/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch

new file mode 100644 (file)

index 0000000..bfb9f65
--- /dev/null
+++ b/queue-6.18/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch
@@ -0,0 +1,56 @@
+From 049a57421dd67a28c45ae7e92c36df758033e5fa Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sun, 29 Mar 2026 08:23:05 -0700
+Subject: mm/damon/core: use time_in_range_open() for damos quota window start
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 049a57421dd67a28c45ae7e92c36df758033e5fa upstream.
+
+damos_adjust_quota() uses time_after_eq() to show if it is time to start a
+new quota charge window, comparing the current jiffies and the scheduled
+next charge window start time.  If it is, the next charge window start
+time is updated and the new charge window starts.
+
+The time check and next window start time update is skipped while the
+scheme is deactivated by the watermarks.  Let's suppose the deactivation
+is kept more than LONG_MAX jiffies (assuming CONFIG_HZ of 250, more than
+99 days in 32 bit systems and more than one billion years in 64 bit
+systems), resulting in having the jiffies larger than the next charge
+window start time + LONG_MAX.  Then, the time_after_eq() call can return
+false until another LONG_MAX jiffies are passed.
+
+This means the scheme can continue working after being reactivated by the
+watermarks.  But, soon, the quota will be exceeded and the scheme will
+again effectively stop working until the next charge window starts.
+Because the current charge window is extended to up to LONG_MAX jiffies,
+however, it will look like it stopped unexpectedly and indefinitely, from
+the user's perspective.
+
+Fix this by using !time_in_range_open() instead.
+
+The issue was discovered [1] by sashiko.
+
+Link: https://lore.kernel.org/20260329152306.45796-1-sj@kernel.org
+Link: https://lore.kernel.org/20260324040722.57944-1-sj@kernel.org [1]
+Fixes: ee801b7dd782 ("mm/damon/schemes: activate schemes based on a watermarks mechanism")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 5.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2179,7 +2179,8 @@ static void damos_adjust_quota(struct da
+       }
+ 
+       /* New charge window starts */
+-      if (time_after_eq(jiffies, quota->charged_from +
++      if (!time_in_range_open(jiffies, quota->charged_from,
++                              quota->charged_from +
+                               msecs_to_jiffies(quota->reset_interval))) {
+               if (quota->esz && quota->charged_sz >= quota->esz)
+                       s->stat.qt_exceeds++;
diff --git a/queue-6.18/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch b/queue-6.18/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch

new file mode 100644 (file)

index 0000000..d1e16fe
--- /dev/null
+++ b/queue-6.18/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch
@@ -0,0 +1,79 @@
+From 40250b2dded0604a112be605f3828700d80ad7c2 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 28 Mar 2026 21:38:59 -0700
+Subject: mm/damon/core: validate damos_quota_goal->nid for node_mem_{used,free}_bp
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 40250b2dded0604a112be605f3828700d80ad7c2 upstream.
+
+Patch series "mm/damon/core: validate damos_quota_goal->nid".
+
+node_mem[cg]_{used,free}_bp DAMOS quota goals receive the node id.  The
+node id is used for si_meminfo_node() and NODE_DATA() without proper
+validation.  As a result, privileged users can trigger an out of bounds
+memory access using DAMON_SYSFS.  Fix the issues.
+
+The issue was originally reported [1] with a fix by another author.  The
+original author announced [2] that they will stop working including the
+fix that was still in the review stage.  Hence I'm restarting this.
+
+
+This patch (of 2):
+
+Users can set damos_quota_goal->nid with arbitrary value for
+node_mem_{used,free}_bp.  But DAMON core is using those for
+si_meminfo_node() without the validation of the value.  This can result in
+out of bounds memory access.  The issue can actually be triggered using DAMON
+user-space tool (damo), like below.
+
+    $ sudo ./damo start --damos_action stat \
+       --damos_quota_goal node_mem_used_bp 50% -1 \
+       --damos_quota_interval 1s
+    $ sudo dmesg
+    [...]
+    [   65.565986] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098
+
+Fix this issue by adding the validation of the given node.  If an invalid
+node id is given, it returns 0% for used memory ratio, and 100% for free
+memory ratio.
+
+Link: https://lore.kernel.org/20260329043902.46163-2-sj@kernel.org
+Link: https://lore.kernel.org/20260325073034.140353-1-objecting@objecting.org [1]
+Link: https://lore.kernel.org/20260327040924.68553-1-sj@kernel.org [2]
+Fixes: 0e1c773b501f ("mm/damon/core: introduce damos quota goal metrics for memory node utilization")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2038,12 +2038,24 @@ static inline u64 damos_get_some_mem_psi
+ #endif        /* CONFIG_PSI */
+ 
+ #ifdef CONFIG_NUMA
++static bool invalid_mem_node(int nid)
++{
++      return nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY);
++}
++
+ static __kernel_ulong_t damos_get_node_mem_bp(
+               struct damos_quota_goal *goal)
+ {
+       struct sysinfo i;
+       __kernel_ulong_t numerator;
+ 
++      if (invalid_mem_node(goal->nid)) {
++              if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
++                      return 0;
++              else    /* DAMOS_QUOTA_NODE_MEM_FREE_BP */
++                      return 10000;
++      }
++
+       si_meminfo_node(&i, goal->nid);
+       if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
+               numerator = i.totalram - i.freeram;
diff --git a/queue-6.18/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch b/queue-6.18/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch

new file mode 100644 (file)

index 0000000..92d550e
--- /dev/null
+++ b/queue-6.18/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch
@@ -0,0 +1,41 @@
+From e04ed278d25bf15769800bf6e35c6737f137186f Mon Sep 17 00:00:00 2001
+From: Jackie Liu <liuyun01@kylinos.cn>
+Date: Tue, 31 Mar 2026 18:15:53 +0800
+Subject: mm/damon/stat: fix memory leak on damon_start() failure in damon_stat_start()
+
+From: Jackie Liu <liuyun01@kylinos.cn>
+
+commit e04ed278d25bf15769800bf6e35c6737f137186f upstream.
+
+Destroy the DAMON context and reset the global pointer when damon_start()
+fails.  Otherwise, the context allocated by damon_stat_build_ctx() is
+leaked, and the stale damon_stat_context pointer will be overwritten on
+the next enable attempt, making the old allocation permanently
+unreachable.
+
+Link: https://lore.kernel.org/20260331101553.88422-1-liu.yun@linux.dev
+Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module")
+Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.17.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/stat.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/stat.c
++++ b/mm/damon/stat.c
+@@ -247,8 +247,11 @@ static int damon_stat_start(void)
+       if (!damon_stat_context)
+               return -ENOMEM;
+       err = damon_start(&damon_stat_context, 1, true);
+-      if (err)
++      if (err) {
++              damon_destroy_ctx(damon_stat_context);
++              damon_stat_context = NULL;
+               return err;
++      }
+ 
+       damon_stat_last_refresh_jiffies = jiffies;
+       call_control.data = damon_stat_context;
diff --git a/queue-6.18/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch b/queue-6.18/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch

new file mode 100644 (file)

index 0000000..7306440
--- /dev/null
+++ b/queue-6.18/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch
@@ -0,0 +1,76 @@
+From 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 Mon Sep 17 00:00:00 2001
+From: Jackie Liu <liuyun01@kylinos.cn>
+Date: Wed, 1 Apr 2026 08:57:02 +0800
+Subject: mm/mempolicy: fix memory leaks in weighted_interleave_auto_store()
+
+From: Jackie Liu <liuyun01@kylinos.cn>
+
+commit 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 upstream.
+
+weighted_interleave_auto_store() fetches old_wi_state inside the if
+(!input) block only.  This causes two memory leaks:
+
+1. When a user writes "false" and the current mode is already manual,
+   the function returns early without freeing the freshly allocated
+   new_wi_state.
+
+2. When a user writes "true", old_wi_state stays NULL because the
+   fetch is skipped entirely. The old state is then overwritten by
+   rcu_assign_pointer() but never freed, since the cleanup path is
+   gated on old_wi_state being non-NULL. A user can trigger this
+   repeatedly by writing "1" in a loop.
+
+Fix both leaks by moving the old_wi_state fetch before the input check,
+making it unconditional.  This also allows a unified early return for both
+"true" and "false" when the requested mode matches the current mode.
+
+Link: https://lore.kernel.org/20260401005702.7096-1-liu.yun@linux.dev
+Link: https://sashiko.dev/#/patchset/20260331100740.84906-1-liu.yun@linux.dev
+Fixes: e341f9c3c841 ("mm/mempolicy: Weighted Interleave Auto-tuning")
+Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
+Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
+Reviewed-by: Donet Tom <donettom@linux.ibm.com>
+Cc: Gregory Price <gourry@gourry.net>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Byungchul Park <byungchul@sk.com>
+Cc: David Hildenbrand <david@kernel.org>
+Cc: <stable@vger.kernel.org> # v6.16+
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c |   23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -3636,18 +3636,19 @@ static ssize_t weighted_interleave_auto_
+               new_wi_state->iw_table[i] = 1;
+ 
+       mutex_lock(&wi_state_lock);
+-      if (!input) {
+-              old_wi_state = rcu_dereference_protected(wi_state,
+-                                      lockdep_is_held(&wi_state_lock));
+-              if (!old_wi_state)
+-                      goto update_wi_state;
+-              if (input == old_wi_state->mode_auto) {
+-                      mutex_unlock(&wi_state_lock);
+-                      return count;
+-              }
++      old_wi_state = rcu_dereference_protected(wi_state,
++                              lockdep_is_held(&wi_state_lock));
++
++      if (old_wi_state && input == old_wi_state->mode_auto) {
++              mutex_unlock(&wi_state_lock);
++              kfree(new_wi_state);
++              return count;
++      }
+ 
+-              memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
+-                                             nr_node_ids * sizeof(u8));
++      if (!input) {
++              if (old_wi_state)
++                      memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
++                                                     nr_node_ids * sizeof(u8));
+               goto update_wi_state;
+       }
+ 
diff --git a/queue-6.18/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch b/queue-6.18/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch

new file mode 100644 (file)

index 0000000..5dd71df
--- /dev/null
+++ b/queue-6.18/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch
@@ -0,0 +1,42 @@
+From ec05f51f1e65bce95528543eb73fda56fd201d94 Mon Sep 17 00:00:00 2001
+From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Date: Mon, 13 Apr 2026 21:26:46 +0200
+Subject: mm/vmalloc: take vmap_purge_lock in shrinker
+
+From: Uladzislau Rezki (Sony) <urezki@gmail.com>
+
+commit ec05f51f1e65bce95528543eb73fda56fd201d94 upstream.
+
+decay_va_pool_node() can be invoked concurrently from two paths:
+__purge_vmap_area_lazy() when pools are being purged, and the shrinker via
+vmap_node_shrink_scan().
+
+However, decay_va_pool_node() is not safe to run concurrently, and the
+shrinker path currently lacks serialization, leading to races and possible
+leaks.
+
+Protect decay_va_pool_node() by taking vmap_purge_lock in the shrinker
+path to ensure serialization with purge users.
+
+Link: https://lore.kernel.org/20260413192646.14683-1-urezki@gmail.com
+Fixes: 7679ba6b36db ("mm: vmalloc: add a shrinker to drain vmap pools")
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Reviewed-by: Baoquan He <baoquan.he@linux.dev>
+Cc: chenyichong <chenyichong@uniontech.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -5255,6 +5255,7 @@ vmap_node_shrink_scan(struct shrinker *s
+ {
+       struct vmap_node *vn;
+ 
++      guard(mutex)(&vmap_purge_lock);
+       for_each_vmap_node(vn)
+               decay_va_pool_node(vn, true);
+ 
diff --git a/queue-6.18/mmc-block-use-single-block-write-in-retry.patch b/queue-6.18/mmc-block-use-single-block-write-in-retry.patch

new file mode 100644 (file)

index 0000000..b654f94
--- /dev/null
+++ b/queue-6.18/mmc-block-use-single-block-write-in-retry.patch
@@ -0,0 +1,92 @@
+From c7c6d4f5103864f73ee3a78bfd6da241f84197dd Mon Sep 17 00:00:00 2001
+From: Bin Liu <b-liu@ti.com>
+Date: Wed, 25 Mar 2026 08:49:47 -0500
+Subject: mmc: block: use single block write in retry
+
+From: Bin Liu <b-liu@ti.com>
+
+commit c7c6d4f5103864f73ee3a78bfd6da241f84197dd upstream.
+
+Due to errata i2493[0], multi-block write would still fail in retries.
+
+With i2493, the MMC interface has the potential of write failures when
+issuing multi-block writes operating in HS200 mode with excessive IO
+supply noise.
+
+While the errata provides guidance in hardware design and layout to
+minimize the IO supply noise, in theory the write failure cannot be
+resolved in hardware. The software solution to ensure the data integrity
+is to add minimum 5us delay between block writes. Single-block write is
+the practical way to introduce the delay.
+
+This patch reuses recovery_mode flag, and switches to single-block
+write in retry when multi-block write fails. It covers both CQE and
+non-CQE cases.
+
+[0] https://www.ti.com/lit/pdf/sprz582
+Cc: stable@vger.kernel.org
+Suggested-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Bin Liu <b-liu@ti.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/core/block.c |   12 ++++++++++--
+ drivers/mmc/core/queue.h |    3 +++
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -1404,6 +1404,9 @@ static void mmc_blk_data_prep(struct mmc
+                   rq_data_dir(req) == WRITE &&
+                   (md->flags & MMC_BLK_REL_WR);
+ 
++      if (mqrq->flags & MQRQ_XFER_SINGLE_BLOCK)
++              recovery_mode = 1;
++
+       memset(brq, 0, sizeof(struct mmc_blk_request));
+ 
+       mmc_crypto_prepare_req(mqrq);
+@@ -1543,10 +1546,13 @@ static void mmc_blk_cqe_complete_rq(stru
+               err = 0;
+ 
+       if (err) {
+-              if (mqrq->retries++ < MMC_CQE_RETRIES)
++              if (mqrq->retries++ < MMC_CQE_RETRIES) {
++                      if (rq_data_dir(req) == WRITE)
++                              mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+                       blk_mq_requeue_request(req, true);
+-              else
++              } else {
+                       blk_mq_end_request(req, BLK_STS_IOERR);
++              }
+       } else if (mrq->data) {
+               if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered))
+                       blk_mq_requeue_request(req, true);
+@@ -2088,6 +2094,8 @@ static void mmc_blk_mq_complete_rq(struc
+       } else if (!blk_rq_bytes(req)) {
+               __blk_mq_end_request(req, BLK_STS_IOERR);
+       } else if (mqrq->retries++ < MMC_MAX_RETRIES) {
++              if (rq_data_dir(req) == WRITE)
++                      mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+               blk_mq_requeue_request(req, true);
+       } else {
+               if (mmc_card_removed(mq->card))
+--- a/drivers/mmc/core/queue.h
++++ b/drivers/mmc/core/queue.h
+@@ -61,6 +61,8 @@ enum mmc_drv_op {
+       MMC_DRV_OP_GET_EXT_CSD,
+ };
+ 
++#define       MQRQ_XFER_SINGLE_BLOCK          BIT(0)
++
+ struct mmc_queue_req {
+       struct mmc_blk_request  brq;
+       struct scatterlist      *sg;
+@@ -69,6 +71,7 @@ struct mmc_queue_req {
+       void                    *drv_op_data;
+       unsigned int            ioc_count;
+       int                     retries;
++      u32                     flags;
+ };
+ 
+ struct mmc_queue {
diff --git a/queue-6.18/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch b/queue-6.18/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch

new file mode 100644 (file)

index 0000000..60e956e
--- /dev/null
+++ b/queue-6.18/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
@@ -0,0 +1,81 @@
+From 6546a49bbe656981d99a389195560999058c89c4 Mon Sep 17 00:00:00 2001
+From: Shawn Lin <shawn.lin@rock-chips.com>
+Date: Wed, 8 Apr 2026 15:18:49 +0800
+Subject: mmc: sdhci-of-dwcmshc: Disable clock before DLL configuration
+
+From: Shawn Lin <shawn.lin@rock-chips.com>
+
+commit 6546a49bbe656981d99a389195560999058c89c4 upstream.
+
+According to the ASIC design recommendations, the clock must be
+disabled before operating the DLL to prevent glitches that could
+affect the internal digital logic. In extreme cases, failing to
+do so may cause the controller to malfunction completely.
+
+Adds a step to disable the clock before DLL configuration and
+re-enables it at the end.
+
+Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-of-dwcmshc.c |   19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
+@@ -607,12 +607,15 @@ static void dwcmshc_rk3568_set_clock(str
+       extra &= ~BIT(0);
+       sdhci_writel(host, extra, reg);
+ 
++      /* Disable clock while config DLL */
++      sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
++
+       if (clock <= 52000000) {
+               if (host->mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+                   host->mmc->ios.timing == MMC_TIMING_MMC_HS400) {
+                       dev_err(mmc_dev(host->mmc),
+                               "Can't reduce the clock below 52MHz in HS200/HS400 mode");
+-                      return;
++                      goto enable_clk;
+               }
+ 
+               /*
+@@ -632,7 +635,7 @@ static void dwcmshc_rk3568_set_clock(str
+                       DLL_STRBIN_DELAY_NUM_SEL |
+                       DLL_STRBIN_DELAY_NUM_DEFAULT << DLL_STRBIN_DELAY_NUM_OFFSET;
+               sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
+-              return;
++              goto enable_clk;
+       }
+ 
+       /* Reset DLL */
+@@ -659,7 +662,7 @@ static void dwcmshc_rk3568_set_clock(str
+                                500 * USEC_PER_MSEC);
+       if (err) {
+               dev_err(mmc_dev(host->mmc), "DLL lock timeout!\n");
+-              return;
++              goto enable_clk;
+       }
+ 
+       extra = 0x1 << 16 | /* tune clock stop en */
+@@ -692,6 +695,16 @@ static void dwcmshc_rk3568_set_clock(str
+               DLL_STRBIN_TAPNUM_DEFAULT |
+               DLL_STRBIN_TAPNUM_FROM_SW;
+       sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
++
++enable_clk:
++      /*
++       * The sdclk frequency select bits in SDHCI_CLOCK_CONTROL are not functional
++       * on Rockchip's SDHCI implementation. Instead, the clock frequency is fully
++       * controlled via external clk provider by calling clk_set_rate(). Consequently,
++       * passing 0 to sdhci_enable_clk() only re-enables the already-configured clock,
++       * which matches the hardware's actual behavior.
++       */
++      sdhci_enable_clk(host, 0);
+ }
+ 
+ static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask)
diff --git a/queue-6.18/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch b/queue-6.18/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch

new file mode 100644 (file)

index 0000000..8c5c478
--- /dev/null
+++ b/queue-6.18/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch
@@ -0,0 +1,59 @@
+From 3962c24f2d14e8a7f8a23f56b7ce320523947342 Mon Sep 17 00:00:00 2001
+From: "Viorel Suman (OSS)" <viorel.suman@oss.nxp.com>
+Date: Wed, 11 Mar 2026 14:33:09 +0200
+Subject: pwm: imx-tpm: Count the number of enabled channels in probe
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Viorel Suman (OSS) <viorel.suman@oss.nxp.com>
+
+commit 3962c24f2d14e8a7f8a23f56b7ce320523947342 upstream.
+
+On a soft reset TPM PWM IP may preserve its internal state from previous
+runtime, therefore on a subsequent OS boot and driver probe
+"enable_count" value and TPM PWM IP internal channels "enabled" states
+may get unaligned. In consequence on a suspend/resume cycle the call "if
+(--tpm->enable_count == 0)" may lead to "enable_count" overflow, with the
+system being blocked from entering suspend due to:
+
+   if (tpm->enable_count > 0)
+       return -EBUSY;
+
+Fix the problem by counting the enabled channels in probe function.
+
+Signed-off-by: Viorel Suman (OSS) <viorel.suman@oss.nxp.com>
+Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support")
+Link: https://patch.msgid.link/20260311123309.348904-1-viorel.suman@oss.nxp.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pwm/pwm-imx-tpm.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/pwm/pwm-imx-tpm.c
++++ b/drivers/pwm/pwm-imx-tpm.c
+@@ -352,7 +352,7 @@ static int pwm_imx_tpm_probe(struct plat
+       struct clk *clk;
+       void __iomem *base;
+       int ret;
+-      unsigned int npwm;
++      unsigned int i, npwm;
+       u32 val;
+ 
+       base = devm_platform_ioremap_resource(pdev, 0);
+@@ -382,6 +382,13 @@ static int pwm_imx_tpm_probe(struct plat
+ 
+       mutex_init(&tpm->lock);
+ 
++      /* count the enabled channels */
++      for (i = 0; i < npwm; ++i) {
++              val = readl(base + PWM_IMX_TPM_CnSC(i));
++              if (FIELD_GET(PWM_IMX_TPM_CnSC_ELS, val))
++                      ++tpm->enable_count;
++      }
++
+       ret = devm_pwmchip_add(&pdev->dev, chip);
+       if (ret)
+               return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
diff --git a/queue-6.18/randomize_kstack-maintain-kstack_offset-per-task.patch b/queue-6.18/randomize_kstack-maintain-kstack_offset-per-task.patch

new file mode 100644 (file)

index 0000000..a1bf1ad
--- /dev/null
+++ b/queue-6.18/randomize_kstack-maintain-kstack_offset-per-task.patch
@@ -0,0 +1,155 @@
+From 37beb42560165869838e7d91724f3e629db64129 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 3 Mar 2026 15:08:38 +0000
+Subject: randomize_kstack: Maintain kstack_offset per task
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+commit 37beb42560165869838e7d91724f3e629db64129 upstream.
+
+kstack_offset was previously maintained per-cpu, but this caused a
+couple of issues. So let's instead make it per-task.
+
+Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
+expected and required to be called with interrupts and preemption
+disabled so that it could manipulate per-cpu state. But arm64, loongarch
+and risc-v are calling them with interrupts and preemption enabled. I
+don't _think_ this causes any functional issues, but it's certainly
+unexpected and could lead to manipulating the wrong cpu's state, which
+could cause a minor performance degradation due to bouncing the cache
+lines. By maintaining the state per-task those functions can safely be
+called in preemptible context.
+
+Issue 2: add_random_kstack_offset() is called before executing the
+syscall and expands the stack using a previously chosen random offset.
+choose_random_kstack_offset() is called after executing the syscall and
+chooses and stores a new random offset for the next syscall. With
+per-cpu storage for this offset, an attacker could force cpu migration
+during the execution of the syscall and prevent the offset from being
+updated for the original cpu such that it is predictable for the next
+syscall on that cpu. By maintaining the state per-task, this problem
+goes away because the per-task random offset is updated after the
+syscall regardless of which cpu it is executing on.
+
+Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
+Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
+Cc: stable@vger.kernel.org
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com
+Signed-off-by: Kees Cook <kees@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/randomize_kstack.h |   26 +++++++++++++++-----------
+ include/linux/sched.h            |    4 ++++
+ init/main.c                      |    1 -
+ kernel/fork.c                    |    2 ++
+ 4 files changed, 21 insertions(+), 12 deletions(-)
+
+--- a/include/linux/randomize_kstack.h
++++ b/include/linux/randomize_kstack.h
+@@ -9,7 +9,6 @@
+ 
+ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+                        randomize_kstack_offset);
+-DECLARE_PER_CPU(u32, kstack_offset);
+ 
+ /*
+  * Do not use this anywhere else in the kernel. This is used here because
+@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
+  * add_random_kstack_offset - Increase stack utilization by previously
+  *                          chosen random offset
+  *
+- * This should be used in the syscall entry path when interrupts and
+- * preempt are disabled, and after user registers have been stored to
+- * the stack. For testing the resulting entropy, please see:
+- * tools/testing/selftests/lkdtm/stack-entropy.sh
++ * This should be used in the syscall entry path after user registers have been
++ * stored to the stack. Preemption may be enabled. For testing the resulting
++ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
+  */
+ #define add_random_kstack_offset() do {                                       \
+       if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+                               &randomize_kstack_offset)) {            \
+-              u32 offset = raw_cpu_read(kstack_offset);               \
++              u32 offset = current->kstack_offset;                    \
+               u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset));   \
+               /* Keep allocation even after "ptr" loses scope. */     \
+               asm volatile("" :: "r"(ptr) : "memory");                \
+@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
+  * choose_random_kstack_offset - Choose the random offset for the next
+  *                             add_random_kstack_offset()
+  *
+- * This should only be used during syscall exit when interrupts and
+- * preempt are disabled. This position in the syscall flow is done to
+- * frustrate attacks from userspace attempting to learn the next offset:
++ * This should only be used during syscall exit. Preemption may be enabled. This
++ * position in the syscall flow is done to frustrate attacks from userspace
++ * attempting to learn the next offset:
+  * - Maximize the timing uncertainty visible from userspace: if the
+  *   offset is chosen at syscall entry, userspace has much more control
+  *   over the timing between choosing offsets. "How long will we be in
+@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ #define choose_random_kstack_offset(rand) do {                                \
+       if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+                               &randomize_kstack_offset)) {            \
+-              u32 offset = raw_cpu_read(kstack_offset);               \
++              u32 offset = current->kstack_offset;                    \
+               offset = ror32(offset, 5) ^ (rand);                     \
+-              raw_cpu_write(kstack_offset, offset);                   \
++              current->kstack_offset = offset;                        \
+       }                                                               \
+ } while (0)
++
++static inline void random_kstack_task_init(struct task_struct *tsk)
++{
++      tsk->kstack_offset = 0;
++}
+ #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ #define add_random_kstack_offset()            do { } while (0)
+ #define choose_random_kstack_offset(rand)     do { } while (0)
++#define random_kstack_task_init(tsk)          do { } while (0)
+ #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ 
+ #endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1614,6 +1614,10 @@ struct task_struct {
+       unsigned long                   prev_lowest_stack;
+ #endif
+ 
++#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
++      u32                             kstack_offset;
++#endif
++
+ #ifdef CONFIG_X86_MCE
+       void __user                     *mce_vaddr;
+       __u64                           mce_kflags;
+--- a/init/main.c
++++ b/init/main.c
+@@ -830,7 +830,6 @@ static inline void initcall_debug_enable
+ #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+ DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+                          randomize_kstack_offset);
+-DEFINE_PER_CPU(u32, kstack_offset);
+ 
+ static int __init early_randomize_kstack_offset(char *buf)
+ {
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -95,6 +95,7 @@
+ #include <linux/thread_info.h>
+ #include <linux/kstack_erase.h>
+ #include <linux/kasan.h>
++#include <linux/randomize_kstack.h>
+ #include <linux/scs.h>
+ #include <linux/io_uring.h>
+ #include <linux/bpf.h>
+@@ -2191,6 +2192,7 @@ __latent_entropy struct task_struct *cop
+       if (retval)
+               goto bad_fork_cleanup_io;
+ 
++      random_kstack_task_init(p);
+       stackleak_task_init(p);
+ 
+       if (pid != &init_struct_pid) {
diff --git a/queue-6.18/rtc-ntxec-fix-of-node-reference-imbalance.patch b/queue-6.18/rtc-ntxec-fix-of-node-reference-imbalance.patch

new file mode 100644 (file)

index 0000000..06f8ab7
--- /dev/null
+++ b/queue-6.18/rtc-ntxec-fix-of-node-reference-imbalance.patch
@@ -0,0 +1,41 @@
+From 30c4d2f26bb3538c328035cea2e6265c8320539e Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Tue, 7 Apr 2026 14:27:17 +0200
+Subject: rtc: ntxec: fix OF node reference imbalance
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 30c4d2f26bb3538c328035cea2e6265c8320539e upstream.
+
+The driver reuses the OF node of the parent multi-function device but
+fails to take another reference to balance the one dropped by the
+platform bus code when unbinding the MFD and deregistering the child
+devices.
+
+Fix this by using the intended helper for reusing OF nodes.
+
+Fixes: 435af89786c6 ("rtc: New driver for RTC in Netronix embedded controller")
+Cc: stable@vger.kernel.org     # 5.13
+Cc: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Link: https://patch.msgid.link/20260407122717.2676774-1-johan@kernel.org
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/rtc/rtc-ntxec.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/rtc/rtc-ntxec.c
++++ b/drivers/rtc/rtc-ntxec.c
+@@ -110,7 +110,7 @@ static int ntxec_rtc_probe(struct platfo
+       struct rtc_device *dev;
+       struct ntxec_rtc *rtc;
+ 
+-      pdev->dev.of_node = pdev->dev.parent->of_node;
++      device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+ 
+       rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+       if (!rtc)
diff --git a/queue-6.18/series b/queue-6.18/series

index 89e68be9e3fe4af2a6a6a8b5eaab12cedee211be..e00146668e55d3fad7b9ba0ef48211320176576d 100644 (file)
--- a/queue-6.18/series
+++ b/queue-6.18/series
@@ -140,3 +140,54 @@ hwmon-isl28022-fix-integer-overflow-in-power-calculation-on-32-bit.patch
  fs-prepare-for-adding-lsm-blob-to-backing_file.patch
  drm-amd-fix-set-but-not-used-warnings.patch
  asoc-intel-avs-replace-strcmp-with-sysfs_streq.patch
+hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
+randomize_kstack-maintain-kstack_offset-per-task.patch
+mmc-block-use-single-block-write-in-retry.patch
+mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
+arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch
+crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch
+xfs-start-gc-on-zonegc_low_space-attribute-updates.patch
+xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch
+firmware-google-framebuffer-do-not-unregister-platform-device.patch
+crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch
+crypto-talitos-rename-first-last-to-first_desc-last_desc.patch
+pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch
+tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch
+tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch
+tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch
+tpm-tpm_tis-add-error-logging-for-data-transfer.patch
+tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch
+rtc-ntxec-fix-of-node-reference-imbalance.patch
+mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch
+mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch
+mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch
+mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch
+mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch
+userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
+loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch
+kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch
+kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
+kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
+kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
+kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
+kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch
+kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch
+kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
+kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch
+kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch
+kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch
+kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch
+kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
+kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch
+kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch
+kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch
+kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
+kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch
+kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
+kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
+kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch
+kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
+kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch
+kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch
diff --git a/queue-6.18/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch b/queue-6.18/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch

new file mode 100644 (file)

index 0000000..e584da7
--- /dev/null
+++ b/queue-6.18/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch
@@ -0,0 +1,47 @@
+From 666c1a2ca603d8314231200bf8bbb3a81bd64c6b Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Wed, 8 Apr 2026 12:00:27 +0300
+Subject: tpm: Fix auth session leak in tpm2_get_random() error path
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit 666c1a2ca603d8314231200bf8bbb3a81bd64c6b upstream.
+
+When tpm_buf_fill_hmac_session() fails inside the do-while loop in
+tpm2_get_random(), the function returns directly after destroying the
+buffer, without ending the auth session via tpm2_end_auth_session().
+
+This leaks the TPM auth session resource. All other error paths within
+the loop correctly reach the 'out' label which calls both
+tpm_buf_destroy() and tpm2_end_auth_session().
+
+Fix this by replacing the early return with a goto to the existing 'out'
+label, which already handles both cleanup operations. The redundant
+tpm_buf_destroy() call is removed since 'out' takes care of it.
+
+Cc: stable@vger.kernel.org # v6.19+
+Fixes: 6e9722e9a7bf ("tpm2-sessions: Fix out of range indexing in name_size")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-cmd.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -275,10 +275,8 @@ int tpm2_get_random(struct tpm_chip *chi
+                                               NULL, 0);
+               tpm_buf_append_u16(&buf, num_bytes);
+               err = tpm_buf_fill_hmac_session(chip, &buf);
+-              if (err) {
+-                      tpm_buf_destroy(&buf);
+-                      return err;
+-              }
++              if (err)
++                      goto out;
+ 
+               err = tpm_transmit_cmd(chip, &buf,
+                                      offsetof(struct tpm2_get_random_out,
diff --git a/queue-6.18/tpm-tpm_tis-add-error-logging-for-data-transfer.patch b/queue-6.18/tpm-tpm_tis-add-error-logging-for-data-transfer.patch

new file mode 100644 (file)

index 0000000..2a7914c
--- /dev/null
+++ b/queue-6.18/tpm-tpm_tis-add-error-logging-for-data-transfer.patch
@@ -0,0 +1,42 @@
+From 0471921e2d1043dcc6de5cffb49dd37709521abe Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:05 +0000
+Subject: tpm: tpm_tis: add error logging for data transfer
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 0471921e2d1043dcc6de5cffb49dd37709521abe upstream.
+
+Add logging to more easily determine the reason for a transmit failure.
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-2-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -472,6 +472,8 @@ static int tpm_tis_send_data(struct tpm_
+               status = tpm_tis_status(chip);
+               if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) {
+                       rc = -EIO;
++                      dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be set. sts = 0x%08x\n",
++                              status);
+                       goto out_err;
+               }
+       }
+@@ -492,6 +494,8 @@ static int tpm_tis_send_data(struct tpm_
+       status = tpm_tis_status(chip);
+       if (!itpm && (status & TPM_STS_DATA_EXPECT) != 0) {
+               rc = -EIO;
++              dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be unset. sts = 0x%08x\n",
++                      status);
+               goto out_err;
+       }
+ 
diff --git a/queue-6.18/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch b/queue-6.18/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch

new file mode 100644 (file)

index 0000000..104dea3
--- /dev/null
+++ b/queue-6.18/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch
@@ -0,0 +1,48 @@
+From 949692da7211572fac419b2986b6abc0cd1aeb76 Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:06 +0000
+Subject: tpm: tpm_tis: stop transmit if retries are exhausted
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 949692da7211572fac419b2986b6abc0cd1aeb76 upstream.
+
+tpm_tis_send_main() will attempt to retry sending data TPM_RETRY times.
+Currently, if those retries are exhausted, the driver will attempt to
+call execute. The TPM will be in the wrong state, leading to the
+operation simply timing out.
+
+Instead, if there is still an error after retries are exhausted, return
+that error immediately.
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-3-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -557,11 +557,16 @@ static int tpm_tis_send_main(struct tpm_
+                       break;
+               else if (rc != -EAGAIN && rc != -EIO)
+                       /* Data transfer failed, not recoverable */
+-                      return rc;
++                      goto out_err;
+ 
+               usleep_range(priv->timeout_min, priv->timeout_max);
+       }
+ 
++      if (rc == -EAGAIN || rc == -EIO) {
++              dev_err(&chip->dev, "Exhausted %d tpm_tis_send_data retries\n", TPM_RETRY);
++              goto out_err;
++      }
++
+       /* go and do it */
+       rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO);
+       if (rc < 0)
diff --git a/queue-6.18/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch b/queue-6.18/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch

new file mode 100644 (file)

index 0000000..626a0f1
--- /dev/null
+++ b/queue-6.18/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch
@@ -0,0 +1,44 @@
+From c424d2664f08c77f08b4580b5f0cbaabf7c229b2 Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Thu, 9 Apr 2026 17:20:54 +0000
+Subject: tpm: Use kfree_sensitive() to free auth session in tpm_dev_release()
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit c424d2664f08c77f08b4580b5f0cbaabf7c229b2 upstream.
+
+tpm_dev_release() uses plain kfree() to free chip->auth, which contains
+sensitive cryptographic material including HMAC session keys, nonces,
+and passphrase data (struct tpm2_auth).
+
+Every other code path that frees this structure uses kfree_sensitive()
+to zero the memory before releasing it: both tpm2_end_auth_session()
+and tpm_buf_check_hmac_response() do so. The tpm_dev_release() path
+is the only one that does not, leaving key material in freed slab
+memory until it is eventually overwritten.
+
+Use kfree_sensitive() for consistency with the rest of the driver and
+to ensure session keys are scrubbed during device teardown.
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm-chip.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm-chip.c
++++ b/drivers/char/tpm/tpm-chip.c
+@@ -283,7 +283,7 @@ static void tpm_dev_release(struct devic
+       kfree(chip->work_space.context_buf);
+       kfree(chip->work_space.session_buf);
+ #ifdef CONFIG_TCG_TPM2_HMAC
+-      kfree(chip->auth);
++      kfree_sensitive(chip->auth);
+ #endif
+       kfree(chip);
+ }
diff --git a/queue-6.18/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch b/queue-6.18/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch

new file mode 100644 (file)

index 0000000..4b6f220
--- /dev/null
+++ b/queue-6.18/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch
@@ -0,0 +1,57 @@
+From f0f75a3d98b7959a8677b6363e23190f3018636b Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Wed, 15 Apr 2026 03:00:03 +0300
+Subject: tpm2-sessions: Fix missing tpm_buf_destroy() in tpm2_read_public()
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit f0f75a3d98b7959a8677b6363e23190f3018636b upstream.
+
+tpm2_read_public() calls tpm_buf_init() but fails to call
+tpm_buf_destroy() on two exit paths, leaking a page allocation:
+
+1. When name_size() returns an error (unrecognized hash algorithm),
+   the function returns directly without destroying the buffer.
+
+2. On the success path, the buffer is never destroyed before
+   returning.
+
+All other error paths in the function correctly call
+tpm_buf_destroy() before returning.
+
+Fix both by adding the missing tpm_buf_destroy() calls.
+
+Cc: stable@vger.kernel.org # v6.19+
+Fixes: bda1cbf73c6e ("tpm2-sessions: Fix tpm2_read_public range checks")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-sessions.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -203,8 +203,10 @@ static int tpm2_read_public(struct tpm_c
+       rc = tpm_buf_read_u16(&buf, &offset);
+       name_size_alg = name_size(&buf.data[offset]);
+ 
+-      if (name_size_alg < 0)
++      if (name_size_alg < 0) {
++              tpm_buf_destroy(&buf);
+               return name_size_alg;
++      }
+ 
+       if (rc != name_size_alg) {
+               tpm_buf_destroy(&buf);
+@@ -217,6 +219,7 @@ static int tpm2_read_public(struct tpm_c
+       }
+ 
+       memcpy(name, &buf.data[offset], rc);
++      tpm_buf_destroy(&buf);
+       return name_size_alg;
+ }
+ #endif /* CONFIG_TCG_TPM2_HMAC */
diff --git a/queue-6.18/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch b/queue-6.18/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch

new file mode 100644 (file)

index 0000000..76e9890
--- /dev/null
+++ b/queue-6.18/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
@@ -0,0 +1,60 @@
+From 161ce69c2c89781784b945d8e281ff2da9dede9c Mon Sep 17 00:00:00 2001
+From: "Denis M. Karpov" <komlomal@gmail.com>
+Date: Thu, 9 Apr 2026 13:33:45 +0300
+Subject: userfaultfd: allow registration of ranges below mmap_min_addr
+
+From: Denis M. Karpov <komlomal@gmail.com>
+
+commit 161ce69c2c89781784b945d8e281ff2da9dede9c upstream.
+
+The current implementation of validate_range() in fs/userfaultfd.c
+performs a hard check against mmap_min_addr.  This is redundant because
+UFFDIO_REGISTER operates on memory ranges that must already be backed by a
+VMA.
+
+Enforcing mmap_min_addr or capability checks again in userfaultfd is
+unnecessary and prevents applications like binary compilers from using
+UFFD for valid memory regions mapped by the application.
+
+Remove the redundant check for mmap_min_addr.
+
+We started using UFFD instead of the classic mprotect approach in the
+binary translator to track application writes.  During development, we
+encountered this bug.  The translator cannot control where the translated
+application chooses to map its memory and if the app requires a
+low-address area, UFFD fails, whereas mprotect would work just fine.  I
+believe this is a genuine logic bug rather than an improvement, and I
+would appreciate including the fix in stable.
+
+Link: https://lore.kernel.org/20260409103345.15044-1-komlomal@gmail.com
+Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
+Signed-off-by: Denis M. Karpov <komlomal@gmail.com>
+Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
+Acked-by: Harry Yoo (Oracle) <harry@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jann Horn <jannh@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/userfaultfd.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1219,8 +1219,6 @@ static __always_inline int validate_unal
+               return -EINVAL;
+       if (!len)
+               return -EINVAL;
+-      if (start < mmap_min_addr)
+-              return -EINVAL;
+       if (start >= task_size)
+               return -EINVAL;
+       if (len > task_size - start)
diff --git a/queue-6.18/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch b/queue-6.18/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch

new file mode 100644 (file)

index 0000000..0339693
--- /dev/null
+++ b/queue-6.18/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch
@@ -0,0 +1,32 @@
+From 29a7b2614357393b176ef06ba5bc3ff5afc8df69 Mon Sep 17 00:00:00 2001
+From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+Date: Wed, 1 Apr 2026 12:02:41 +0800
+Subject: xfs: fix a resource leak in xfs_alloc_buftarg()
+
+From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+
+commit 29a7b2614357393b176ef06ba5bc3ff5afc8df69 upstream.
+
+In the error path, call fs_put_dax() to drop the DAX
+device reference.
+
+Fixes: 6f643c57d57c ("xfs: implement ->notify_failure() for XFS")
+Cc: stable@vger.kernel.org
+Signed-off-by: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1831,6 +1831,7 @@ xfs_alloc_buftarg(
+       return btp;
+ 
+ error_free:
++      fs_put_dax(btp->bt_daxdev, mp);
+       kfree(btp);
+       return ERR_PTR(error);
+ }
diff --git a/queue-6.18/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch b/queue-6.18/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch

new file mode 100644 (file)

index 0000000..c6917ea
--- /dev/null
+++ b/queue-6.18/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch
@@ -0,0 +1,104 @@
+From 181ea4e2de422aa0a66f355bd59bccccdd169826 Mon Sep 17 00:00:00 2001
+From: Hans Holmberg <hans.holmberg@wdc.com>
+Date: Wed, 25 Mar 2026 13:43:12 +0100
+Subject: xfs: start gc on zonegc_low_space attribute updates
+
+From: Hans Holmberg <hans.holmberg@wdc.com>
+
+commit 181ea4e2de422aa0a66f355bd59bccccdd169826 upstream.
+
+Start gc if the aggressiveness of zone garbage collection is changed
+by the user (if the file system is not read only).
+
+Without this change, the new setting will not be taken into account
+until the gc thread is woken up by e.g. a write.
+
+Cc: stable@vger.kernel.org # v6.15
+Fixes: 845abeb1f06a8a ("xfs: add tunable threshold parameter for triggering zone GC")
+Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_sysfs.c      |    7 ++++++-
+ fs/xfs/xfs_zone_alloc.h |    4 ++++
+ fs/xfs/xfs_zone_gc.c    |   17 +++++++++++++++++
+ 3 files changed, 27 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_sysfs.c
++++ b/fs/xfs/xfs_sysfs.c
+@@ -14,6 +14,7 @@
+ #include "xfs_log_priv.h"
+ #include "xfs_mount.h"
+ #include "xfs_zones.h"
++#include "xfs_zone_alloc.h"
+ 
+ struct xfs_sysfs_attr {
+       struct attribute attr;
+@@ -724,6 +725,7 @@ zonegc_low_space_store(
+       const char              *buf,
+       size_t                  count)
+ {
++      struct xfs_mount        *mp = zoned_to_mp(kobj);
+       int                     ret;
+       unsigned int            val;
+ 
+@@ -734,7 +736,10 @@ zonegc_low_space_store(
+       if (val > 100)
+               return -EINVAL;
+ 
+-      zoned_to_mp(kobj)->m_zonegc_low_space = val;
++      if (mp->m_zonegc_low_space != val) {
++              mp->m_zonegc_low_space = val;
++              xfs_zone_gc_wakeup(mp);
++      }
+ 
+       return count;
+ }
+--- a/fs/xfs/xfs_zone_alloc.h
++++ b/fs/xfs/xfs_zone_alloc.h
+@@ -51,6 +51,7 @@ int xfs_mount_zones(struct xfs_mount *mp
+ void xfs_unmount_zones(struct xfs_mount *mp);
+ void xfs_zone_gc_start(struct xfs_mount *mp);
+ void xfs_zone_gc_stop(struct xfs_mount *mp);
++void xfs_zone_gc_wakeup(struct xfs_mount *mp);
+ #else
+ static inline int xfs_mount_zones(struct xfs_mount *mp)
+ {
+@@ -65,6 +66,9 @@ static inline void xfs_zone_gc_start(str
+ static inline void xfs_zone_gc_stop(struct xfs_mount *mp)
+ {
+ }
++static inline void xfs_zone_gc_wakeup(struct xfs_mount *mp)
++{
++}
+ #endif /* CONFIG_XFS_RT */
+ 
+ #endif /* _XFS_ZONE_ALLOC_H */
+--- a/fs/xfs/xfs_zone_gc.c
++++ b/fs/xfs/xfs_zone_gc.c
+@@ -1147,6 +1147,23 @@ xfs_zone_gc_stop(
+               kthread_park(mp->m_zone_info->zi_gc_thread);
+ }
+ 
++void
++xfs_zone_gc_wakeup(
++      struct xfs_mount        *mp)
++{
++      struct super_block      *sb = mp->m_super;
++
++      /*
++       * If we are unmounting the file system we must not try to
++       * wake gc as m_zone_info might have been freed already.
++       */
++      if (down_read_trylock(&sb->s_umount)) {
++              if (!xfs_is_readonly(mp))
++                      wake_up_process(mp->m_zone_info->zi_gc_thread);
++              up_read(&sb->s_umount);
++      }
++}
++
+ int
+ xfs_zone_gc_mount(
+       struct xfs_mount        *mp)
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 3 May 2026 12:45:19 +0000 (14:45 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 3 May 2026 12:45:19 +0000 (14:45 +0200)
queue-6.18/arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/crypto-talitos-rename-first-last-to-first_desc-last_desc.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/firmware-google-framebuffer-do-not-unregister-platform-device.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mmc-block-use-single-block-write-in-retry.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/randomize_kstack-maintain-kstack_offset-per-task.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/rtc-ntxec-fix-of-node-reference-imbalance.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/series		patch \| blob \| blame \| history
queue-6.18/tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/tpm-tpm_tis-add-error-logging-for-data-transfer.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch	[new file with mode: 0644]	patch \| blob
queue-6.18/xfs-start-gc-on-zonegc_low_space-attribute-updates.patch	[new file with mode: 0644]	patch \| blob