--- /dev/null
+From d5325810814ee995debfa0b6c4a22e0391598bef Mon Sep 17 00:00:00 2001
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+Date: Fri, 20 Mar 2026 08:30:30 +0100
+Subject: arm64: dts: ti: am62-verdin: Enable pullup for eMMC data pins
+
+From: Francesco Dolcini <francesco.dolcini@toradex.com>
+
+commit d5325810814ee995debfa0b6c4a22e0391598bef upstream.
+
+The Verdin AM62 board does not have external pullups on the eMMC DAT1-DAT7 pins.
+Enable internal pullups on DAT1-DAT7 considering:
+
+ - without a host-side pullup, these lines rely solely on the eMMC
+ device's internal pullup (R_int, 10kohm-150kohm per JEDEC), which may
+ exceed the recommended 50kohm max for 1.8V VCCQ
+ - JEDEC JESD84-B51 Table 200 requires host-side pullups (R_DAT,
+ 10kohm-100kohm) on all data lines to prevent bus floating
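+
+As a rough sanity check (illustrative values, not taken from the board
+schematic): with the SoC-internal pullup in parallel with a worst-case
+device R_int of 150kohm, even a relatively weak 50kohm host pullup gives
+
+ R_eff = (50k * 150k) / (50k + 150k) = 37.5kohm
+
+which is back below the recommended 50kohm maximum for 1.8V VCCQ.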
+
+Fixes: 316b80246b16 ("arm64: dts: ti: add verdin am62")
+Cc: stable@vger.kernel.org
+Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
+Link: https://patch.msgid.link/20260320073032.10427-1-francesco@dolcini.it
+Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
++++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi
+@@ -572,16 +572,16 @@
+ /* On-module eMMC */
+ pinctrl_sdhci0: main-mmc0-default-pins {
+ pinctrl-single,pins = <
+- AM62X_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */
+- AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK */
+- AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */
+- AM62X_IOPAD(0x210, PIN_INPUT, 0) /* (AA1) MMC0_DAT1 */
+- AM62X_IOPAD(0x20c, PIN_INPUT, 0) /* (AA3) MMC0_DAT2 */
+- AM62X_IOPAD(0x208, PIN_INPUT, 0) /* (Y4) MMC0_DAT3 */
+- AM62X_IOPAD(0x204, PIN_INPUT, 0) /* (AB2) MMC0_DAT4 */
+- AM62X_IOPAD(0x200, PIN_INPUT, 0) /* (AC1) MMC0_DAT5 */
+- AM62X_IOPAD(0x1fc, PIN_INPUT, 0) /* (AD2) MMC0_DAT6 */
+- AM62X_IOPAD(0x1f8, PIN_INPUT, 0) /* (AC2) MMC0_DAT7 */
++ AM62X_IOPAD(0x220, PIN_INPUT, 0) /* (Y3) MMC0_CMD */
++ AM62X_IOPAD(0x218, PIN_INPUT, 0) /* (AB1) MMC0_CLK */
++ AM62X_IOPAD(0x214, PIN_INPUT, 0) /* (AA2) MMC0_DAT0 */
++ AM62X_IOPAD(0x210, PIN_INPUT_PULLUP, 0) /* (AA1) MMC0_DAT1 */
++ AM62X_IOPAD(0x20c, PIN_INPUT_PULLUP, 0) /* (AA3) MMC0_DAT2 */
++ AM62X_IOPAD(0x208, PIN_INPUT_PULLUP, 0) /* (Y4) MMC0_DAT3 */
++ AM62X_IOPAD(0x204, PIN_INPUT_PULLUP, 0) /* (AB2) MMC0_DAT4 */
++ AM62X_IOPAD(0x200, PIN_INPUT_PULLUP, 0) /* (AC1) MMC0_DAT5 */
++ AM62X_IOPAD(0x1fc, PIN_INPUT_PULLUP, 0) /* (AD2) MMC0_DAT6 */
++ AM62X_IOPAD(0x1f8, PIN_INPUT_PULLUP, 0) /* (AC2) MMC0_DAT7 */
+ >;
+ };
+
--- /dev/null
+From 95aed2af87ec43fa7624cc81dd13d37824ad4972 Mon Sep 17 00:00:00 2001
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Date: Wed, 1 Apr 2026 10:31:11 +0100
+Subject: crypto: qat - fix IRQ cleanup on 6xxx probe failure
+
+From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+
+commit 95aed2af87ec43fa7624cc81dd13d37824ad4972 upstream.
+
+When adf_dev_up() partially completes and then fails, the IRQ
+handlers registered during adf_isr_resource_alloc() are not detached
+before the MSI-X vectors are released.
+
+Since the device is enabled with pcim_enable_device(), calling
+pci_alloc_irq_vectors() internally registers pcim_msi_release() as a
+devres action. On probe failure, devres runs pcim_msi_release() which
+calls pci_free_irq_vectors(), tearing down the MSI-X vectors while IRQ
+handlers (for example 'qat0-bundle0') are still attached. This causes
+remove_proc_entry() warnings:
+
+ [ 22.163964] remove_proc_entry: removing non-empty directory 'irq/143', leaking at least 'qat0-bundle0'
+
+Moving the devm_add_action_or_reset() before adf_dev_up() does not solve
+the problem since devres runs in LIFO order and pcim_msi_release(),
+registered later inside adf_dev_up(), would still fire before
+adf_device_down().
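+
+Schematically (simplified, not actual driver code), the two orders are:
+
+ devm_add_action_or_reset(dev, adf_device_down, accel_dev); /* 1st */
+ adf_dev_up()
+   pci_alloc_irq_vectors()
+     -> devres registers pcim_msi_release()                 /* 2nd */
+
+ /* devres unwind on probe failure runs in reverse (LIFO) order: */
+ pcim_msi_release()  /* frees MSI-X while IRQ handlers are attached */
+ adf_device_down()   /* detaches the IRQ handlers, but too late */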
+
+Fix by calling adf_dev_down() explicitly when adf_dev_up() fails, to
+properly free IRQ handlers before devres releases the MSI-X vectors.
+
+Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
+Reviewed-by: Laurent M Coquerel <laurent.m.coquerel@intel.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/intel/qat/qat_6xxx/adf_drv.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
++++ b/drivers/crypto/intel/qat/qat_6xxx/adf_drv.c
+@@ -182,8 +182,10 @@ static int adf_probe(struct pci_dev *pde
+ return ret;
+
+ ret = adf_dev_up(accel_dev, true);
+- if (ret)
++ if (ret) {
++ adf_dev_down(accel_dev);
+ return ret;
++ }
+
+ ret = devm_add_action_or_reset(dev, adf_device_down, accel_dev);
+ if (ret)
--- /dev/null
+From 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 Mon Sep 17 00:00:00 2001
+From: Paul Louvel <paul.louvel@bootlin.com>
+Date: Mon, 30 Mar 2026 12:28:18 +0200
+Subject: crypto: talitos - fix SEC1 32k ahash request limitation
+
+From: Paul Louvel <paul.louvel@bootlin.com>
+
+commit 655ef638a2bc3cd0a9eff99a02f83cab94a3a917 upstream.
+
+Since commit c662b043cdca ("crypto: af_alg/hash: Support
+MSG_SPLICE_PAGES"), the crypto core may pass large scatterlists spanning
+multiple pages to drivers supporting ahash operations. As a result, a
+driver can now receive large ahash requests.
+
+The SEC1 engine has a limitation where a single descriptor cannot
+process more than 32k of data. The current implementation attempts to
+handle the entire request within a single descriptor, which leads to
+failures raised by the driver:
+
+ "length exceeds h/w max limit"
+
+Address this limitation by splitting large ahash requests into multiple
+descriptors, each respecting the 32k hardware limit. This allows
+processing arbitrarily large requests.
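+
+In sketch form (simplified; the driver performs this asynchronously,
+rescheduling the next chunk from the completion callback via a
+workqueue):
+
+	while (remaining) {
+		chunk = min(remaining, TALITOS1_MAX_DATA_LEN);
+		/* submit one descriptor hashing 'chunk' bytes */
+		remaining -= chunk;
+	}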
+
+Cc: stable@vger.kernel.org
+Fixes: c662b043cdca ("crypto: af_alg/hash: Support MSG_SPLICE_PAGES")
+Signed-off-by: Paul Louvel <paul.louvel@bootlin.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/talitos.c | 216 +++++++++++++++++++++++++++++++----------------
+ 1 file changed, 147 insertions(+), 69 deletions(-)
+
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -12,6 +12,7 @@
+ * All rights reserved.
+ */
+
++#include <linux/workqueue.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/mod_devicetable.h>
+@@ -870,10 +871,18 @@ struct talitos_ahash_req_ctx {
+ unsigned int swinit;
+ unsigned int first;
+ unsigned int last;
++ unsigned int last_request;
+ unsigned int to_hash_later;
+ unsigned int nbuf;
+ struct scatterlist bufsl[2];
+ struct scatterlist *psrc;
++
++ struct scatterlist request_bufsl[2];
++ struct ahash_request *areq;
++ struct scatterlist *request_sl;
++ unsigned int remaining_ahash_request_bytes;
++ unsigned int current_ahash_request_bytes;
++ struct work_struct sec1_ahash_process_remaining;
+ };
+
+ struct talitos_export_state {
+@@ -1759,7 +1768,20 @@ static void ahash_done(struct device *de
+
+ kfree(edesc);
+
+- ahash_request_complete(areq, err);
++ if (err) {
++ ahash_request_complete(areq, err);
++ return;
++ }
++
++ req_ctx->remaining_ahash_request_bytes -=
++ req_ctx->current_ahash_request_bytes;
++
++ if (!req_ctx->remaining_ahash_request_bytes) {
++ ahash_request_complete(areq, 0);
++ return;
++ }
++
++ schedule_work(&req_ctx->sec1_ahash_process_remaining);
+ }
+
+ /*
+@@ -1925,60 +1947,7 @@ static struct talitos_edesc *ahash_edesc
+ nbytes, 0, 0, 0, areq->base.flags, false);
+ }
+
+-static int ahash_init(struct ahash_request *areq)
+-{
+- struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+- struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+- struct device *dev = ctx->dev;
+- struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+- unsigned int size;
+- dma_addr_t dma;
+-
+- /* Initialize the context */
+- req_ctx->buf_idx = 0;
+- req_ctx->nbuf = 0;
+- req_ctx->first = 1; /* first indicates h/w must init its context */
+- req_ctx->swinit = 0; /* assume h/w init of context */
+- size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+- ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+- : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+- req_ctx->hw_context_size = size;
+-
+- dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+- DMA_TO_DEVICE);
+- dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
+-
+- return 0;
+-}
+-
+-/*
+- * on h/w without explicit sha224 support, we initialize h/w context
+- * manually with sha224 constants, and tell it to run sha256.
+- */
+-static int ahash_init_sha224_swinit(struct ahash_request *areq)
+-{
+- struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+-
+- req_ctx->hw_context[0] = SHA224_H0;
+- req_ctx->hw_context[1] = SHA224_H1;
+- req_ctx->hw_context[2] = SHA224_H2;
+- req_ctx->hw_context[3] = SHA224_H3;
+- req_ctx->hw_context[4] = SHA224_H4;
+- req_ctx->hw_context[5] = SHA224_H5;
+- req_ctx->hw_context[6] = SHA224_H6;
+- req_ctx->hw_context[7] = SHA224_H7;
+-
+- /* init 64-bit count */
+- req_ctx->hw_context[8] = 0;
+- req_ctx->hw_context[9] = 0;
+-
+- ahash_init(areq);
+- req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
+-
+- return 0;
+-}
+-
+-static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
++static int ahash_process_req_one(struct ahash_request *areq, unsigned int nbytes)
+ {
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+ struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
+@@ -1997,12 +1966,12 @@ static int ahash_process_req(struct ahas
+
+ if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
+ /* Buffer up to one whole block */
+- nents = sg_nents_for_len(areq->src, nbytes);
++ nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+ if (nents < 0) {
+ dev_err(dev, "Invalid number of src SG.\n");
+ return nents;
+ }
+- sg_copy_to_buffer(areq->src, nents,
++ sg_copy_to_buffer(req_ctx->request_sl, nents,
+ ctx_buf + req_ctx->nbuf, nbytes);
+ req_ctx->nbuf += nbytes;
+ return 0;
+@@ -2029,7 +1998,7 @@ static int ahash_process_req(struct ahas
+ sg_init_table(req_ctx->bufsl, nsg);
+ sg_set_buf(req_ctx->bufsl, ctx_buf, req_ctx->nbuf);
+ if (nsg > 1)
+- sg_chain(req_ctx->bufsl, 2, areq->src);
++ sg_chain(req_ctx->bufsl, 2, req_ctx->request_sl);
+ req_ctx->psrc = req_ctx->bufsl;
+ } else if (is_sec1 && req_ctx->nbuf && req_ctx->nbuf < blocksize) {
+ int offset;
+@@ -2038,26 +2007,26 @@ static int ahash_process_req(struct ahas
+ offset = blocksize - req_ctx->nbuf;
+ else
+ offset = nbytes_to_hash - req_ctx->nbuf;
+- nents = sg_nents_for_len(areq->src, offset);
++ nents = sg_nents_for_len(req_ctx->request_sl, offset);
+ if (nents < 0) {
+ dev_err(dev, "Invalid number of src SG.\n");
+ return nents;
+ }
+- sg_copy_to_buffer(areq->src, nents,
++ sg_copy_to_buffer(req_ctx->request_sl, nents,
+ ctx_buf + req_ctx->nbuf, offset);
+ req_ctx->nbuf += offset;
+- req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, areq->src,
++ req_ctx->psrc = scatterwalk_ffwd(req_ctx->bufsl, req_ctx->request_sl,
+ offset);
+ } else
+- req_ctx->psrc = areq->src;
++ req_ctx->psrc = req_ctx->request_sl;
+
+ if (to_hash_later) {
+- nents = sg_nents_for_len(areq->src, nbytes);
++ nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+ if (nents < 0) {
+ dev_err(dev, "Invalid number of src SG.\n");
+ return nents;
+ }
+- sg_pcopy_to_buffer(areq->src, nents,
++ sg_pcopy_to_buffer(req_ctx->request_sl, nents,
+ req_ctx->buf[(req_ctx->buf_idx + 1) & 1],
+ to_hash_later,
+ nbytes - to_hash_later);
+@@ -2065,7 +2034,7 @@ static int ahash_process_req(struct ahas
+ req_ctx->to_hash_later = to_hash_later;
+
+ /* Allocate extended descriptor */
+- edesc = ahash_edesc_alloc(areq, nbytes_to_hash);
++ edesc = ahash_edesc_alloc(req_ctx->areq, nbytes_to_hash);
+ if (IS_ERR(edesc))
+ return PTR_ERR(edesc);
+
+@@ -2087,14 +2056,123 @@ static int ahash_process_req(struct ahas
+ if (ctx->keylen && (req_ctx->first || req_ctx->last))
+ edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+
+- return common_nonsnoop_hash(edesc, areq, nbytes_to_hash, ahash_done);
++ return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done);
+ }
+
+-static int ahash_update(struct ahash_request *areq)
++static void sec1_ahash_process_remaining(struct work_struct *work)
+ {
++ struct talitos_ahash_req_ctx *req_ctx =
++ container_of(work, struct talitos_ahash_req_ctx,
++ sec1_ahash_process_remaining);
++ int err = 0;
++
++ req_ctx->request_sl = scatterwalk_ffwd(req_ctx->request_bufsl,
++ req_ctx->request_sl, TALITOS1_MAX_DATA_LEN);
++
++ if (req_ctx->remaining_ahash_request_bytes > TALITOS1_MAX_DATA_LEN)
++ req_ctx->current_ahash_request_bytes = TALITOS1_MAX_DATA_LEN;
++ else {
++ req_ctx->current_ahash_request_bytes =
++ req_ctx->remaining_ahash_request_bytes;
++
++ if (req_ctx->last_request)
++ req_ctx->last = 1;
++ }
++
++ err = ahash_process_req_one(req_ctx->areq,
++ req_ctx->current_ahash_request_bytes);
++
++ if (err != -EINPROGRESS)
++ ahash_request_complete(req_ctx->areq, err);
++}
++
++static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
++{
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
++ struct device *dev = ctx->dev;
++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++ struct talitos_private *priv = dev_get_drvdata(dev);
++ bool is_sec1 = has_ftr_sec1(priv);
++
++ req_ctx->areq = areq;
++ req_ctx->request_sl = areq->src;
++ req_ctx->remaining_ahash_request_bytes = nbytes;
++
++ if (is_sec1) {
++ if (nbytes > TALITOS1_MAX_DATA_LEN)
++ nbytes = TALITOS1_MAX_DATA_LEN;
++ else if (req_ctx->last_request)
++ req_ctx->last = 1;
++ }
++
++ req_ctx->current_ahash_request_bytes = nbytes;
++
++ return ahash_process_req_one(req_ctx->areq,
++ req_ctx->current_ahash_request_bytes);
++}
++
++static int ahash_init(struct ahash_request *areq)
++{
++ struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
++ struct talitos_ctx *ctx = crypto_ahash_ctx(tfm);
++ struct device *dev = ctx->dev;
+ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++ unsigned int size;
++ dma_addr_t dma;
+
++ /* Initialize the context */
++ req_ctx->buf_idx = 0;
++ req_ctx->nbuf = 0;
++ req_ctx->first = 1; /* first indicates h/w must init its context */
++ req_ctx->swinit = 0; /* assume h/w init of context */
++ size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
++ ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
++ : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
++ req_ctx->hw_context_size = size;
++ req_ctx->last_request = 0;
+ req_ctx->last = 0;
++ INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining);
++
++ dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
++ DMA_TO_DEVICE);
++ dma_unmap_single(dev, dma, req_ctx->hw_context_size, DMA_TO_DEVICE);
++
++ return 0;
++}
++
++/*
++ * on h/w without explicit sha224 support, we initialize h/w context
++ * manually with sha224 constants, and tell it to run sha256.
++ */
++static int ahash_init_sha224_swinit(struct ahash_request *areq)
++{
++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++
++ req_ctx->hw_context[0] = SHA224_H0;
++ req_ctx->hw_context[1] = SHA224_H1;
++ req_ctx->hw_context[2] = SHA224_H2;
++ req_ctx->hw_context[3] = SHA224_H3;
++ req_ctx->hw_context[4] = SHA224_H4;
++ req_ctx->hw_context[5] = SHA224_H5;
++ req_ctx->hw_context[6] = SHA224_H6;
++ req_ctx->hw_context[7] = SHA224_H7;
++
++ /* init 64-bit count */
++ req_ctx->hw_context[8] = 0;
++ req_ctx->hw_context[9] = 0;
++
++ ahash_init(areq);
++ req_ctx->swinit = 1;/* prevent h/w initting context with sha256 values*/
++
++ return 0;
++}
++
++static int ahash_update(struct ahash_request *areq)
++{
++ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
++
++ req_ctx->last_request = 0;
+
+ return ahash_process_req(areq, areq->nbytes);
+ }
+@@ -2103,7 +2181,7 @@ static int ahash_final(struct ahash_requ
+ {
+ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+- req_ctx->last = 1;
++ req_ctx->last_request = 1;
+
+ return ahash_process_req(areq, 0);
+ }
+@@ -2112,7 +2190,7 @@ static int ahash_finup(struct ahash_requ
+ {
+ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+- req_ctx->last = 1;
++ req_ctx->last_request = 1;
+
+ return ahash_process_req(areq, areq->nbytes);
+ }
--- /dev/null
+From a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae Mon Sep 17 00:00:00 2001
+From: Paul Louvel <paul.louvel@bootlin.com>
+Date: Mon, 30 Mar 2026 12:28:19 +0200
+Subject: crypto: talitos - rename first/last to first_desc/last_desc
+
+From: Paul Louvel <paul.louvel@bootlin.com>
+
+commit a1b80018b8cec27fc06a8b04a7f8b5f6cfe86eae upstream.
+
+The previous commit introduced a new last_request variable in the
+context structure.
+
+Rename the existing first/last member variables in the context
+structure to improve readability.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Paul Louvel <paul.louvel@bootlin.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/crypto/talitos.c | 46 +++++++++++++++++++++++-----------------------
+ 1 file changed, 23 insertions(+), 23 deletions(-)
+
+--- a/drivers/crypto/talitos.c
++++ b/drivers/crypto/talitos.c
+@@ -869,8 +869,8 @@ struct talitos_ahash_req_ctx {
+ u8 buf[2][HASH_MAX_BLOCK_SIZE];
+ int buf_idx;
+ unsigned int swinit;
+- unsigned int first;
+- unsigned int last;
++ unsigned int first_desc;
++ unsigned int last_desc;
+ unsigned int last_request;
+ unsigned int to_hash_later;
+ unsigned int nbuf;
+@@ -889,8 +889,8 @@ struct talitos_export_state {
+ u32 hw_context[TALITOS_MDEU_MAX_CONTEXT_SIZE / sizeof(u32)];
+ u8 buf[HASH_MAX_BLOCK_SIZE];
+ unsigned int swinit;
+- unsigned int first;
+- unsigned int last;
++ unsigned int first_desc;
++ unsigned int last_desc;
+ unsigned int to_hash_later;
+ unsigned int nbuf;
+ };
+@@ -1722,7 +1722,7 @@ static void common_nonsnoop_hash_unmap(s
+ if (desc->next_desc &&
+ desc->ptr[5].ptr != desc2->ptr[5].ptr)
+ unmap_single_talitos_ptr(dev, &desc2->ptr[5], DMA_FROM_DEVICE);
+- if (req_ctx->last)
++ if (req_ctx->last_desc)
+ memcpy(areq->result, req_ctx->hw_context,
+ crypto_ahash_digestsize(tfm));
+
+@@ -1759,7 +1759,7 @@ static void ahash_done(struct device *de
+ container_of(desc, struct talitos_edesc, desc);
+ struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+
+- if (!req_ctx->last && req_ctx->to_hash_later) {
++ if (!req_ctx->last_desc && req_ctx->to_hash_later) {
+ /* Position any partial block for next update/final/finup */
+ req_ctx->buf_idx = (req_ctx->buf_idx + 1) & 1;
+ req_ctx->nbuf = req_ctx->to_hash_later;
+@@ -1825,7 +1825,7 @@ static int common_nonsnoop_hash(struct t
+ /* first DWORD empty */
+
+ /* hash context in */
+- if (!req_ctx->first || req_ctx->swinit) {
++ if (!req_ctx->first_desc || req_ctx->swinit) {
+ map_single_talitos_ptr_nosync(dev, &desc->ptr[1],
+ req_ctx->hw_context_size,
+ req_ctx->hw_context,
+@@ -1833,7 +1833,7 @@ static int common_nonsnoop_hash(struct t
+ req_ctx->swinit = 0;
+ }
+ /* Indicate next op is not the first. */
+- req_ctx->first = 0;
++ req_ctx->first_desc = 0;
+
+ /* HMAC key */
+ if (ctx->keylen)
+@@ -1866,7 +1866,7 @@ static int common_nonsnoop_hash(struct t
+ /* fifth DWORD empty */
+
+ /* hash/HMAC out -or- hash context out */
+- if (req_ctx->last)
++ if (req_ctx->last_desc)
+ map_single_talitos_ptr(dev, &desc->ptr[5],
+ crypto_ahash_digestsize(tfm),
+ req_ctx->hw_context, DMA_FROM_DEVICE);
+@@ -1908,7 +1908,7 @@ static int common_nonsnoop_hash(struct t
+ if (sg_count > 1)
+ sync_needed = true;
+ copy_talitos_ptr(&desc2->ptr[5], &desc->ptr[5], is_sec1);
+- if (req_ctx->last)
++ if (req_ctx->last_desc)
+ map_single_talitos_ptr_nosync(dev, &desc->ptr[5],
+ req_ctx->hw_context_size,
+ req_ctx->hw_context,
+@@ -1964,7 +1964,7 @@ static int ahash_process_req_one(struct
+ bool is_sec1 = has_ftr_sec1(priv);
+ u8 *ctx_buf = req_ctx->buf[req_ctx->buf_idx];
+
+- if (!req_ctx->last && (nbytes + req_ctx->nbuf <= blocksize)) {
++ if (!req_ctx->last_desc && (nbytes + req_ctx->nbuf <= blocksize)) {
+ /* Buffer up to one whole block */
+ nents = sg_nents_for_len(req_ctx->request_sl, nbytes);
+ if (nents < 0) {
+@@ -1981,7 +1981,7 @@ static int ahash_process_req_one(struct
+ nbytes_to_hash = nbytes + req_ctx->nbuf;
+ to_hash_later = nbytes_to_hash & (blocksize - 1);
+
+- if (req_ctx->last)
++ if (req_ctx->last_desc)
+ to_hash_later = 0;
+ else if (to_hash_later)
+ /* There is a partial block. Hash the full block(s) now */
+@@ -2041,19 +2041,19 @@ static int ahash_process_req_one(struct
+ edesc->desc.hdr = ctx->desc_hdr_template;
+
+ /* On last one, request SEC to pad; otherwise continue */
+- if (req_ctx->last)
++ if (req_ctx->last_desc)
+ edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_PAD;
+ else
+ edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_CONT;
+
+ /* request SEC to INIT hash. */
+- if (req_ctx->first && !req_ctx->swinit)
++ if (req_ctx->first_desc && !req_ctx->swinit)
+ edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_INIT;
+
+ /* When the tfm context has a keylen, it's an HMAC.
+ * A first or last (ie. not middle) descriptor must request HMAC.
+ */
+- if (ctx->keylen && (req_ctx->first || req_ctx->last))
++ if (ctx->keylen && (req_ctx->first_desc || req_ctx->last_desc))
+ edesc->desc.hdr |= DESC_HDR_MODE0_MDEU_HMAC;
+
+ return common_nonsnoop_hash(edesc, req_ctx->areq, nbytes_to_hash, ahash_done);
+@@ -2076,7 +2076,7 @@ static void sec1_ahash_process_remaining
+ req_ctx->remaining_ahash_request_bytes;
+
+ if (req_ctx->last_request)
+- req_ctx->last = 1;
++ req_ctx->last_desc = 1;
+ }
+
+ err = ahash_process_req_one(req_ctx->areq,
+@@ -2103,7 +2103,7 @@ static int ahash_process_req(struct ahas
+ if (nbytes > TALITOS1_MAX_DATA_LEN)
+ nbytes = TALITOS1_MAX_DATA_LEN;
+ else if (req_ctx->last_request)
+- req_ctx->last = 1;
++ req_ctx->last_desc = 1;
+ }
+
+ req_ctx->current_ahash_request_bytes = nbytes;
+@@ -2124,14 +2124,14 @@ static int ahash_init(struct ahash_reque
+ /* Initialize the context */
+ req_ctx->buf_idx = 0;
+ req_ctx->nbuf = 0;
+- req_ctx->first = 1; /* first indicates h/w must init its context */
++ req_ctx->first_desc = 1; /* first_desc indicates h/w must init its context */
+ req_ctx->swinit = 0; /* assume h/w init of context */
+ size = (crypto_ahash_digestsize(tfm) <= SHA256_DIGEST_SIZE)
+ ? TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256
+ : TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512;
+ req_ctx->hw_context_size = size;
+ req_ctx->last_request = 0;
+- req_ctx->last = 0;
++ req_ctx->last_desc = 0;
+ INIT_WORK(&req_ctx->sec1_ahash_process_remaining, sec1_ahash_process_remaining);
+
+ dma = dma_map_single(dev, req_ctx->hw_context, req_ctx->hw_context_size,
+@@ -2224,8 +2224,8 @@ static int ahash_export(struct ahash_req
+ req_ctx->hw_context_size);
+ memcpy(export->buf, req_ctx->buf[req_ctx->buf_idx], req_ctx->nbuf);
+ export->swinit = req_ctx->swinit;
+- export->first = req_ctx->first;
+- export->last = req_ctx->last;
++ export->first_desc = req_ctx->first_desc;
++ export->last_desc = req_ctx->last_desc;
+ export->to_hash_later = req_ctx->to_hash_later;
+ export->nbuf = req_ctx->nbuf;
+
+@@ -2250,8 +2250,8 @@ static int ahash_import(struct ahash_req
+ memcpy(req_ctx->hw_context, export->hw_context, size);
+ memcpy(req_ctx->buf[0], export->buf, export->nbuf);
+ req_ctx->swinit = export->swinit;
+- req_ctx->first = export->first;
+- req_ctx->last = export->last;
++ req_ctx->first_desc = export->first_desc;
++ req_ctx->last_desc = export->last_desc;
+ req_ctx->to_hash_later = export->to_hash_later;
+ req_ctx->nbuf = export->nbuf;
+
--- /dev/null
+From a2be37eedb52ea26938fa4cc9de1ff84963c57ad Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
+Date: Tue, 24 Feb 2026 11:42:04 +0100
+Subject: firmware: exynos-acpm: Drop fake 'const' on handle pointer
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
+
+commit a2be37eedb52ea26938fa4cc9de1ff84963c57ad upstream.
+
+All the functions operating on the 'handle' pointer claim it is a
+pointer to const, implying they do not modify the handle. In fact that
+is a false promise, because the first thing these functions do is cast
+the const away with container_of:
+
+ struct acpm_info *acpm = handle_to_acpm_info(handle);
+
+With that cast in place, the handle is trivially writable with a simple:
+
+ acpm->handle.ops.pmic_ops.read_reg = NULL;
+
+The code is not logically correct either: functions like
+acpm_get_by_node() and acpm_handle_put() are meant to adjust the
+handle's reference counting, so they must modify the handle. The
+modification happens regardless, even though the reference count is
+stored in the container that the handle is part of.
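+
+Note that container_of() casts through 'void *' and thus silently
+discards the const qualifier; schematically:
+
+ const struct acpm_handle *h = handle;
+ struct acpm_info *acpm = container_of(h, struct acpm_info, handle);
+ /* 'acpm' is non-const even though 'h' was declared const */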
+
+The code has no visible bug today, but the incorrect 'const'
+annotations could mislead the compiler into invalid decisions.
+
+Fixes: a88927b534ba ("firmware: add Exynos ACPM protocol driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@oss.qualcomm.com>
+Link: https://patch.msgid.link/20260224104203.42950-2-krzysztof.kozlowski@oss.qualcomm.com
+Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/clk/samsung/clk-acpm.c | 4 -
+ drivers/firmware/samsung/exynos-acpm-dvfs.c | 4 -
+ drivers/firmware/samsung/exynos-acpm-dvfs.h | 4 -
+ drivers/firmware/samsung/exynos-acpm-pmic.c | 10 ++--
+ drivers/firmware/samsung/exynos-acpm-pmic.h | 10 ++--
+ drivers/firmware/samsung/exynos-acpm.c | 16 ++++---
+ drivers/firmware/samsung/exynos-acpm.h | 2
+ drivers/mfd/sec-acpm.c | 10 ++--
+ include/linux/firmware/samsung/exynos-acpm-protocol.h | 40 +++++++-----------
+ 9 files changed, 48 insertions(+), 52 deletions(-)
+
+--- a/drivers/clk/samsung/clk-acpm.c
++++ b/drivers/clk/samsung/clk-acpm.c
+@@ -20,7 +20,7 @@ struct acpm_clk {
+ u32 id;
+ struct clk_hw hw;
+ unsigned int mbox_chan_id;
+- const struct acpm_handle *handle;
++ struct acpm_handle *handle;
+ };
+
+ struct acpm_clk_variant {
+@@ -113,7 +113,7 @@ static int acpm_clk_register(struct devi
+
+ static int acpm_clk_probe(struct platform_device *pdev)
+ {
+- const struct acpm_handle *acpm_handle;
++ struct acpm_handle *acpm_handle;
+ struct clk_hw_onecell_data *clk_data;
+ struct clk_hw **hws;
+ struct device *dev = &pdev->dev;
+--- a/drivers/firmware/samsung/exynos-acpm-dvfs.c
++++ b/drivers/firmware/samsung/exynos-acpm-dvfs.c
+@@ -42,7 +42,7 @@ static void acpm_dvfs_init_set_rate_cmd(
+ cmd[3] = ktime_to_ms(ktime_get());
+ }
+
+-int acpm_dvfs_set_rate(const struct acpm_handle *handle,
++int acpm_dvfs_set_rate(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, unsigned int clk_id,
+ unsigned long rate)
+ {
+@@ -62,7 +62,7 @@ static void acpm_dvfs_init_get_rate_cmd(
+ cmd[3] = ktime_to_ms(ktime_get());
+ }
+
+-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle,
++unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, unsigned int clk_id)
+ {
+ struct acpm_xfer xfer;
+--- a/drivers/firmware/samsung/exynos-acpm-dvfs.h
++++ b/drivers/firmware/samsung/exynos-acpm-dvfs.h
+@@ -11,10 +11,10 @@
+
+ struct acpm_handle;
+
+-int acpm_dvfs_set_rate(const struct acpm_handle *handle,
++int acpm_dvfs_set_rate(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, unsigned int id,
+ unsigned long rate);
+-unsigned long acpm_dvfs_get_rate(const struct acpm_handle *handle,
++unsigned long acpm_dvfs_get_rate(struct acpm_handle *handle,
+ unsigned int acpm_chan_id,
+ unsigned int clk_id);
+
+--- a/drivers/firmware/samsung/exynos-acpm-pmic.c
++++ b/drivers/firmware/samsung/exynos-acpm-pmic.c
+@@ -77,7 +77,7 @@ static void acpm_pmic_init_read_cmd(u32
+ cmd[3] = ktime_to_ms(ktime_get());
+ }
+
+-int acpm_pmic_read_reg(const struct acpm_handle *handle,
++int acpm_pmic_read_reg(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 *buf)
+ {
+@@ -107,7 +107,7 @@ static void acpm_pmic_init_bulk_read_cmd
+ FIELD_PREP(ACPM_PMIC_VALUE, count);
+ }
+
+-int acpm_pmic_bulk_read(const struct acpm_handle *handle,
++int acpm_pmic_bulk_read(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 count, u8 *buf)
+ {
+@@ -150,7 +150,7 @@ static void acpm_pmic_init_write_cmd(u32
+ cmd[3] = ktime_to_ms(ktime_get());
+ }
+
+-int acpm_pmic_write_reg(const struct acpm_handle *handle,
++int acpm_pmic_write_reg(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 value)
+ {
+@@ -187,7 +187,7 @@ static void acpm_pmic_init_bulk_write_cm
+ }
+ }
+
+-int acpm_pmic_bulk_write(const struct acpm_handle *handle,
++int acpm_pmic_bulk_write(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 count, const u8 *buf)
+ {
+@@ -220,7 +220,7 @@ static void acpm_pmic_init_update_cmd(u3
+ cmd[3] = ktime_to_ms(ktime_get());
+ }
+
+-int acpm_pmic_update_reg(const struct acpm_handle *handle,
++int acpm_pmic_update_reg(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 value, u8 mask)
+ {
+--- a/drivers/firmware/samsung/exynos-acpm-pmic.h
++++ b/drivers/firmware/samsung/exynos-acpm-pmic.h
+@@ -11,19 +11,19 @@
+
+ struct acpm_handle;
+
+-int acpm_pmic_read_reg(const struct acpm_handle *handle,
++int acpm_pmic_read_reg(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 *buf);
+-int acpm_pmic_bulk_read(const struct acpm_handle *handle,
++int acpm_pmic_bulk_read(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 count, u8 *buf);
+-int acpm_pmic_write_reg(const struct acpm_handle *handle,
++int acpm_pmic_write_reg(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 value);
+-int acpm_pmic_bulk_write(const struct acpm_handle *handle,
++int acpm_pmic_bulk_write(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 count, const u8 *buf);
+-int acpm_pmic_update_reg(const struct acpm_handle *handle,
++int acpm_pmic_update_reg(struct acpm_handle *handle,
+ unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+ u8 value, u8 mask);
+ #endif /* __EXYNOS_ACPM_PMIC_H__ */
+--- a/drivers/firmware/samsung/exynos-acpm.c
++++ b/drivers/firmware/samsung/exynos-acpm.c
+@@ -412,7 +412,7 @@ static int acpm_wait_for_message_respons
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+-int acpm_do_xfer(const struct acpm_handle *handle, const struct acpm_xfer *xfer)
++int acpm_do_xfer(struct acpm_handle *handle, const struct acpm_xfer *xfer)
+ {
+ struct acpm_info *acpm = handle_to_acpm_info(handle);
+ struct exynos_mbox_msg msg;
+@@ -674,7 +674,7 @@ static int acpm_probe(struct platform_de
+ * acpm_handle_put() - release the handle acquired by acpm_get_by_phandle.
+ * @handle: Handle acquired by acpm_get_by_phandle.
+ */
+-static void acpm_handle_put(const struct acpm_handle *handle)
++static void acpm_handle_put(struct acpm_handle *handle)
+ {
+ struct acpm_info *acpm = handle_to_acpm_info(handle);
+ struct device *dev = acpm->dev;
+@@ -700,9 +700,11 @@ static void devm_acpm_release(struct dev
+ * @np: ACPM device tree node.
+ *
+ * Return: pointer to handle on success, ERR_PTR(-errno) otherwise.
++ *
++ * Note: handle CANNOT be pointer to const
+ */
+-static const struct acpm_handle *acpm_get_by_node(struct device *dev,
+- struct device_node *np)
++static struct acpm_handle *acpm_get_by_node(struct device *dev,
++ struct device_node *np)
+ {
+ struct platform_device *pdev;
+ struct device_link *link;
+@@ -743,10 +745,10 @@ static const struct acpm_handle *acpm_ge
+ *
+ * Return: pointer to handle on success, ERR_PTR(-errno) otherwise.
+ */
+-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+- struct device_node *np)
++struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
++ struct device_node *np)
+ {
+- const struct acpm_handle **ptr, *handle;
++ struct acpm_handle **ptr, *handle;
+
+ ptr = devres_alloc(devm_acpm_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+--- a/drivers/firmware/samsung/exynos-acpm.h
++++ b/drivers/firmware/samsung/exynos-acpm.h
+@@ -17,7 +17,7 @@ struct acpm_xfer {
+
+ struct acpm_handle;
+
+-int acpm_do_xfer(const struct acpm_handle *handle,
++int acpm_do_xfer(struct acpm_handle *handle,
+ const struct acpm_xfer *xfer);
+
+ #endif /* __EXYNOS_ACPM_H__ */
+--- a/drivers/mfd/sec-acpm.c
++++ b/drivers/mfd/sec-acpm.c
+@@ -367,7 +367,7 @@ static const struct regmap_config s2mpg1
+ };
+
+ struct sec_pmic_acpm_shared_bus_context {
+- const struct acpm_handle *acpm;
++ struct acpm_handle *acpm;
+ unsigned int acpm_chan_id;
+ u8 speedy_channel;
+ };
+@@ -390,7 +390,7 @@ static int sec_pmic_acpm_bus_write(void
+ size_t count)
+ {
+ struct sec_pmic_acpm_bus_context *ctx = context;
+- const struct acpm_handle *acpm = ctx->shared->acpm;
++ struct acpm_handle *acpm = ctx->shared->acpm;
+ const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
+ size_t val_count = count - BITS_TO_BYTES(ACPM_ADDR_BITS);
+ const u8 *d = data;
+@@ -410,7 +410,7 @@ static int sec_pmic_acpm_bus_read(void *
+ void *val_buf, size_t val_size)
+ {
+ struct sec_pmic_acpm_bus_context *ctx = context;
+- const struct acpm_handle *acpm = ctx->shared->acpm;
++ struct acpm_handle *acpm = ctx->shared->acpm;
+ const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
+ const u8 *r = reg_buf;
+ u8 reg;
+@@ -429,7 +429,7 @@ static int sec_pmic_acpm_bus_reg_update_
+ unsigned int val)
+ {
+ struct sec_pmic_acpm_bus_context *ctx = context;
+- const struct acpm_handle *acpm = ctx->shared->acpm;
++ struct acpm_handle *acpm = ctx->shared->acpm;
+ const struct acpm_pmic_ops *pmic_ops = &acpm->ops.pmic_ops;
+
+ return pmic_ops->update_reg(acpm, ctx->shared->acpm_chan_id, ctx->type, reg & 0xff,
+@@ -480,7 +480,7 @@ static int sec_pmic_acpm_probe(struct pl
+ struct regmap *regmap_common, *regmap_pmic, *regmap;
+ const struct sec_pmic_acpm_platform_data *pdata;
+ struct sec_pmic_acpm_shared_bus_context *shared_ctx;
+- const struct acpm_handle *acpm;
++ struct acpm_handle *acpm;
+ struct device *dev = &pdev->dev;
+ int ret, irq;
+
+--- a/include/linux/firmware/samsung/exynos-acpm-protocol.h
++++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h
+@@ -14,30 +14,24 @@ struct acpm_handle;
+ struct device_node;
+
+ struct acpm_dvfs_ops {
+- int (*set_rate)(const struct acpm_handle *handle,
+- unsigned int acpm_chan_id, unsigned int clk_id,
+- unsigned long rate);
+- unsigned long (*get_rate)(const struct acpm_handle *handle,
++ int (*set_rate)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++ unsigned int clk_id, unsigned long rate);
++ unsigned long (*get_rate)(struct acpm_handle *handle,
+ unsigned int acpm_chan_id,
+ unsigned int clk_id);
+ };
+
+ struct acpm_pmic_ops {
+- int (*read_reg)(const struct acpm_handle *handle,
+- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+- u8 *buf);
+- int (*bulk_read)(const struct acpm_handle *handle,
+- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+- u8 count, u8 *buf);
+- int (*write_reg)(const struct acpm_handle *handle,
+- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+- u8 value);
+- int (*bulk_write)(const struct acpm_handle *handle,
+- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+- u8 count, const u8 *buf);
+- int (*update_reg)(const struct acpm_handle *handle,
+- unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan,
+- u8 value, u8 mask);
++ int (*read_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++ u8 type, u8 reg, u8 chan, u8 *buf);
++ int (*bulk_read)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++ u8 type, u8 reg, u8 chan, u8 count, u8 *buf);
++ int (*write_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++ u8 type, u8 reg, u8 chan, u8 value);
++ int (*bulk_write)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++ u8 type, u8 reg, u8 chan, u8 count, const u8 *buf);
++ int (*update_reg)(struct acpm_handle *handle, unsigned int acpm_chan_id,
++ u8 type, u8 reg, u8 chan, u8 value, u8 mask);
+ };
+
+ struct acpm_ops {
+@@ -56,12 +50,12 @@ struct acpm_handle {
+ struct device;
+
+ #if IS_ENABLED(CONFIG_EXYNOS_ACPM_PROTOCOL)
+-const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+- struct device_node *np);
++struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
++ struct device_node *np);
+ #else
+
+-static inline const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+- struct device_node *np)
++static inline struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
++ struct device_node *np)
+ {
+ return NULL;
+ }
--- /dev/null
+From 5cd28bd28c8ce426b56ce4230dbd17537181d5ad Mon Sep 17 00:00:00 2001
+From: Thomas Zimmermann <tzimmermann@suse.de>
+Date: Tue, 17 Feb 2026 16:56:11 +0100
+Subject: firmware: google: framebuffer: Do not unregister platform device
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+commit 5cd28bd28c8ce426b56ce4230dbd17537181d5ad upstream.
+
+The native driver takes over the framebuffer aperture by removing the
+system-framebuffer platform device. Afterwards, the pointer in drvdata
+is dangling. Remove the entire logic around drvdata and let the kernel's
+aperture helpers handle this. The platform device depends on the native
+hardware device instead of the coreboot device anyway.
+
+When commit 851b4c14532d ("firmware: coreboot: Add coreboot framebuffer
+driver") added the coreboot framebuffer code, the kernel did not support
+device-based aperture management. Instead native driviers only removed
+the conflicting fbdev device. At that point, unregistering the framebuffer
+device most likely worked correctly. It was definitely broken after
+commit d9702b2a2171 ("fbdev/simplefb: Do not use struct
+fb_info.apertures"). So take this commit for the Fixes tag. Earlier
+releases might work depending on the native hardware driver.
+
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Fixes: d9702b2a2171 ("fbdev/simplefb: Do not use struct fb_info.apertures")
+Acked-by: Tzung-Bi Shih <tzungbi@kernel.org>
+Acked-by: Julius Werner <jwerner@chromium.org>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: Javier Martinez Canillas <javierm@redhat.com>
+Cc: Hans de Goede <hansg@kernel.org>
+Cc: linux-fbdev@vger.kernel.org
+Cc: <stable@vger.kernel.org> # v6.3+
+Link: https://patch.msgid.link/20260217155836.96267-2-tzimmermann@suse.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/google/framebuffer-coreboot.c | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/drivers/firmware/google/framebuffer-coreboot.c
++++ b/drivers/firmware/google/framebuffer-coreboot.c
+@@ -81,19 +81,10 @@ static int framebuffer_probe(struct core
+ sizeof(pdata));
+ if (IS_ERR(pdev))
+ pr_warn("coreboot: could not register framebuffer\n");
+- else
+- dev_set_drvdata(&dev->dev, pdev);
+
+ return PTR_ERR_OR_ZERO(pdev);
+ }
+
+-static void framebuffer_remove(struct coreboot_device *dev)
+-{
+- struct platform_device *pdev = dev_get_drvdata(&dev->dev);
+-
+- platform_device_unregister(pdev);
+-}
+-
+ static const struct coreboot_device_id framebuffer_ids[] = {
+ { .tag = CB_TAG_FRAMEBUFFER },
+ { /* sentinel */ }
+@@ -102,7 +93,6 @@ MODULE_DEVICE_TABLE(coreboot, framebuffe
+
+ static struct coreboot_driver framebuffer_driver = {
+ .probe = framebuffer_probe,
+- .remove = framebuffer_remove,
+ .drv = {
+ .name = "framebuffer",
+ },
--- /dev/null
+From 24c73e93d6a756e1b8626bb259d2e07c5b89b370 Mon Sep 17 00:00:00 2001
+From: Sanman Pradhan <psanman@juniper.net>
+Date: Fri, 10 Apr 2026 00:25:55 +0000
+Subject: hwmon: (pt5161l) Fix bugs in pt5161l_read_block_data()
+
+From: Sanman Pradhan <psanman@juniper.net>
+
+commit 24c73e93d6a756e1b8626bb259d2e07c5b89b370 upstream.
+
+Fix two bugs in pt5161l_read_block_data():
+
+1. Buffer overrun: The local buffer rbuf is declared as u8 rbuf[24],
+ but i2c_smbus_read_block_data() can return up to
+ I2C_SMBUS_BLOCK_MAX (32) bytes. The i2c-core copies the data into
+ the caller's buffer before the return value can be checked, so
+ the post-read length validation does not prevent a stack overrun
+ if a device returns more than 24 bytes. Resize the buffer to
+ I2C_SMBUS_BLOCK_MAX.
+
+2. Unexpected positive return on length mismatch: When all three
+ retries are exhausted because the device returns data with an
+ unexpected length, i2c_smbus_read_block_data() returns a positive
+ byte count. The function returns this directly, and callers treat
+ any non-negative return as success, processing stale or incomplete
+ buffer contents. Return -EIO when retries are exhausted with a
+ positive return value, preserving the negative error code on I2C
+ failure.
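+
+To illustrate (1) in simplified form, the i2c core fills the caller's
+buffer before the driver can validate the length:
+
+	u8 rbuf[24];
+
+	ret = i2c_smbus_read_block_data(client, cmd, rbuf);
+	/* a device reporting 32 bytes has already written 8 bytes past
+	 * the end of rbuf here, whatever 'ret' is later checked against */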
+
+Fixes: 1b2ca93cd0592 ("hwmon: Add driver for Astera Labs PT5161L retimer")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sanman Pradhan <psanman@juniper.net>
+Link: https://lore.kernel.org/r/20260410002549.424162-1-sanman.pradhan@hpe.com
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hwmon/pt5161l.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/hwmon/pt5161l.c
++++ b/drivers/hwmon/pt5161l.c
+@@ -121,7 +121,7 @@ static int pt5161l_read_block_data(struc
+ int ret, tries;
+ u8 remain_len = len;
+ u8 curr_len;
+- u8 wbuf[16], rbuf[24];
++ u8 wbuf[16], rbuf[I2C_SMBUS_BLOCK_MAX];
+ u8 cmd = 0x08; /* [7]:pec_en, [4:2]:func, [1]:start, [0]:end */
+ u8 config = 0x00; /* [6]:cfg_type, [4:1]:burst_len, [0]:address bit16 */
+
+@@ -151,7 +151,7 @@ static int pt5161l_read_block_data(struc
+ break;
+ }
+ if (tries >= 3)
+- return ret;
++ return ret < 0 ? ret : -EIO;
+
+ memcpy(val, rbuf, curr_len);
+ val += curr_len;
--- /dev/null
+From d70d4323dd9636e35696639f6b4c2b2735291516 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Wed, 1 Apr 2026 11:36:00 +0100
+Subject: KVM: arm64: Account for RESx bits in __compute_fgt()
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit d70d4323dd9636e35696639f6b4c2b2735291516 upstream.
+
+When computing Fine Grained Traps, it is preferable to account for
+the reserved bits. The HW will most probably ignore them, unless the
+bits have been repurposed to do something else.
+
+Err on the side of caution and fold our view of the reserved bits in.
+
+Reviewed-by: Sascha Bischoff <sascha.bischoff@arm.com>
+Fixes: c259d763e6b09 ("KVM: arm64: Account for RES1 bits in DECLARE_FEAT_MAP() and co")
+Link: https://sashiko.dev/#/patchset/20260319154937.3619520-1-sascha.bischoff%40arm.com
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20260401103611.357092-6-maz@kernel.org
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/config.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/config.c
++++ b/arch/arm64/kvm/config.c
+@@ -1585,8 +1585,8 @@ static __always_inline void __compute_fg
+ clear |= ~nested & m->nmask;
+ }
+
+- val |= set;
+- val &= ~clear;
++ val |= set | m->res1;
++ val &= ~(clear | m->res0);
+ *vcpu_fgt(vcpu, reg) = val;
+ }
+
--- /dev/null
+From 96bd3e76a171a8e21a6387e54e4c420a81968492 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:10 +0000
+Subject: KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 96bd3e76a171a8e21a6387e54e4c420a81968492 upstream.
+
+According to the APM Volume #2, 15.5, Canonicalization and Consistency
+Checks (24593—Rev. 3.42—March 2024), the following condition (among
+others) results in a #VMEXIT with VMEXIT_INVALID (aka SVM_EXIT_ERR):
+
+ EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
+
+In the list of consistency checks done when EFER.LME and CR0.PG are set,
+add a check that CS.L and CS.D are not both set, after the existing
+check that CR4.PAE is set.
+
+This is functionally a nop because the nested VMRUN results in
+SVM_EXIT_ERR in HW, which is forwarded to L1, but KVM performs all of
+its consistency checks before a VMRUN is actually attempted.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-17-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 6 ++++++
+ arch/x86/kvm/svm/svm.h | 1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -392,6 +392,10 @@ static bool __nested_vmcb_check_save(str
+ CC(!(save->cr0 & X86_CR0_PE)) ||
+ CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
+ return false;
++
++ if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) &&
++ (save->cs.attrib & SVM_SELECTOR_DB_MASK)))
++ return false;
+ }
+
+ /* Note, SVM doesn't have any additional restrictions on CR4. */
+@@ -508,6 +512,8 @@ static void __nested_copy_vmcb_save_to_c
+ * Copy only fields that are validated, as we need them
+ * to avoid TOC/TOU races.
+ */
++ to->cs = from->cs;
++
+ to->efer = from->efer;
+ to->cr0 = from->cr0;
+ to->cr3 = from->cr3;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -140,6 +140,7 @@ struct kvm_vmcb_info {
+ };
+
+ struct vmcb_save_area_cached {
++ struct vmcb_seg cs;
+ u64 efer;
+ u64 cr4;
+ u64 cr3;
--- /dev/null
+From b71138fcc362c67ebe66747bb22cb4e6b4d6a651 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:09 +0000
+Subject: KVM: nSVM: Add missing consistency check for nCR3 validity
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b71138fcc362c67ebe66747bb22cb4e6b4d6a651 upstream.
+
+From the APM Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
+
+ When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
+ following conditions are considered illegal state combinations, in
+ addition to those mentioned in “Canonicalization and Consistency Checks”:
+ • Any MBZ bit of nCR3 is set.
+ • Any G_PAT.PA field has an unsupported type encoding or any
+ reserved field in G_PAT has a nonzero value.
+
+Add the consistency check for nCR3 being a legal GPA with no MBZ bits
+set. Note, the G_PAT.PA check is being handled separately[*].
+
+Link: https://lore.kernel.org/kvm/20260205214326.1029278-3-jmattson@google.com [*]
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-16-yosry@kernel.org
+[sean: capture everything in CC(), massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -350,6 +350,10 @@ static bool __nested_vmcb_check_controls
+ if (CC(control->asid == 0))
+ return false;
+
++ if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
++ !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
++ return false;
++
+ if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+ MSRPM_SIZE)))
+ return false;
--- /dev/null
+From 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:59 +0000
+Subject: KVM: nSVM: Always inject a #GP if mapping VMCB12 fails on nested VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 01ddcdc55e097ca38c28ae656711b8e6d1df71f8 upstream.
+
+nested_svm_vmrun() currently only injects a #GP if kvm_vcpu_map() fails
+with -EINVAL. But it could also fail with -EFAULT if creating a host
+mapping fails. Inject a #GP in all cases; there is no reason to treat
+the failure modes differently.
+
+Fixes: 8c5fbf1a7231 ("KVM/nSVM: Use the new mapping API for mapping guest memory")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-6-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1021,12 +1021,9 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ }
+
+ vmcb12_gpa = svm->vmcb->save.rax;
+- ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
+- if (ret == -EINVAL) {
++ if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+- } else if (ret) {
+- return kvm_skip_emulated_instruction(vcpu);
+ }
+
+ ret = kvm_skip_emulated_instruction(vcpu);
--- /dev/null
+From 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 3 Mar 2026 16:22:23 -0800
+Subject: KVM: nSVM: Always intercept VMMCALL when L2 is active
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 33d3617a52f9930d22b2af59f813c2fbdefa6dd5 upstream.
+
+Always intercept VMMCALL now that KVM properly synthesizes a #UD as
+appropriate, i.e. when L1 doesn't want to intercept VMMCALL, to avoid
+putting L2 into an infinite #UD loop if KVM_X86_QUIRK_FIX_HYPERCALL_INSN
+is enabled.
+
+By letting L2 execute VMMCALL natively and thus #UD, for all intents and
+purposes KVM morphs the VMMCALL intercept into a #UD intercept (KVM always
+intercepts #UD). When the hypercall quirk is enabled, KVM "emulates"
+VMMCALL in response to the #UD by trying to fixup the opcode to the "right"
+vendor, then restarts the guest, without skipping the VMMCALL. As a
+result, the guest sees an endless stream of #UDs since it's already
+executing the correct vendor hypercall instruction, i.e. the emulator
+doesn't anticipate that the #UD could be due to lack of interception, as
+opposed to a truly undefined opcode.
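+
+The resulting loop, schematically:
+
+	L2 executes VMMCALL       /* no VMMCALL intercept, so it #UDs */
+	-> #UD vmexit             /* KVM always intercepts #UD */
+	-> quirk rewrites the opcode to the "right" vendor instruction,
+	   which is already VMMCALL, and resumes without skipping it
+	-> L2 re-executes VMMCALL /* and so on, forever */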
+
+Fixes: 0d945bd93511 ("KVM: SVM: Don't allow nested guest to VMMCALL into host")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry@kernel.org>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://patch.msgid.link/20260304002223.1105129-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/hyperv.h | 4 ----
+ arch/x86/kvm/svm/nested.c | 7 -------
+ 2 files changed, 11 deletions(-)
+
+--- a/arch/x86/kvm/svm/hyperv.h
++++ b/arch/x86/kvm/svm/hyperv.h
+@@ -51,10 +51,6 @@ static inline bool nested_svm_is_l2_tlb_
+ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+ #else /* CONFIG_KVM_HYPERV */
+ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+-static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+-{
+- return false;
+-}
+ static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+ {
+ return false;
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -158,13 +158,6 @@ void recalc_intercepts(struct vcpu_svm *
+ vmcb_clr_intercept(c, INTERCEPT_VINTR);
+ }
+
+- /*
+- * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
+- * flush feature is enabled.
+- */
+- if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
+- vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
+-
+ for (i = 0; i < MAX_INTERCEPT; i++)
+ c->intercepts[i] |= g->intercepts[i];
+
--- /dev/null
+From 8d397582f6b5e9fbcf09781c7c934b4910e94a50 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:47 +0000
+Subject: KVM: nSVM: Always use NextRIP as vmcb02's NextRIP after first L2 VMRUN
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8d397582f6b5e9fbcf09781c7c934b4910e94a50 upstream.
+
+For guests with NRIPS disabled, L1 does not provide NextRIP when running
+an L2 with an injected soft interrupt; instead, it advances the current
+RIP before running it. KVM uses the current RIP as the NextRIP in vmcb02
+to emulate a CPU without NRIPS.
+
+However, after L2 runs the first time, NextRIP will be updated by the CPU
+and/or KVM, and the current RIP is no longer the correct value to use in
+vmcb02. Hence, after save/restore, use the current RIP if and only if a
+nested run is pending, otherwise use NextRIP. Give soft_int_next_rip the
+same treatment, as it's the same logic, just for a narrower use case.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-6-yosry@kernel.org
+[sean: give soft_int_next_rip the same treatment]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 28 ++++++++++++++++++----------
+ 1 file changed, 18 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -856,24 +856,32 @@ static void nested_vmcb02_prepare_contro
+ vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;
+
+ /*
+- * next_rip is consumed on VMRUN as the return address pushed on the
++ * NextRIP is consumed on VMRUN as the return address pushed on the
+ * stack for injected soft exceptions/interrupts. If nrips is exposed
+- * to L1, take it verbatim from vmcb12. If nrips is supported in
+- * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
+- * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
+- * prior to injecting the event).
++ * to L1, take it verbatim from vmcb12.
++ *
++ * If nrips is supported in hardware but not exposed to L1, stuff the
++ * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
++ * responsible for advancing RIP prior to injecting the event). This is
++ * only the case for the first L2 run after VMRUN. After that (e.g.
++ * during save/restore), NextRIP is updated by the CPU and/or KVM, and
++ * the value of the L2 RIP from vmcb12 should not be used.
+ */
+- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+- vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+- else if (boot_cpu_has(X86_FEATURE_NRIPS))
+- vmcb02->control.next_rip = vmcb12_rip;
++ if (boot_cpu_has(X86_FEATURE_NRIPS)) {
++ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++ !svm->nested.nested_run_pending)
++ vmcb02->control.next_rip = svm->nested.ctl.next_rip;
++ else
++ vmcb02->control.next_rip = vmcb12_rip;
++ }
+
+ svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+ if (is_evtinj_soft(vmcb02->control.event_inj)) {
+ svm->soft_int_injected = true;
+ svm->soft_int_csbase = vmcb12_csbase;
+ svm->soft_int_old_rip = vmcb12_rip;
+- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++ !svm->nested.nested_run_pending)
+ svm->soft_int_next_rip = svm->nested.ctl.next_rip;
+ else
+ svm->soft_int_next_rip = vmcb12_rip;
--- /dev/null
+From b53ab5167a81537777ac780bbd93d32613aa3bda Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:55 +0000
+Subject: KVM: nSVM: Avoid clearing VMCB_LBR in vmcb12
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit b53ab5167a81537777ac780bbd93d32613aa3bda upstream.
+
+svm_copy_lbrs() always marks VMCB_LBR dirty in the destination VMCB.
+However, nested_svm_vmexit() uses it to copy LBRs to vmcb12, and
+clearing clean bits in vmcb12 is not architecturally defined.
+
+Move vmcb_mark_dirty() to callers and drop it for vmcb12.
+
+This also facilitates upcoming refactoring that will no longer pass the
+entire VMCB to svm_copy_lbrs().
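+
+In sketch form, the dirty-bit handling after this change:
+
+	svm_copy_lbrs(vmcb02, vmcb01);     /* KVM-owned vmcb02... */
+	vmcb_mark_dirty(vmcb02, VMCB_LBR); /* ...so mark it dirty */
+
+	svm_copy_lbrs(vmcb12, vmcb02);     /* guest-owned vmcb12: leave
+	                                    * the clean bits untouched */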
+
+Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 7 +++++--
+ arch/x86/kvm/svm/svm.c | 2 --
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -726,6 +726,7 @@ static void nested_vmcb02_prepare_save(s
+ } else {
+ svm_copy_lbrs(vmcb02, vmcb01);
+ }
++ vmcb_mark_dirty(vmcb02, VMCB_LBR);
+ svm_update_lbrv(&svm->vcpu);
+ }
+
+@@ -1242,10 +1243,12 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
+ if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
++ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ svm_copy_lbrs(vmcb12, vmcb02);
+- else
++ } else {
+ svm_copy_lbrs(vmcb01, vmcb02);
++ vmcb_mark_dirty(vmcb01, VMCB_LBR);
++ }
+
+ svm_update_lbrv(vcpu);
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -848,8 +848,6 @@ void svm_copy_lbrs(struct vmcb *to_vmcb,
+ to_vmcb->save.br_to = from_vmcb->save.br_to;
+ to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
+ to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
+-
+- vmcb_mark_dirty(to_vmcb, VMCB_LBR);
+ }
+
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
--- /dev/null
+From 69b721a86d0dcb026f6db7d111dcde7550442d2e Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:05 +0000
+Subject: KVM: nSVM: Clear EVENTINJ fields in vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 69b721a86d0dcb026f6db7d111dcde7550442d2e upstream.
+
+According to the APM, from the reference of the VMRUN instruction:
+
+ Upon #VMEXIT, the processor performs the following actions in order to
+ return to the host execution context:
+
+ ...
+
+ clear EVENTINJ field in VMCB
+
+KVM already syncs EVENTINJ fields from vmcb02 to cached vmcb12 on every
+L2->L0 #VMEXIT. Since these fields are zeroed by the CPU on #VMEXIT, they
+will mostly be zeroed in vmcb12 on nested #VMEXIT by nested_svm_vmexit().
+
+However, this is not the case when:
+
+ 1. Consistency checks fail, as nested_svm_vmexit() is not called.
+ 2. Entering guest mode fails before L2 runs (e.g. due to failed load of
+ CR3).
+
+(2) was broken by commit 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB
+controls updated by the processor on every vmexit"), as prior to that
+nested_svm_vmexit() always zeroed EVENTINJ fields.
+
+Explicitly clear the fields in all nested #VMEXIT code paths.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Fixes: 2d8a42be0e2b ("KVM: nSVM: synchronize VMCB controls updated by the processor on every vmexit")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-12-yosry@kernel.org
+[sean: massage changelog formatting]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1045,6 +1045,8 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ vmcb12->control.exit_code = SVM_EXIT_ERR;
+ vmcb12->control.exit_info_1 = 0;
+ vmcb12->control.exit_info_2 = 0;
++ vmcb12->control.event_inj = 0;
++ vmcb12->control.event_inj_err = 0;
+ svm_set_gif(svm, false);
+ goto out;
+ }
+@@ -1188,9 +1190,9 @@ static int nested_svm_vmexit_update_vmcb
+ if (nested_vmcb12_has_lbrv(vcpu))
+ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+
++ vmcb12->control.event_inj = 0;
++ vmcb12->control.event_inj_err = 0;
+ vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
+- vmcb12->control.event_inj = svm->nested.ctl.event_inj;
+- vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
+
+ trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+ vmcb12->control.exit_info_1,
--- /dev/null
+From f85a6ce06e4a0d49652f57967a649ab09e06287c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:04 +0000
+Subject: KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID)
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit f85a6ce06e4a0d49652f57967a649ab09e06287c upstream.
+
+According to the APM, GIF is set to 0 on any #VMEXIT, including
+an #VMEXIT(INVALID) due to failed consistency checks. Clear GIF on
+consistency check failures.
+
+Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-11-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1045,6 +1045,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ vmcb12->control.exit_code = SVM_EXIT_ERR;
+ vmcb12->control.exit_info_1 = 0;
+ vmcb12->control.exit_info_2 = 0;
++ svm_set_gif(svm, false);
+ goto out;
+ }
+
--- /dev/null
+From 8998e1d012f3f45d0456f16706682cef04c3c436 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:06 +0000
+Subject: KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 8998e1d012f3f45d0456f16706682cef04c3c436 upstream.
+
+KVM clears tracking of L1->L2 injected NMIs (i.e. nmi_l1_to_l2) and soft
+IRQs (i.e. soft_int_injected) on a synthesized #VMEXIT(INVALID) due to
+failed VMRUN. However, they are not explicitly cleared in other
+synthesized #VMEXITs.
+
+soft_int_injected is always cleared after the first VMRUN of L2 when
+completing interrupts, as any re-injection is then tracked by KVM
+(instead of purely in vmcb02).
+
+nmi_l1_to_l2 is not cleared after the first VMRUN if NMI injection
+failed, as KVM still needs to remember that the NMI originated from L1
+to avoid blocking NMIs for L1. It is only cleared when the NMI injection
+succeeds.
+
+KVM could synthesize a #VMEXIT to L1 before successfully injecting the
+NMI into L2 (e.g. due to a #NPF on L2's NMI handler in L1's NPTs). In
+this case, nmi_l1_to_l2 will remain true, and KVM may not correctly mask
+NMIs and intercept IRET when injecting an NMI into L1.
+
+Clear both nmi_l1_to_l2 and soft_int_injected in nested_svm_vmexit(), i.e.
+for all #VMEXITs except those that occur due to failed consistency checks,
+as those happen before nmi_l1_to_l2 or soft_int_injected are set.
+
+Fixes: 159fc6fa3b7d ("KVM: nSVM: Transparently handle L1 -> L2 NMI re-injection")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-13-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1074,8 +1074,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+
+ out_exit_err:
+ svm->nested.nested_run_pending = 0;
+- svm->nmi_l1_to_l2 = false;
+- svm->soft_int_injected = false;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_ERR;
+ svm->vmcb->control.exit_info_1 = 0;
+@@ -1331,6 +1329,10 @@ void nested_svm_vmexit(struct vcpu_svm *
+ if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true))
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
++ /* Drop tracking for L1->L2 injected NMIs and soft IRQs */
++ svm->nmi_l1_to_l2 = false;
++ svm->soft_int_injected = false;
++
+ /*
+ * Drop what we picked up for L2 via svm_complete_interrupts() so it
+ * doesn't end up in L1.
--- /dev/null
+From c64bc6ed1764c1b7e3c0017019f743196074092f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 4 Mar 2026 16:06:56 -0800
+Subject: KVM: nSVM: Delay setting soft IRQ RIP tracking fields until vCPU run
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c64bc6ed1764c1b7e3c0017019f743196074092f upstream.
+
+In the save+restore path, when restoring nested state, the values of RIP
+and CS base passed into nested_vmcb02_prepare_control() are mostly
+incorrect. They are both pulled from the vmcb02. For CS base, the value
+is only correct if system regs are restored before nested state. The
+value of RIP is whatever the vCPU had in vmcb02 before restoring nested
+state (zero on a freshly created vCPU).
+
+Instead, take a similar approach to NextRIP, and delay initializing the
+RIP tracking fields until shortly before the vCPU is run, to make sure
+the most up-to-date values of RIP and CS base are used regardless of
+KVM_SET_SREGS, KVM_SET_REGS, and KVM_SET_NESTED_STATE's relative
+ordering.
+
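+A hedged sketch of the problematic restore flow from userspace (the
+ioctls are real; the surrounding code is illustrative only):
+
+ /* With nested state restored first, the old code sampled stale
+  * RIP and CS base from vmcb02 at this point. */
+ ioctl(vcpu_fd, KVM_SET_NESTED_STATE, &nested_state);
+ ioctl(vcpu_fd, KVM_SET_REGS, &regs); /* RIP arrives too late */
+ ioctl(vcpu_fd, KVM_SET_SREGS, &sregs); /* CS base, likewise */
+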
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-8-yosry@kernel.org
+[sean: deal with the svm_cancel_injection() madness]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 17 ++++++++---------
+ arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++++++++++++
+ 2 files changed, 37 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -754,9 +754,7 @@ static bool is_evtinj_nmi(u32 evtinj)
+ return type == SVM_EVTINJ_TYPE_NMI;
+ }
+
+-static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
+- unsigned long vmcb12_rip,
+- unsigned long vmcb12_csbase)
++static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
+ {
+ u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
+ u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+@@ -868,15 +866,16 @@ static void nested_vmcb02_prepare_contro
+ vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+
+ svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
++
++ /*
++ * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1
++ * doesn't have NRIPS) are initialized later, before the vCPU is run.
++ */
+ if (is_evtinj_soft(vmcb02->control.event_inj)) {
+ svm->soft_int_injected = true;
+- svm->soft_int_csbase = vmcb12_csbase;
+- svm->soft_int_old_rip = vmcb12_rip;
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+ !svm->nested.nested_run_pending)
+ svm->soft_int_next_rip = svm->nested.ctl.next_rip;
+- else
+- svm->soft_int_next_rip = vmcb12_rip;
+ }
+
+ /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */
+@@ -974,7 +973,7 @@ int enter_svm_guest_mode(struct kvm_vcpu
+ nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+- nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
++ nested_vmcb02_prepare_control(svm);
+ nested_vmcb02_prepare_save(svm, vmcb12);
+
+ ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
+@@ -1920,7 +1919,7 @@ static int svm_set_nested_state(struct k
+ nested_copy_vmcb_control_to_cache(svm, ctl);
+
+ svm_switch_vmcb(svm, &svm->nested.vmcb02);
+- nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
++ nested_vmcb02_prepare_control(svm);
+
+ /*
+ * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3639,6 +3639,16 @@ static int svm_handle_exit(struct kvm_vc
+ return svm_invoke_exit_handler(vcpu, svm->vmcb->control.exit_code);
+ }
+
++static void svm_set_nested_run_soft_int_state(struct kvm_vcpu *vcpu)
++{
++ struct vcpu_svm *svm = to_svm(vcpu);
++
++ svm->soft_int_csbase = svm->vmcb->save.cs.base;
++ svm->soft_int_old_rip = kvm_rip_read(vcpu);
++ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++ svm->soft_int_next_rip = kvm_rip_read(vcpu);
++}
++
+ static int pre_svm_run(struct kvm_vcpu *vcpu)
+ {
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+@@ -3761,6 +3771,13 @@ static void svm_fixup_nested_rips(struct
+ if (boot_cpu_has(X86_FEATURE_NRIPS) &&
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+ svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
++
++ /*
++ * Similarly, initialize the soft int metadata here to use the most
++ * up-to-date values of RIP and CS base, regardless of restore order.
++ */
++ if (svm->soft_int_injected)
++ svm_set_nested_run_soft_int_state(vcpu);
+ }
+
+ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+@@ -4131,6 +4148,18 @@ static void svm_complete_soft_interrupt(
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /*
++ * Initialize the soft int fields *before* reading them below if KVM
++ * aborted entry to the guest with a nested VMRUN pending. To ensure
++ * KVM uses up-to-date values for RIP and CS base across save/restore,
++ * regardless of restore order, KVM waits to set the soft int fields
++ * until VMRUN is imminent. But when canceling injection, KVM requeues
++ * the soft int and will reinject it via the standard injection flow,
++ * and so KVM needs to grab the state from the pending nested VMRUN.
++ */
++ if (is_guest_mode(vcpu) && svm->nested.nested_run_pending)
++ svm_set_nested_run_soft_int_state(vcpu);
++
++ /*
+ * If NRIPS is enabled, KVM must snapshot the pre-VMRUN next_rip that's
+ * associated with the original soft exception/interrupt. next_rip is
+ * cleared on all exits that can occur while vectoring an event, so KVM
--- /dev/null
+From a0592461f39c00b28f552fe842a063a00043eaa8 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:48 +0000
+Subject: KVM: nSVM: Delay stuffing L2's current RIP into NextRIP until vCPU run
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit a0592461f39c00b28f552fe842a063a00043eaa8 upstream.
+
+For guests with NRIPS disabled, L1 does not provide NextRIP when running
+an L2 with an injected soft interrupt, instead it advances L2's RIP
+before running it. KVM uses L2's current RIP as the NextRIP in vmcb02 to
+emulate a CPU without NRIPS.
+
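+For illustration, this is roughly what an nrips=0 L1 hypervisor does
+before VMRUN (the field names and EVENTINJ flags are real; the snippet
+itself is a sketch of L1 code, not anything in KVM):
+
+ /* L1 emulates NextRIP: advance L2's RIP past the INTn, then
+  * inject the soft interrupt via EVENTINJ. */
+ vmcb12->save.rip += insn_len;
+ vmcb12->control.event_inj = vector | SVM_EVTINJ_TYPE_SOFT |
+  SVM_EVTINJ_VALID;
+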
+However, in svm_set_nested_state(), the value used for L2's current RIP
+comes from vmcb02, which is just whatever the vCPU had in vmcb02 before
+restoring nested state (zero on a freshly created vCPU). Passing the
+cached RIP value instead (i.e. kvm_rip_read()) would only fix the issue
+if registers are restored before nested state.
+
+Instead, split the logic of setting NextRIP in vmcb02. Handle the
+'normal' case of initializing vmcb02's NextRIP using NextRIP from vmcb12
+(or KVM_GET_NESTED_STATE's payload) in nested_vmcb02_prepare_control().
+Delay the special case of stuffing L2's current RIP into vmcb02's
+NextRIP until shortly before the vCPU is run, to make sure the most
+up-to-date value of RIP is used regardless of KVM_SET_REGS and
+KVM_SET_NESTED_STATE's relative ordering.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-7-yosry@kernel.org
+[sean: use new helper, svm_fixup_nested_rips()]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 25 ++++++++-----------------
+ arch/x86/kvm/svm/svm.c | 25 +++++++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 17 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -856,24 +856,15 @@ static void nested_vmcb02_prepare_contro
+ vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;
+
+ /*
+- * NextRIP is consumed on VMRUN as the return address pushed on the
+- * stack for injected soft exceptions/interrupts. If nrips is exposed
+- * to L1, take it verbatim from vmcb12.
+- *
+- * If nrips is supported in hardware but not exposed to L1, stuff the
+- * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
+- * responsible for advancing RIP prior to injecting the event). This is
+- * only the case for the first L2 run after VMRUN. After that (e.g.
+- * during save/restore), NextRIP is updated by the CPU and/or KVM, and
+- * the value of the L2 RIP from vmcb12 should not be used.
++ * If nrips is exposed to L1, take NextRIP as-is. Otherwise, L1
++ * advances L2's RIP before VMRUN instead of using NextRIP. KVM will
++ * stuff the current RIP as vmcb02's NextRIP before L2 is run. After
++ * the first run of L2 (e.g. after save+restore), NextRIP is updated by
++ * the CPU and/or KVM and should be used regardless of L1's support.
+ */
+- if (boot_cpu_has(X86_FEATURE_NRIPS)) {
+- if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
+- !svm->nested.nested_run_pending)
+- vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+- else
+- vmcb02->control.next_rip = vmcb12_rip;
+- }
++ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
++ !svm->nested.nested_run_pending)
++ vmcb02->control.next_rip = svm->nested.ctl.next_rip;
+
+ svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
+ if (is_evtinj_soft(vmcb02->control.event_inj)) {
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3742,6 +3742,29 @@ static void svm_inject_irq(struct kvm_vc
+ svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
+ }
+
++static void svm_fixup_nested_rips(struct kvm_vcpu *vcpu)
++{
++ struct vcpu_svm *svm = to_svm(vcpu);
++
++ if (!is_guest_mode(vcpu) || !svm->nested.nested_run_pending)
++ return;
++
++ /*
++ * If nrips is supported in hardware but not exposed to L1, stuff the
++ * actual L2 RIP to emulate what a nrips=0 CPU would do (L1 is
++ * responsible for advancing RIP prior to injecting the event). Once L2
++ * runs after L1 executes VMRUN, NextRIP is updated by the CPU and/or
++ * KVM, and this is no longer needed.
++ *
++ * This is done here (as opposed to when preparing vmcb02) to use the
++ * most up-to-date value of RIP regardless of the order of restoring
++ * registers and nested state in the vCPU save+restore path.
++ */
++ if (boot_cpu_has(X86_FEATURE_NRIPS) &&
++ !guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
++ svm->vmcb->control.next_rip = kvm_rip_read(vcpu);
++}
++
+ void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
+ int trig_mode, int vector)
+ {
+@@ -4338,6 +4361,8 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+ kvm_register_is_dirty(vcpu, VCPU_EXREG_ERAPS))
+ svm->vmcb->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;
+
++ svm_fixup_nested_rips(vcpu);
++
+ svm_hv_update_vp_id(svm->vmcb, vcpu);
+
+ /*
--- /dev/null
+From e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:08 +0000
+Subject: KVM: nSVM: Drop the non-architectural consistency check for NP_ENABLE
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit e0b6f031d64c086edd563e7af9c0c0a2261dd2a4 upstream.
+
+KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka
+SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPTs.
+On first glance, it seems like the check should actually be for
+guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the
+host to support NPTs but the guest CPUID to not advertise it.
+
+However, the consistency check is not architectural to begin with. The
+APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor
+that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored
+if X86_FEATURE_NPT is not available for L1, so sanitize it when copying
+from vmcb12 to KVM's cache.
+
+Apart from the consistency check, NP_ENABLE in VMCB12 is currently
+ignored because the bit is actually copied from VMCB01 to VMCB02, not
+from VMCB12.
+
+Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-15-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -350,9 +350,6 @@ static bool __nested_vmcb_check_controls
+ if (CC(control->asid == 0))
+ return false;
+
+- if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+- return false;
+-
+ if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+ MSRPM_SIZE)))
+ return false;
+@@ -462,6 +459,11 @@ void __nested_copy_vmcb_control_to_cache
+ nested_svm_sanitize_intercept(vcpu, to, SKINIT);
+ nested_svm_sanitize_intercept(vcpu, to, RDPRU);
+
++ /* Always clear SVM_NESTED_CTL_NP_ENABLE if the guest cannot use NPTs */
++ to->nested_ctl = from->nested_ctl;
++ if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT))
++ to->nested_ctl &= ~SVM_NESTED_CTL_NP_ENABLE;
++
+ to->iopm_base_pa = from->iopm_base_pa;
+ to->msrpm_base_pa = from->msrpm_base_pa;
+ to->tsc_offset = from->tsc_offset;
+@@ -475,7 +477,6 @@ void __nested_copy_vmcb_control_to_cache
+ to->exit_info_2 = from->exit_info_2;
+ to->exit_int_info = from->exit_int_info;
+ to->exit_int_info_err = from->exit_int_info_err;
+- to->nested_ctl = from->nested_ctl;
+ to->event_inj = from->event_inj;
+ to->event_inj_err = from->event_inj_err;
+ to->next_rip = from->next_rip;
--- /dev/null
+From 24f7d36b824b65cf1a2db3db478059187b2a37b0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 24 Feb 2026 22:50:17 +0000
+Subject: KVM: nSVM: Ensure AVIC is inhibited when restoring a vCPU to guest mode
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 24f7d36b824b65cf1a2db3db478059187b2a37b0 upstream.
+
+On nested VMRUN, KVM ensures AVIC is inhibited by requesting
+KVM_REQ_APICV_UPDATE, triggering a check of inhibit reasons, finding
+APICV_INHIBIT_REASON_NESTED, and disabling AVIC.
+
+However, when KVM_SET_NESTED_STATE is performed on a vCPU not in guest
+mode with AVIC enabled, KVM_REQ_APICV_UPDATE is not requested, and AVIC
+is not inhibited.
+
+Request KVM_REQ_APICV_UPDATE in the KVM_SET_NESTED_STATE path if AVIC is
+active, similar to the nested VMRUN path.
+
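+For reference, the request is consumed on the next entry to the guest;
+a simplified sketch of the relevant vcpu_enter_guest() step:
+
+ if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
+  kvm_vcpu_update_apicv(vcpu); /* re-evaluates inhibit reasons */
+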
+Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running")
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260224225017.3303870-1-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1939,6 +1939,9 @@ static int svm_set_nested_state(struct k
+
+ svm->nested.force_msr_bitmap_recalc = true;
+
++ if (kvm_vcpu_apicv_active(vcpu))
++ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
++
+ kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+ ret = 0;
+ out_free:
--- /dev/null
+From e63fb1379f4b9300a44739964e69549bebbcdca4 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 10 Feb 2026 01:08:06 +0000
+Subject: KVM: nSVM: Mark all of vmcb02 dirty when restoring nested state
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit e63fb1379f4b9300a44739964e69549bebbcdca4 upstream.
+
+When restoring a vCPU in guest mode, any state restored before
+KVM_SET_NESTED_STATE (e.g. KVM_SET_SREGS) will mark the corresponding
+dirty bits in vmcb01, as it is the active VMCB before switching to
+vmcb02 in svm_set_nested_state().
+
+Hence, mark all fields in vmcb02 dirty in svm_set_nested_state() to
+capture any previously restored fields.
+
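+For reference, vmcb_mark_all_dirty() boils down to (simplified):
+
+ /* Zeroing the clean field forces hardware to reload every VMCB
+  * area on the next VMRUN. */
+ vmcb->control.clean = 0;
+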
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260210010806.3204289-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1918,6 +1918,12 @@ static int svm_set_nested_state(struct k
+ nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);
+
+ /*
++ * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
++ * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here.
++ */
++ vmcb_mark_all_dirty(svm->vmcb);
++
++ /*
+ * While the nested guest CR3 is already checked and set by
+ * KVM_SET_SREGS, it was set when nested state was yet loaded,
+ * thus MMU might not be initialized correctly.
--- /dev/null
+From c36991c6f8d2ab56ee67aff04e3c357f45cfc76c Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Tue, 3 Mar 2026 16:22:22 -0800
+Subject: KVM: nSVM: Raise #UD if unhandled VMMCALL isn't intercepted by L1
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit c36991c6f8d2ab56ee67aff04e3c357f45cfc76c upstream.
+
+Explicitly synthesize a #UD for VMMCALL if L2 is active, L1 does NOT want
+to intercept VMMCALL, nested_svm_l2_tlb_flush_enabled() is true, and the
+hypercall is something other than one of the supported Hyper-V hypercalls.
+When all of the above conditions are met, KVM will intercept VMMCALL but
+never forward it to L1, i.e. will let L2 make hypercalls as if it were L1.
+
+The TLFS says a whole lot of nothing about this scenario, so go with the
+architectural behavior, which says that VMMCALL #UDs if it's not
+intercepted.
+
+Opportunistically do a 2-for-1 stub trade by stub-ifying the new API
+instead of the helpers it uses. The last remaining "single" stub will
+soon be dropped as well.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Fixes: 3f4a812edf5c ("KVM: nSVM: hyper-v: Enable L2 TLB flush")
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Link: https://patch.msgid.link/20260228033328.2285047-5-chengkev@google.com
+[sean: rewrite changelog and comment, tag for stable, remove defunct stubs]
+Reviewed-by: Yosry Ahmed <yosry@kernel.org>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Link: https://patch.msgid.link/20260304002223.1105129-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.h | 8 --------
+ arch/x86/kvm/svm/hyperv.h | 11 +++++++++++
+ arch/x86/kvm/svm/nested.c | 4 +---
+ arch/x86/kvm/svm/svm.c | 19 ++++++++++++++++++-
+ 4 files changed, 30 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.h
++++ b/arch/x86/kvm/hyperv.h
+@@ -305,14 +305,6 @@ static inline bool kvm_hv_has_stimer_pen
+ {
+ return false;
+ }
+-static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+-{
+- return false;
+-}
+-static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
+-{
+- return false;
+-}
+ static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
+ {
+ return 0;
+--- a/arch/x86/kvm/svm/hyperv.h
++++ b/arch/x86/kvm/svm/hyperv.h
+@@ -41,6 +41,13 @@ static inline bool nested_svm_l2_tlb_flu
+ return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
+ }
+
++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
++{
++ return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
++ nested_svm_l2_tlb_flush_enabled(vcpu) &&
++ kvm_hv_is_tlb_flush_hcall(vcpu);
++}
++
+ void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+ #else /* CONFIG_KVM_HYPERV */
+ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) {}
+@@ -48,6 +55,10 @@ static inline bool nested_svm_l2_tlb_flu
+ {
+ return false;
+ }
++static inline bool nested_svm_is_l2_tlb_flush_hcall(struct kvm_vcpu *vcpu)
++{
++ return false;
++}
+ static inline void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) {}
+ #endif /* CONFIG_KVM_HYPERV */
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1711,9 +1711,7 @@ int nested_svm_exit_special(struct vcpu_
+ }
+ case SVM_EXIT_VMMCALL:
+ /* Hyper-V L2 TLB flush hypercall is handled by L0 */
+- if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+- nested_svm_l2_tlb_flush_enabled(vcpu) &&
+- kvm_hv_is_tlb_flush_hcall(vcpu))
++ if (nested_svm_is_l2_tlb_flush_hcall(vcpu))
+ return NESTED_EXIT_HOST;
+ break;
+ default:
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -52,6 +52,7 @@
+ #include "svm.h"
+ #include "svm_ops.h"
+
++#include "hyperv.h"
+ #include "kvm_onhyperv.h"
+ #include "svm_onhyperv.h"
+
+@@ -3249,6 +3250,22 @@ static int bus_lock_exit(struct kvm_vcpu
+ return 0;
+ }
+
++static int vmmcall_interception(struct kvm_vcpu *vcpu)
++{
++ /*
++ * Inject a #UD if L2 is active and the VMMCALL isn't a Hyper-V TLB
++ * hypercall, as VMMCALL #UDs if it's not intercepted, and this path is
++ * reachable if and only if L1 doesn't want to intercept VMMCALL or has
++ * enabled L0 (KVM) handling of Hyper-V L2 TLB flush hypercalls.
++ */
++ if (is_guest_mode(vcpu) && !nested_svm_is_l2_tlb_flush_hcall(vcpu)) {
++ kvm_queue_exception(vcpu, UD_VECTOR);
++ return 1;
++ }
++
++ return kvm_emulate_hypercall(vcpu);
++}
++
+ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+ [SVM_EXIT_READ_CR0] = cr_interception,
+ [SVM_EXIT_READ_CR3] = cr_interception,
+@@ -3299,7 +3316,7 @@ static int (*const svm_exit_handlers[])(
+ [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
+ [SVM_EXIT_SHUTDOWN] = shutdown_interception,
+ [SVM_EXIT_VMRUN] = vmrun_interception,
+- [SVM_EXIT_VMMCALL] = kvm_emulate_hypercall,
++ [SVM_EXIT_VMMCALL] = vmmcall_interception,
+ [SVM_EXIT_VMLOAD] = vmload_interception,
+ [SVM_EXIT_VMSAVE] = vmsave_interception,
+ [SVM_EXIT_STGI] = stgi_interception,
--- /dev/null
+From 290c8d82023ab0e1d2782d37136541e017174d7c Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:00 +0000
+Subject: KVM: nSVM: Refactor checking LBRV enablement in vmcb12 into a helper
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 290c8d82023ab0e1d2782d37136541e017174d7c upstream.
+
+Refactor the vCPU cap and vmcb12 flag checks into a helper. The
+unlikely() annotation is dropped; it's unlikely (huh) to make a
+difference, and the CPU will probably predict it better on its own.
+
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-7-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -651,6 +651,12 @@ void nested_vmcb02_compute_g_pat(struct
+ svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
+ }
+
++static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu)
++{
++ return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
++ (to_svm(vcpu)->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
++}
++
+ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+ {
+ bool new_vmcb12 = false;
+@@ -715,8 +721,7 @@ static void nested_vmcb02_prepare_save(s
+ vmcb_mark_dirty(vmcb02, VMCB_DR);
+ }
+
+- if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++ if (nested_vmcb12_has_lbrv(vcpu)) {
+ /*
+ * Reserved bits of DEBUGCTL are ignored. Be consistent with
+ * svm_set_msr's definition of reserved bits.
+@@ -1243,8 +1248,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ if (!nested_exit_on_intr(svm))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
+- if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
++ if (nested_vmcb12_has_lbrv(vcpu)) {
+ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+ } else {
+ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
--- /dev/null
+From dcf3648ab71437b504abbfdc4e74622a0f1a56e3 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:01 +0000
+Subject: KVM: nSVM: Refactor writing vmcb12 on nested #VMEXIT as a helper
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit dcf3648ab71437b504abbfdc4e74622a0f1a56e3 upstream.
+
+Move mapping vmcb12 and updating it out of nested_svm_vmexit() into a
+helper, no functional change intended.
+
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-8-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 77 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 44 insertions(+), 33 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1134,36 +1134,20 @@ void svm_copy_vmloadsave_state(struct vm
+ to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+ }
+
+-int nested_svm_vmexit(struct vcpu_svm *svm)
++static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_vcpu *vcpu = &svm->vcpu;
+- struct vmcb *vmcb01 = svm->vmcb01.ptr;
++ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+- struct vmcb *vmcb12;
+ struct kvm_host_map map;
++ struct vmcb *vmcb12;
+ int rc;
+
+ rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+- if (rc) {
+- if (rc == -EINVAL)
+- kvm_inject_gp(vcpu, 0);
+- return 1;
+- }
++ if (rc)
++ return rc;
+
+ vmcb12 = map.hva;
+
+- /* Exit Guest-Mode */
+- leave_guest_mode(vcpu);
+- svm->nested.vmcb12_gpa = 0;
+- WARN_ON_ONCE(svm->nested.nested_run_pending);
+-
+- kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+-
+- /* in case we halted in L2 */
+- kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
+-
+- /* Give the current vmcb to the guest */
+-
+ vmcb12->save.es = vmcb02->save.es;
+ vmcb12->save.cs = vmcb02->save.cs;
+ vmcb12->save.ss = vmcb02->save.ss;
+@@ -1200,10 +1184,48 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
+ vmcb12->control.next_rip = vmcb02->control.next_rip;
+
++ if (nested_vmcb12_has_lbrv(vcpu))
++ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
++
+ vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
+ vmcb12->control.event_inj = svm->nested.ctl.event_inj;
+ vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
+
++ trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
++ vmcb12->control.exit_info_1,
++ vmcb12->control.exit_info_2,
++ vmcb12->control.exit_int_info,
++ vmcb12->control.exit_int_info_err,
++ KVM_ISA_SVM);
++
++ kvm_vcpu_unmap(vcpu, &map);
++ return 0;
++}
++
++int nested_svm_vmexit(struct vcpu_svm *svm)
++{
++ struct kvm_vcpu *vcpu = &svm->vcpu;
++ struct vmcb *vmcb01 = svm->vmcb01.ptr;
++ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
++ int rc;
++
++ rc = nested_svm_vmexit_update_vmcb12(vcpu);
++ if (rc) {
++ if (rc == -EINVAL)
++ kvm_inject_gp(vcpu, 0);
++ return 1;
++ }
++
++ /* Exit Guest-Mode */
++ leave_guest_mode(vcpu);
++ svm->nested.vmcb12_gpa = 0;
++ WARN_ON_ONCE(svm->nested.nested_run_pending);
++
++ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
++
++ /* in case we halted in L2 */
++ kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);
++
+ if (!kvm_pause_in_guest(vcpu->kvm)) {
+ vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+ vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+@@ -1248,9 +1270,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ if (!nested_exit_on_intr(svm))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
+- if (nested_vmcb12_has_lbrv(vcpu)) {
+- svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+- } else {
++ if (!nested_vmcb12_has_lbrv(vcpu)) {
+ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+ vmcb_mark_dirty(vmcb01, VMCB_LBR);
+ }
+@@ -1306,15 +1326,6 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ svm->vcpu.arch.dr7 = DR7_FIXED_1;
+ kvm_update_dr7(&svm->vcpu);
+
+- trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
+- vmcb12->control.exit_info_1,
+- vmcb12->control.exit_info_2,
+- vmcb12->control.exit_int_info,
+- vmcb12->control.exit_int_info_err,
+- KVM_ISA_SVM);
+-
+- kvm_vcpu_unmap(vcpu, &map);
+-
+ nested_svm_transition_tlb_flush(vcpu);
+
+ nested_svm_uninit_mmu_context(vcpu);
--- /dev/null
+From 03bee264f8ebfd39e0254c98e112d033a7aa9055 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:44 +0000
+Subject: KVM: nSVM: Sync interrupt shadow to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 03bee264f8ebfd39e0254c98e112d033a7aa9055 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+int_state is also written by the CPU, specifically bit 0 (i.e.
+SVM_INTERRUPT_SHADOW_MASK) for nested VMs, but it is not sync'd to
+cached vmcb12. This does not cause a problem if KVM_SET_NESTED_STATE
+preceeds KVM_SET_VCPU_EVENTS in the restore path, as an interrupt shadow
+would be correctly restored to vmcb02 (KVM_SET_VCPU_EVENTS overwrites
+what KVM_SET_NESTED_STATE restored in int_state).
+
+However, if KVM_SET_VCPU_EVENTS precedes KVM_SET_NESTED_STATE, an
+interrupt shadow would be restored into vmcb01 instead of vmcb02. This
+would mostly be benign for L1 (delays an interrupt), but not for L2. For
+L2, the vCPU could hang (e.g. if a wakeup interrupt is delivered before
+a HLT that should have been in an interrupt shadow).
+
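+A hedged sketch of the failing ordering from a VMM's restore path (the
+ioctls are real; setup and error handling are omitted):
+
+ /* The interrupt shadow lands in vmcb01, the active VMCB... */
+ ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
+ /* ...and only now does the vCPU switch to vmcb02. */
+ ioctl(vcpu_fd, KVM_SET_NESTED_STATE, &state);
+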
+Sync int_state to the cached vmcb12 in nested_sync_control_from_vmcb02()
+to avoid this problem. With that, KVM_SET_NESTED_STATE restores the
+correct interrupt shadow state, and if KVM_SET_VCPU_EVENTS follows it
+would overwrite it with the same value.
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -530,6 +530,7 @@ void nested_sync_control_from_vmcb02(str
+ u32 mask;
+ svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
+ svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;
++ svm->nested.ctl.int_state = svm->vmcb->control.int_state;
+
+ /* Only a few fields of int_ctl are written by the processor. */
+ mask = V_IRQ_MASK | V_TPR_MASK;
--- /dev/null
+From 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Wed, 25 Feb 2026 00:59:43 +0000
+Subject: KVM: nSVM: Sync NextRIP to cached vmcb12 after VMRUN of L2
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 778d8c1b2a6ffe622ddcd3bb35b620e6e41f4da0 upstream.
+
+After VMRUN in guest mode, nested_sync_control_from_vmcb02() syncs
+fields written by the CPU from vmcb02 to the cached vmcb12. This is
+because the cached vmcb12 is used as the authoritative copy of some of
+the controls, and is the payload when saving/restoring nested state.
+
+NextRIP is also written by the CPU (in some cases) after VMRUN, but is
+not sync'd to the cached vmcb12. As a result, it is corrupted after
+save/restore (replaced by the original value written by L1 on nested
+VMRUN). This could cause problems for both KVM (e.g. when injecting a
+soft IRQ) and L1 (e.g. when using NextRIP to advance RIP after emulating
+an instruction).
+
+Fix this by sync'ing NextRIP to the cache after VMRUN of L2, but only
+after completing interrupts (not in nested_sync_control_from_vmcb02()),
+as KVM may update NextRIP (e.g. when re-injecting a soft IRQ).
+
+Fixes: cc440cdad5b7 ("KVM: nSVM: implement KVM_GET_NESTED_STATE and KVM_SET_NESTED_STATE")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260225005950.3739782-2-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4436,6 +4436,16 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+
+ svm_complete_interrupts(vcpu);
+
++ /*
++ * Update the cache after completing interrupts to get an accurate
++ * NextRIP, e.g. when re-injecting a soft interrupt.
++ *
++ * FIXME: Rework svm_get_nested_state() to not pull data from the
++ * cache (except for maybe int_ctl).
++ */
++ if (is_guest_mode(vcpu))
++ svm->nested.ctl.next_rip = svm->vmcb->control.next_rip;
++
+ return svm_exit_handlers_fastpath(vcpu);
+ }
+
--- /dev/null
+From 1b30e7551767cb95b3e49bb169c72bbd76b56e05 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:02 +0000
+Subject: KVM: nSVM: Triple fault if mapping VMCB12 fails on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 1b30e7551767cb95b3e49bb169c72bbd76b56e05 upstream.
+
+KVM currently injects a #GP and hopes for the best if mapping VMCB12
+fails on nested #VMEXIT, but only if the failure mode is -EINVAL.
+Mapping the VMCB12 could also fail if creating host mappings fails.
+
+After the #GP is injected, nested_svm_vmexit() bails early, without
+cleaning up (e.g. KVM_REQ_GET_NESTED_STATE_PAGES is set, is_guest_mode()
+is true, etc).
+
+Instead of optionally injecting a #GP, triple fault the guest if mapping
+VMCB12 fails since KVM cannot make a sane recovery. The APM states that
+a #VMEXIT will triple fault if host state is illegal or an exception
+occurs while loading host state, so the behavior is not entirely made
+up.
+
+Do not return early from nested_svm_vmexit(), continue cleaning up the
+vCPU state (e.g. switch back to vmcb01), to handle the failure as
+gracefully as possible.
+
+Fixes: cf74a78b229d ("KVM: SVM: Add VMEXIT handler and intercepts")
+CC: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-9-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1208,12 +1208,8 @@ int nested_svm_vmexit(struct vcpu_svm *
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+
+- rc = nested_svm_vmexit_update_vmcb12(vcpu);
+- if (rc) {
+- if (rc == -EINVAL)
+- kvm_inject_gp(vcpu, 0);
+- return 1;
+- }
++ if (nested_svm_vmexit_update_vmcb12(vcpu))
++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /* Exit Guest-Mode */
+ leave_guest_mode(vcpu);
--- /dev/null
+From 5d291ef0585ed880ed4dd71ea1a5965e0a65fb53 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:34:03 +0000
+Subject: KVM: nSVM: Triple fault if restore host CR3 fails on nested #VMEXIT
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 5d291ef0585ed880ed4dd71ea1a5965e0a65fb53 upstream.
+
+If loading L1's CR3 fails on a nested #VMEXIT, nested_svm_vmexit()
+returns an error code that is ignored by most callers, and continues to
+run L1 with corrupted state. A sane recovery is not possible in this
+case, and HW behavior is to cause a shutdown. Inject a triple fault
+instead, and do not return early from nested_svm_vmexit(). Continue
+cleaning up the vCPU state (e.g. clear pending exceptions), to handle
+the failure as gracefully as possible.
+
+From the APM:
+
+ Upon #VMEXIT, the processor performs the following actions in order to
+ return to the host execution context:
+
+ ...
+
+ if (illegal host state loaded, or exception while loading host state)
+ shutdown
+ else
+ execute first host instruction following the VMRUN
+
+Remove the return value of nested_svm_vmexit(), which is mostly
+unchecked anyway.
+
+Fixes: d82aaef9c88a ("KVM: nSVM: use nested_svm_load_cr3() on guest->host switch")
+CC: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-10-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 10 +++-------
+ arch/x86/kvm/svm/svm.c | 11 ++---------
+ arch/x86/kvm/svm/svm.h | 6 +++---
+ 3 files changed, 8 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1202,12 +1202,11 @@ static int nested_svm_vmexit_update_vmcb
+ return 0;
+ }
+
+-int nested_svm_vmexit(struct vcpu_svm *svm)
++void nested_svm_vmexit(struct vcpu_svm *svm)
+ {
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ struct vmcb *vmcb01 = svm->vmcb01.ptr;
+ struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+- int rc;
+
+ if (nested_svm_vmexit_update_vmcb12(vcpu))
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /* Exit Guest-Mode */
+ leave_guest_mode(vcpu);
+@@ -1330,9 +1329,8 @@ int nested_svm_vmexit(struct vcpu_svm *s
+
+ nested_svm_uninit_mmu_context(vcpu);
+
+- rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
+- if (rc)
+- return 1;
++ if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true))
++ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+
+ /*
+ * Drop what we picked up for L2 via svm_complete_interrupts() so it
+@@ -1357,8 +1355,6 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ */
+ if (kvm_apicv_activated(vcpu->kvm))
+ __kvm_vcpu_update_apicv(vcpu);
+-
+- return 0;
+ }
+
+ static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2233,13 +2233,9 @@ static int emulate_svm_instr(struct kvm_
+ [SVM_INSTR_VMSAVE] = vmsave_interception,
+ };
+ struct vcpu_svm *svm = to_svm(vcpu);
+- int ret;
+
+ if (is_guest_mode(vcpu)) {
+- /* Returns '1' or -errno on failure, '0' on success. */
+- ret = nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
+- if (ret)
+- return ret;
++ nested_svm_simple_vmexit(svm, guest_mode_exit_codes[opcode]);
+ return 1;
+ }
+ return svm_instr_handlers[opcode](vcpu);
+@@ -4872,7 +4868,6 @@ static int svm_enter_smm(struct kvm_vcpu
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct kvm_host_map map_save;
+- int ret;
+
+ if (!is_guest_mode(vcpu))
+ return 0;
+@@ -4892,9 +4887,7 @@ static int svm_enter_smm(struct kvm_vcpu
+ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+ svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
+
+- ret = nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
+- if (ret)
+- return ret;
++ nested_svm_simple_vmexit(svm, SVM_EXIT_SW);
+
+ /*
+ * KVM uses VMCB01 to store L1 host state while L2 runs but
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -793,14 +793,14 @@ int nested_svm_vmrun(struct kvm_vcpu *vc
+ void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save);
+ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+-int nested_svm_vmexit(struct vcpu_svm *svm);
++void nested_svm_vmexit(struct vcpu_svm *svm);
+
+-static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
++static inline void nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
+ {
+ svm->vmcb->control.exit_code = exit_code;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+- return nested_svm_vmexit(svm);
++ nested_svm_vmexit(svm);
+ }
+
+ int nested_svm_exit_handled(struct vcpu_svm *svm);
--- /dev/null
+From 5c247d08bc81bbad4c662dcf5654137a2f8483ec Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+Date: Tue, 3 Feb 2026 20:10:10 +0000
+Subject: KVM: nSVM: Use vcpu->arch.cr2 when updating vmcb12 on nested #VMEXIT
+
+From: Yosry Ahmed <yosry.ahmed@linux.dev>
+
+commit 5c247d08bc81bbad4c662dcf5654137a2f8483ec upstream.
+
+KVM currently uses the value of CR2 from vmcb02 to update vmcb12 on
+nested #VMEXIT. This value is incorrect in some cases, causing L1 to run
+L2 with a corrupted CR2. This could lead to segfaults or data corruption
+if L2 is in the middle of handling a #PF and reads a corrupted CR2. Use
+the correct value in vcpu->arch.cr2 instead.
+
+The value in vcpu->arch.cr2 is sync'd to vmcb02 shortly before a VMRUN
+of L2, and sync'd back to vcpu->arch.cr2 shortly after. The values are
+only out-of-sync in two cases: after save+restore, and after a #PF is
+injected into L2. In either case, if a #VMEXIT to L1 is synthesized
+before L2 runs, using the value in vmcb02 would be incorrect.
+
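+For reference, the sync points in svm_vcpu_run() look roughly like this
+(simplified):
+
+ svm->vmcb->save.cr2 = vcpu->arch.cr2; /* shortly before VMRUN */
+ /* ... enter and run the guest ... */
+ vcpu->arch.cr2 = svm->vmcb->save.cr2; /* shortly after #VMEXIT */
+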
+After save+restore, the value of CR2 is restored by KVM_SET_SREGS into
+vcpu->arch.cr2. It is not reflected in vmcb02 until a VMRUN of L2. Before
+that, it holds whatever was in vmcb02 before restore, which would be
+zero on a new vCPU that never ran nested. If a #VMEXIT to L1 is
+synthesized before L2 ever runs, using vcpu->arch.cr2 to update vmcb12
+is the right thing to do.
+
+The #PF injection case is more nuanced. Although the APM is a bit
+unclear about when CR2 is written during a #PF, the SDM is more clear:
+
+ Processors update CR2 whenever a page fault is detected. If a
+ second page fault occurs while an earlier page fault is being
+ delivered, the faulting linear address of the second fault will
+ overwrite the contents of CR2 (replacing the previous address).
+ These updates to CR2 occur even if the page fault results in a
+ double fault or occurs during the delivery of a double fault.
+
+KVM injecting the exception surely counts as the #PF being "detected".
+More importantly, when an exception is injected into L2 at the time of a
+synthesized #VMEXIT, KVM updates exit_int_info in vmcb12 accordingly,
+such that an L1 hypervisor can re-inject the exception. If CR2 is not
+written at that point, the L1 hypervisor has no way of correctly
+re-injecting the #PF. Hence, if a #VMEXIT to L1 is synthesized after
+the #PF is injected into L2 but before it actually runs, using
+vcpu->arch.cr2 to update vmcb12 is also the right thing to do.
+
+Note that KVM does _not_ update vcpu->arch.cr2 when a #PF is pending for
+L2, only when it is injected. The distinction is important, because only
+injected (but not intercepted) exceptions are propagated to L1 through
+exit_int_info. It would be incorrect to update CR2 in vmcb12 for a
+pending #PF, as L1 would perceive an updated CR2 value with no #PF.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260203201010.1871056-1-yosry.ahmed@linux.dev
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1166,7 +1166,7 @@ int nested_svm_vmexit(struct vcpu_svm *s
+ vmcb12->save.efer = svm->vcpu.arch.efer;
+ vmcb12->save.cr0 = kvm_read_cr0(vcpu);
+ vmcb12->save.cr3 = kvm_read_cr3(vcpu);
+- vmcb12->save.cr2 = vmcb02->save.cr2;
++ vmcb12->save.cr2 = vcpu->arch.cr2;
+ vmcb12->save.cr4 = svm->vcpu.arch.cr4;
+ vmcb12->save.rflags = kvm_get_rflags(vcpu);
+ vmcb12->save.rip = kvm_rip_read(vcpu);
--- /dev/null
+From 3700f0788da6acf73b2df56690f4b201aa4aefd2 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:57 +0000
+Subject: KVM: SVM: Add missing save/restore handling of LBR MSRs
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 3700f0788da6acf73b2df56690f4b201aa4aefd2 upstream.
+
+MSR_IA32_DEBUGCTLMSR and LBR MSRs are currently not enumerated by
+KVM_GET_MSR_INDEX_LIST, and LBR MSRs cannot be set with KVM_SET_MSRS. So
+save/restore is completely broken.
+
+Fix it by adding the MSRs to msrs_to_save_base, and by allowing writes
+to the LBR MSRs only from userspace (they are read-only for the guest),
+and only if LBR virtualization is enabled. Additionally, to correctly
+restore L1's LBRs
+while L2 is running, make sure the LBRs are copied from the captured
+VMCB01 save area in svm_copy_vmrun_state().
+
+Note, for VMX, this also fixes a flaw where MSR_IA32_DEBUGCTLMSR isn't
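+A hedged userspace sketch of where the gap was visible (the ioctl and
+struct are real; allocation sizing and error handling are elided):
+
+ struct kvm_msr_list *list;
+
+ /* Enumerate the MSRs userspace must save/restore; the LBR MSRs
+  * and MSR_IA32_DEBUGCTLMSR were missing from the returned list. */
+ list = malloc(sizeof(*list) + nmsrs * sizeof(__u32));
+ list->nmsrs = nmsrs;
+ ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+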
+reported as an MSR to save/restore.
+
+Note #2, over-reporting MSR_IA32_LASTxxx on Intel is ok, as KVM already
+handles unsupported reads and writes thanks to commit b5e2fec0ebc3 ("KVM:
+Ignore DEBUGCTL MSRs with no effect") (kvm_do_msr_access() will morph the
+unsupported userspace write into a nop).
+
+Fixes: 24e09cbf480a ("KVM: SVM: enable LBR virtualization")
+Cc: stable@vger.kernel.org
+Reported-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-4-yosry@kernel.org
+[sean: guard with lbrv checks, massage changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 5 +++++
+ arch/x86/kvm/svm/svm.c | 42 +++++++++++++++++++++++++++++++++++++-----
+ arch/x86/kvm/x86.c | 3 +++
+ 3 files changed, 45 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -1109,6 +1109,11 @@ void svm_copy_vmrun_state(struct vmcb_sa
+ to_save->isst_addr = from_save->isst_addr;
+ to_save->ssp = from_save->ssp;
+ }
++
++ if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) {
++ svm_copy_lbrs(to_save, from_save);
++ to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS;
++ }
+ }
+
+ void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2788,19 +2788,19 @@ static int svm_get_msr(struct kvm_vcpu *
+ msr_info->data = svm->tsc_aux;
+ break;
+ case MSR_IA32_DEBUGCTLMSR:
+- msr_info->data = svm->vmcb->save.dbgctl;
++ msr_info->data = lbrv ? svm->vmcb->save.dbgctl : 0;
+ break;
+ case MSR_IA32_LASTBRANCHFROMIP:
+- msr_info->data = svm->vmcb->save.br_from;
++ msr_info->data = lbrv ? svm->vmcb->save.br_from : 0;
+ break;
+ case MSR_IA32_LASTBRANCHTOIP:
+- msr_info->data = svm->vmcb->save.br_to;
++ msr_info->data = lbrv ? svm->vmcb->save.br_to : 0;
+ break;
+ case MSR_IA32_LASTINTFROMIP:
+- msr_info->data = svm->vmcb->save.last_excp_from;
++ msr_info->data = lbrv ? svm->vmcb->save.last_excp_from : 0;
+ break;
+ case MSR_IA32_LASTINTTOIP:
+- msr_info->data = svm->vmcb->save.last_excp_to;
++ msr_info->data = lbrv ? svm->vmcb->save.last_excp_to : 0;
+ break;
+ case MSR_VM_HSAVE_PA:
+ msr_info->data = svm->nested.hsave_msr;
+@@ -3075,6 +3075,38 @@ static int svm_set_msr(struct kvm_vcpu *
+ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
+ svm_update_lbrv(vcpu);
+ break;
++ case MSR_IA32_LASTBRANCHFROMIP:
++ if (!lbrv)
++ return KVM_MSR_RET_UNSUPPORTED;
++ if (!msr->host_initiated)
++ return 1;
++ svm->vmcb->save.br_from = data;
++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++ break;
++ case MSR_IA32_LASTBRANCHTOIP:
++ if (!lbrv)
++ return KVM_MSR_RET_UNSUPPORTED;
++ if (!msr->host_initiated)
++ return 1;
++ svm->vmcb->save.br_to = data;
++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++ break;
++ case MSR_IA32_LASTINTFROMIP:
++ if (!lbrv)
++ return KVM_MSR_RET_UNSUPPORTED;
++ if (!msr->host_initiated)
++ return 1;
++ svm->vmcb->save.last_excp_from = data;
++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++ break;
++ case MSR_IA32_LASTINTTOIP:
++ if (!lbrv)
++ return KVM_MSR_RET_UNSUPPORTED;
++ if (!msr->host_initiated)
++ return 1;
++ svm->vmcb->save.last_excp_to = data;
++ vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
++ break;
+ case MSR_VM_HSAVE_PA:
+ /*
+ * Old kernels did not validate the value written to
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -351,6 +351,9 @@ static const u32 msrs_to_save_base[] = {
+ MSR_IA32_U_CET, MSR_IA32_S_CET,
+ MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
+ MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
++ MSR_IA32_DEBUGCTLMSR,
++ MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
++ MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
+ };
+
+ static const u32 msrs_to_save_pmu[] = {
--- /dev/null
+From d5bde6113aed8315a2bfe708730b721be9c2f48b Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 18 Feb 2026 15:09:51 -0800
+Subject: KVM: SVM: Explicitly mark vmcb01 dirty after modifying VMCB intercepts
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d5bde6113aed8315a2bfe708730b721be9c2f48b upstream.
+
+When reacting to an intercept update, explicitly mark vmcb01's intercepts
+dirty, as KVM always initially operates on vmcb01, and nested_svm_vmexit()
+isn't guaranteed to mark VMCB_INTERCEPTS as dirty. I.e. if L2 is active,
+KVM will modify the intercepts for L1, but might not mark them as dirty
+before the next VMRUN of L1.
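+
+A rough timeline of the missed-dirty case (an illustrative sketch, not
+code from this patch; it assumes svm->vmcb points at vmcb02 while L2 is
+active):
+
+ /* L2 active: svm->vmcb == vmcb02 */
+ vmcb_set_intercept(&vmcb01->control, ...); /* L1's intercepts change */
+ recalc_intercepts(svm);  /* old code dirtied only svm->vmcb, i.e. vmcb02 */
+ nested_svm_vmexit(svm);  /* not guaranteed to dirty VMCB_INTERCEPTS */
+ /* the next VMRUN of L1 can consume stale, "clean" vmcb01 intercepts */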
+
+Fixes: 116a0a23676e ("KVM: SVM: Add clean-bit for intercetps, tsc-offset and pause filter count")
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218230958.2877682-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -128,11 +128,13 @@ void recalc_intercepts(struct vcpu_svm *
+ struct vmcb_ctrl_area_cached *g;
+ unsigned int i;
+
+- vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++ vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS);
+
+ if (!is_guest_mode(&svm->vcpu))
+ return;
+
++ vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
++
+ c = &svm->vmcb->control;
+ h = &svm->vmcb01.ptr->control;
+ g = &svm->nested.ctl;
--- /dev/null
+From d99df02ff427f461102230f9c5b90a6c64ee8e23 Mon Sep 17 00:00:00 2001
+From: Kevin Cheng <chengkev@google.com>
+Date: Sat, 28 Feb 2026 03:33:26 +0000
+Subject: KVM: SVM: Inject #UD for INVLPGA if EFER.SVME=0
+
+From: Kevin Cheng <chengkev@google.com>
+
+commit d99df02ff427f461102230f9c5b90a6c64ee8e23 upstream.
+
+INVLPGA should cause a #UD when EFER.SVME is not set. Add a check to
+properly inject #UD when EFER.SVME=0.
+
+Fixes: ff092385e828 ("KVM: SVM: Implement INVLPGA")
+Cc: stable@vger.kernel.org
+Signed-off-by: Kevin Cheng <chengkev@google.com>
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260228033328.2285047-3-chengkev@google.com
+[sean: tag for stable@]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2366,6 +2366,9 @@ static int invlpga_interception(struct k
+ gva_t gva = kvm_rax_read(vcpu);
+ u32 asid = kvm_rcx_read(vcpu);
+
++ if (nested_svm_check_permissions(vcpu))
++ return 1;
++
+ /* FIXME: Handle an address size prefix. */
+ if (!is_long_mode(vcpu))
+ gva = (u32)gva;
--- /dev/null
+From 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 Mon Sep 17 00:00:00 2001
+From: Yosry Ahmed <yosry@kernel.org>
+Date: Tue, 3 Mar 2026 00:33:56 +0000
+Subject: KVM: SVM: Switch svm_copy_lbrs() to a macro
+
+From: Yosry Ahmed <yosry@kernel.org>
+
+commit 361dbe8173c460a2bf8aee23920f6c2dbdcabb94 upstream.
+
+In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area'
+without a containing 'struct vmcb', and later even 'struct
+vmcb_save_area_cached', make it a macro.
+
+Macros are generally dispreferred to functions, mainly due to the lack
+of type safety. However, in this case having a simple macro that copies
+a few fields seems better than copy-pasting the same 5 lines of code in
+different places.
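+
+As an illustration (the cached variant's field layout is an assumption
+based on the plan stated above, not part of this patch), the macro form
+lets the same copy work on both flavors:
+
+ svm_copy_lbrs(&vmcb02->save, &vmcb12->save);     /* struct vmcb_save_area */
+ svm_copy_lbrs(&svm->nested.save, &vmcb12->save); /* struct vmcb_save_area_cached */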
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Yosry Ahmed <yosry@kernel.org>
+Link: https://patch.msgid.link/20260303003421.2185681-3-yosry@kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/nested.c | 8 ++++----
+ arch/x86/kvm/svm/svm.c | 9 ---------
+ arch/x86/kvm/svm/svm.h | 10 +++++++++-
+ 3 files changed, 13 insertions(+), 14 deletions(-)
+
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -721,10 +721,10 @@ static void nested_vmcb02_prepare_save(s
+ * Reserved bits of DEBUGCTL are ignored. Be consistent with
+ * svm_set_msr's definition of reserved bits.
+ */
+- svm_copy_lbrs(vmcb02, vmcb12);
++ svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+ vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
+ } else {
+- svm_copy_lbrs(vmcb02, vmcb01);
++ svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
+ }
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
+ svm_update_lbrv(&svm->vcpu);
+@@ -1243,9 +1243,9 @@ int nested_svm_vmexit(struct vcpu_svm *s
+
+ if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+- svm_copy_lbrs(vmcb12, vmcb02);
++ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+ } else {
+- svm_copy_lbrs(vmcb01, vmcb02);
++ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+ vmcb_mark_dirty(vmcb01, VMCB_LBR);
+ }
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -841,15 +841,6 @@ static void svm_recalc_msr_intercepts(st
+ */
+ }
+
+-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
+-{
+- to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
+- to_vmcb->save.br_from = from_vmcb->save.br_from;
+- to_vmcb->save.br_to = from_vmcb->save.br_to;
+- to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
+- to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
+-}
+-
+ static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
+ {
+ to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -713,8 +713,16 @@ static inline void *svm_vcpu_alloc_msrpm
+ return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
+ }
+
++#define svm_copy_lbrs(to, from) \
++do { \
++ (to)->dbgctl = (from)->dbgctl; \
++ (to)->br_from = (from)->br_from; \
++ (to)->br_to = (from)->br_to; \
++ (to)->last_excp_from = (from)->last_excp_from; \
++ (to)->last_excp_to = (from)->last_excp_to; \
++} while (0)
++
+ void svm_vcpu_free_msrpm(void *msrpm);
+-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+ void svm_enable_lbrv(struct kvm_vcpu *vcpu);
+ void svm_update_lbrv(struct kvm_vcpu *vcpu);
+
--- /dev/null
+From d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 17 Feb 2026 16:54:38 -0800
+Subject: KVM: x86: Defer non-architectural deliver of exception payload to userspace read
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d0ad1b05bbe6f8da159a4dfb6692b3b7ce30ccc8 upstream.
+
+When attempting to play nice with userspace that hasn't enabled
+KVM_CAP_EXCEPTION_PAYLOAD, defer KVM's non-architectural delivery of the
+payload until userspace actually reads relevant vCPU state, and more
+importantly, force delivery of the payload in *all* paths where userspace
+saves relevant vCPU state, not just KVM_GET_VCPU_EVENTS.
+
+Ignoring userspace save/restore for the moment, delivering the payload
+before the exception is injected is wrong regardless of whether L1 or L2
+is running. To make matters even more confusing, the flaw *currently*
+being papered over by the !is_guest_mode() check isn't even the same bug
+that commit da998b46d244 ("kvm: x86: Defer setting of CR2 until #PF
+delivery") was trying to avoid.
+
+At the time of commit da998b46d244, KVM didn't correctly handle exception
+intercepts, as KVM would wait until VM-Entry into L2 was imminent to check
+if the queued exception should morph to a nested VM-Exit. I.e. KVM would
+deliver the payload to L2 and then synthesize a VM-Exit into L1. But the
+payload was only the most blatant issue, e.g. waiting to check exception
+intercepts would also lead to KVM incorrectly escalating a
+should-be-intercepted #PF into a #DF.
+
+That underlying bug was eventually fixed by commit 7709aba8f716 ("KVM: x86:
+Morph pending exceptions to pending VM-Exits at queue time"), but in the
+interim, commit a06230b62b89 ("KVM: x86: Deliver exception payload on
+KVM_GET_VCPU_EVENTS") came along and subtly added another dependency on
+the !is_guest_mode() check.
+
+While not recorded in the changelog, the motivation for deferring the
+!exception_payload_enabled delivery was to fix a flaw where a synthesized
+MTF (Monitor Trap Flag) VM-Exit would drop a pending #DB and clobber DR6.
+On a VM-Exit, VMX CPUs save pending #DB information into the VMCS, which
+is emulated by KVM in nested_vmx_update_pending_dbg() by grabbing the
+payload from the queue/pending exception. I.e. prematurely delivering the
+payload would cause the pending #DB to not be recorded in the VMCS, and of
+course, clobber L2's DR6 as seen by L1.
+
+Jumping back to save+restore, the quirked behavior of forcing delivery of
+the payload only works if userspace does KVM_GET_VCPU_EVENTS *before*
+CR2 or DR6 is saved, i.e. before KVM_GET_SREGS{,2} and KVM_GET_DEBUGREGS.
+E.g. if userspace does KVM_GET_SREGS before KVM_GET_VCPU_EVENTS, then the
+CR2 saved by userspace won't contain the payload for the exception saved
+by KVM_GET_VCPU_EVENTS.
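+
+A minimal illustration of the ordering hazard (hypothetical userspace
+snippet, not from this patch):
+
+ ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);        /* saves a stale CR2 */
+ ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events); /* payload delivered too late */
+ /* the CR2 that userspace restores later lacks the #PF payload */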
+
+Deliberately deliver the payload in the store_regs() path, as it's the
+least awful option even though userspace may not be doing save+restore,
+because if userspace _is_ doing save+restore, it could elide KVM_GET_SREGS
+knowing that SREGS were already saved when the vCPU exited.
+
+Link: https://lore.kernel.org/all/20200207103608.110305-1-oupton@google.com
+Cc: Yosry Ahmed <yosry.ahmed@linux.dev>
+Cc: stable@vger.kernel.org
+Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Tested-by: Yosry Ahmed <yosry.ahmed@linux.dev>
+Link: https://patch.msgid.link/20260218005438.2619063-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 62 +++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 39 insertions(+), 23 deletions(-)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -864,9 +864,6 @@ static void kvm_multiple_exception(struc
+ vcpu->arch.exception.error_code = error_code;
+ vcpu->arch.exception.has_payload = has_payload;
+ vcpu->arch.exception.payload = payload;
+- if (!is_guest_mode(vcpu))
+- kvm_deliver_exception_payload(vcpu,
+- &vcpu->arch.exception);
+ return;
+ }
+
+@@ -5531,18 +5528,8 @@ static int kvm_vcpu_ioctl_x86_set_mce(st
+ return 0;
+ }
+
+-static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
+- struct kvm_vcpu_events *events)
++static struct kvm_queued_exception *kvm_get_exception_to_save(struct kvm_vcpu *vcpu)
+ {
+- struct kvm_queued_exception *ex;
+-
+- process_nmi(vcpu);
+-
+-#ifdef CONFIG_KVM_SMM
+- if (kvm_check_request(KVM_REQ_SMI, vcpu))
+- process_smi(vcpu);
+-#endif
+-
+ /*
+ * KVM's ABI only allows for one exception to be migrated. Luckily,
+ * the only time there can be two queued exceptions is if there's a
+@@ -5553,21 +5540,46 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_
+ if (vcpu->arch.exception_vmexit.pending &&
+ !vcpu->arch.exception.pending &&
+ !vcpu->arch.exception.injected)
+- ex = &vcpu->arch.exception_vmexit;
+- else
+- ex = &vcpu->arch.exception;
++ return &vcpu->arch.exception_vmexit;
++
++ return &vcpu->arch.exception;
++}
++
++static void kvm_handle_exception_payload_quirk(struct kvm_vcpu *vcpu)
++{
++ struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
+
+ /*
+- * In guest mode, payload delivery should be deferred if the exception
+- * will be intercepted by L1, e.g. KVM should not modifying CR2 if L1
+- * intercepts #PF, ditto for DR6 and #DBs. If the per-VM capability,
+- * KVM_CAP_EXCEPTION_PAYLOAD, is not set, userspace may or may not
+- * propagate the payload and so it cannot be safely deferred. Deliver
+- * the payload if the capability hasn't been requested.
++ * If KVM_CAP_EXCEPTION_PAYLOAD is disabled, then (prematurely) deliver
++ * the pending exception payload when userspace saves *any* vCPU state
++ * that interacts with exception payloads to avoid breaking userspace.
++ *
++ * Architecturally, KVM must not deliver an exception payload until the
++ * exception is actually injected, e.g. to avoid losing pending #DB
++ * information (which VMX tracks in the VMCS), and to avoid clobbering
++ * state if the exception is never injected for whatever reason. But
++ * if KVM_CAP_EXCEPTION_PAYLOAD isn't enabled, then userspace may or
++ * may not propagate the payload across save+restore, and so KVM can't
++ * safely defer delivery of the payload.
+ */
+ if (!vcpu->kvm->arch.exception_payload_enabled &&
+ ex->pending && ex->has_payload)
+ kvm_deliver_exception_payload(vcpu, ex);
++}
++
++static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
++ struct kvm_vcpu_events *events)
++{
++ struct kvm_queued_exception *ex = kvm_get_exception_to_save(vcpu);
++
++ process_nmi(vcpu);
++
++#ifdef CONFIG_KVM_SMM
++ if (kvm_check_request(KVM_REQ_SMI, vcpu))
++ process_smi(vcpu);
++#endif
++
++ kvm_handle_exception_payload_quirk(vcpu);
+
+ memset(events, 0, sizeof(*events));
+
+@@ -5746,6 +5758,8 @@ static int kvm_vcpu_ioctl_x86_get_debugr
+ vcpu->arch.guest_state_protected)
+ return -EINVAL;
+
++ kvm_handle_exception_payload_quirk(vcpu);
++
+ memset(dbgregs, 0, sizeof(*dbgregs));
+
+ BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
+@@ -12148,6 +12162,8 @@ static void __get_sregs_common(struct kv
+ if (vcpu->arch.guest_state_protected)
+ goto skip_protected_regs;
+
++ kvm_handle_exception_payload_quirk(vcpu);
++
+ kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
+ kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
+ kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
--- /dev/null
+From da773ea3f59032f659bfc4c450ca86e384786168 Mon Sep 17 00:00:00 2001
+From: Tao Cui <cuitao@kylinos.cn>
+Date: Thu, 9 Apr 2026 18:56:36 +0800
+Subject: LoongArch: KVM: Use CSR_CRMD_PLV in kvm_arch_vcpu_in_kernel()
+
+From: Tao Cui <cuitao@kylinos.cn>
+
+commit da773ea3f59032f659bfc4c450ca86e384786168 upstream.
+
+The function reads LOONGARCH_CSR_CRMD but uses CSR_PRMD_PPLV to
+extract the privilege level. While both masks have the same value
+(0x3), CSR_CRMD_PLV is the semantically correct constant for CRMD.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Bibo Mao <maobibo@loongson.cn>
+Signed-off-by: Tao Cui <cuitao@kylinos.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/kvm/vcpu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/kvm/vcpu.c
++++ b/arch/loongarch/kvm/vcpu.c
+@@ -402,7 +402,7 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_
+ val = gcsr_read(LOONGARCH_CSR_CRMD);
+ preempt_enable();
+
+- return (val & CSR_PRMD_PPLV) == PLV_KERN;
++ return (val & CSR_CRMD_PLV) == PLV_KERN;
+ }
+
+ #ifdef CONFIG_GUEST_PERF_EVENTS
--- /dev/null
+From 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 11 Apr 2026 14:36:36 -0700
+Subject: mm/damon/core: disallow non-power of two min_region_sz on damon_start()
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 95093e5cb4c5b50a5b1a4b79f2942b62744bd66a upstream.
+
+Commit d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region") introduced
+a bug that allows unaligned DAMON region address ranges. Commit
+c80f46ac228b ("mm/damon/core: disallow non-power of two min_region_sz")
+fixed it, but only for the damon_commit_ctx() use case. The DAMON sysfs
+interface can still pass a non-power-of-two min_region_sz via
+damon_start(). Fix that path by adding the is_power_of_2() check to
+damon_start().
+
+The issue was discovered by sashiko [1].
+
+Link: https://lore.kernel.org/20260411213638.77768-1-sj@kernel.org
+Link: https://lore.kernel.org/20260403155530.64647-1-sj@kernel.org [1]
+Fixes: d8f867fa0825 ("mm/damon: add damon_ctx->min_sz_region")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.18.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -1368,6 +1368,11 @@ int damon_start(struct damon_ctx **ctxs,
+ int i;
+ int err = 0;
+
++ for (i = 0; i < nr_ctxs; i++) {
++ if (!is_power_of_2(ctxs[i]->min_region_sz))
++ return -EINVAL;
++ }
++
+ mutex_lock(&damon_lock);
+ if ((exclusive && nr_running_ctxs) ||
+ (!exclusive && running_exclusive_ctxs)) {
--- /dev/null
+From 8bbde987c2b84f80da0853f739f0a920386f8b99 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Mon, 6 Apr 2026 17:31:52 -0700
+Subject: mm/damon/core: disallow time-quota setting zero esz
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 8bbde987c2b84f80da0853f739f0a920386f8b99 upstream.
+
+When the throughput of a DAMOS scheme is very slow, the DAMOS time quota
+can make the effective size quota smaller than damon_ctx->min_region_sz.
+In that case, damos_apply_scheme() will skip applying the action, because
+the action is tried at region level, which requires a size of at least
+min_region_sz. That is, the quota is effectively exceeded for the quota
+charge window.
+
+Because no action will be applied, total_charged_sz and total_charged_ns
+are also not updated. damos_set_effective_quota() will try to update the
+effective size quota before starting the next charge window. However,
+because total_charged_sz and total_charged_ns have not been updated, the
+throughput and the effective size quota also remain unchanged. Since the
+effective size quota can only be decreased, the other update factors,
+including DAMOS quota goals and the size quota, cannot make any change
+either.
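+
+A worked example with made-up numbers: with a min_region_sz of 4 KiB and
+a measured throughput such that throughput * quota->ms is 1 KiB, the
+effective size quota becomes 1 KiB. Every region is at least 4 KiB, so
+no action is ever applied, nothing is charged, the measured throughput
+never changes, and the 1 KiB effective quota persists indefinitely.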
+
+As a result, the scheme is unexpectedly deactivated until the user notices
+and mitigates the situation. Users can mitigate it by changing the time
+quota online or re-installing the scheme. While the mitigation is somewhat
+straightforward, detecting the situation would be challenging, because
+DAMON does not provide good observability for it. Even if such
+observability were provided, doing the additional monitoring and
+mitigation is somewhat cumbersome and not aligned with the intention of
+the time quota, which was meant to help reduce the user's administration
+overhead.
+
+Fix the problem by making the time quota-modified effective size quota
+always at least min_region_sz.
+
+The issue was discovered [1] by sashiko.
+
+Link: https://lore.kernel.org/20260407003153.79589-1-sj@kernel.org
+Link: https://lore.kernel.org/20260405192504.110014-1-sj@kernel.org [1]
+Fixes: 1cd243030059 ("mm/damon/schemes: implement time quota")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 5.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2225,7 +2225,8 @@ static unsigned long damos_quota_score(s
+ /*
+ * Called only if quota->ms, or quota->sz are set, or quota->goals is not empty
+ */
+-static void damos_set_effective_quota(struct damos_quota *quota)
++static void damos_set_effective_quota(struct damos_quota *quota,
++ struct damon_ctx *ctx)
+ {
+ unsigned long throughput;
+ unsigned long esz = ULONG_MAX;
+@@ -2251,6 +2252,7 @@ static void damos_set_effective_quota(st
+ else
+ throughput = PAGE_SIZE * 1024;
+ esz = min(throughput * quota->ms, esz);
++ esz = max(ctx->min_region_sz, esz);
+ }
+
+ if (quota->sz && quota->sz < esz)
+@@ -2287,7 +2289,7 @@ static void damos_adjust_quota(struct da
+ /* First charge window */
+ if (!quota->total_charged_sz && !quota->charged_from) {
+ quota->charged_from = jiffies;
+- damos_set_effective_quota(quota);
++ damos_set_effective_quota(quota, c);
+ }
+
+ /* New charge window starts */
+@@ -2301,7 +2303,7 @@ static void damos_adjust_quota(struct da
+ quota->charged_sz = 0;
+ if (trace_damos_esz_enabled())
+ cached_esz = quota->esz;
+- damos_set_effective_quota(quota);
++ damos_set_effective_quota(quota, c);
+ if (trace_damos_esz_enabled() && quota->esz != cached_esz)
+ damos_trace_esz(c, s, quota);
+ }
--- /dev/null
+From 049a57421dd67a28c45ae7e92c36df758033e5fa Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sun, 29 Mar 2026 08:23:05 -0700
+Subject: mm/damon/core: use time_in_range_open() for damos quota window start
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 049a57421dd67a28c45ae7e92c36df758033e5fa upstream.
+
+damos_adjust_quota() uses time_after_eq() to check whether it is time to
+start a new quota charge window, comparing the current jiffies with the
+scheduled next charge window start time. If it is, the next charge window
+start time is updated and the new charge window starts.
+
+The time check and the next window start time update are skipped while
+the scheme is deactivated by the watermarks. Suppose the deactivation
+lasts more than LONG_MAX jiffies (assuming CONFIG_HZ of 250, more than
+99 days on 32 bit systems and more than one billion years on 64 bit
+systems), so that jiffies ends up larger than the next charge window
+start time + LONG_MAX. Then, the time_after_eq() call can return false
+until another LONG_MAX jiffies have passed.
+
+This means the scheme can continue working after being reactivated by the
+watermarks. But, soon, the quota will be exceeded and the scheme will
+again effectively stop working until the next charge window starts.
+Because the current charge window is extended by up to LONG_MAX jiffies,
+however, it will look like the scheme stopped unexpectedly and
+indefinitely from the user's perspective.
+
+Fix this by using !time_in_range_open() instead.
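+
+A sketch of the wrap with made-up values, given that time_after_eq(a, b)
+is (long)((a) - (b)) >= 0:
+
+ next = quota->charged_from + msecs_to_jiffies(quota->reset_interval);
+ /* suppose the scheme stays deactivated until jiffies == next + LONG_MAX + 1 */
+ time_after_eq(jiffies, next); /* (long)(LONG_MAX + 1) < 0 -> false */
+ /* so the old check cannot start a new window for another ~LONG_MAX
+    jiffies, whereas !time_in_range_open(jiffies, charged_from, next)
+    is true and correctly starts one */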
+
+The issue was discovered [1] by sashiko.
+
+Link: https://lore.kernel.org/20260329152306.45796-1-sj@kernel.org
+Link: https://lore.kernel.org/20260324040722.57944-1-sj@kernel.org [1]
+Fixes: ee801b7dd782 ("mm/damon/schemes: activate schemes based on a watermarks mechanism")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 5.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2291,7 +2291,8 @@ static void damos_adjust_quota(struct da
+ }
+
+ /* New charge window starts */
+- if (time_after_eq(jiffies, quota->charged_from +
++ if (!time_in_range_open(jiffies, quota->charged_from,
++ quota->charged_from +
+ msecs_to_jiffies(quota->reset_interval))) {
+ if (quota->esz && quota->charged_sz >= quota->esz)
+ s->stat.qt_exceeds++;
--- /dev/null
+From 40250b2dded0604a112be605f3828700d80ad7c2 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 28 Mar 2026 21:38:59 -0700
+Subject: mm/damon/core: validate damos_quota_goal->nid for node_mem_{used,free}_bp
+
+From: SeongJae Park <sj@kernel.org>
+
+commit 40250b2dded0604a112be605f3828700d80ad7c2 upstream.
+
+Patch series "mm/damon/core: validate damos_quota_goal->nid".
+
+node_mem[cg]_{used,free}_bp DAMOS quota goals receive the node id. The
+node id is used for si_meminfo_node() and NODE_DATA() without proper
+validation. As a result, privileged users can trigger an out-of-bounds
+memory access using DAMON_SYSFS. Fix the issues.
+
+The issue was originally reported [1] with a fix by another author. That
+author announced [2] that they would stop their work, including the fix
+that was still under review. Hence I'm restarting this.
+
+
+This patch (of 2):
+
+Users can set damos_quota_goal->nid to an arbitrary value for
+node_mem_{used,free}_bp. But DAMON core uses it for si_meminfo_node()
+without validating the value. This can result in an out-of-bounds memory
+access. The issue can actually be triggered using the DAMON user-space
+tool (damo), like below.
+
+ $ sudo ./damo start --damos_action stat \
+ --damos_quota_goal node_mem_used_bp 50% -1 \
+ --damos_quota_interval 1s
+ $ sudo dmesg
+ [...]
+ [ 65.565986] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098
+
+Fix this issue by adding validation of the given node id. If an invalid
+node id is given, return 0% for the used memory ratio and 100% for the
+free memory ratio.
+
+Link: https://lore.kernel.org/20260329043902.46163-2-sj@kernel.org
+Link: https://lore.kernel.org/20260325073034.140353-1-objecting@objecting.org [1]
+Link: https://lore.kernel.org/20260327040924.68553-1-sj@kernel.org [2]
+Fixes: 0e1c773b501f ("mm/damon/core: introduce damos quota goal metrics for memory node utilization")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.16.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2078,12 +2078,24 @@ static inline u64 damos_get_some_mem_psi
+ #endif /* CONFIG_PSI */
+
+ #ifdef CONFIG_NUMA
++static bool invalid_mem_node(int nid)
++{
++ return nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY);
++}
++
+ static __kernel_ulong_t damos_get_node_mem_bp(
+ struct damos_quota_goal *goal)
+ {
+ struct sysinfo i;
+ __kernel_ulong_t numerator;
+
++ if (invalid_mem_node(goal->nid)) {
++ if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
++ return 0;
++ else /* DAMOS_QUOTA_NODE_MEM_FREE_BP */
++ return 10000;
++ }
++
+ si_meminfo_node(&i, goal->nid);
+ if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
+ numerator = i.totalram - i.freeram;
--- /dev/null
+From a34dac6482e53e2c76944f25b1489b9b7da3a6e6 Mon Sep 17 00:00:00 2001
+From: SeongJae Park <sj@kernel.org>
+Date: Sat, 28 Mar 2026 21:39:00 -0700
+Subject: mm/damon/core: validate damos_quota_goal->nid for node_memcg_{used,free}_bp
+
+From: SeongJae Park <sj@kernel.org>
+
+commit a34dac6482e53e2c76944f25b1489b9b7da3a6e6 upstream.
+
+Users can set damos_quota_goal->nid to an arbitrary value for
+node_memcg_{used,free}_bp. But DAMON core uses it for NODE_DATA()
+without validating the value. This can result in an out-of-bounds memory
+access. The issue can actually be triggered using the DAMON user-space
+tool (damo), like below.
+
+ $ sudo mkdir /sys/fs/cgroup/foo
+ $ sudo ./damo start --damos_action stat --damos_quota_interval 1s \
+ --damos_quota_goal node_memcg_used_bp 50% -1 /foo
+ $ sudo dmesg
+ [...]
+ [ 524.181426] Unable to handle kernel paging request at virtual address 0000000000002c00
+
+Fix this issue by adding validation of the given node id. If an invalid
+node id is given, return 0% for the used memory ratio and 100% for the
+free memory ratio.
+
+Link: https://lore.kernel.org/20260329043902.46163-3-sj@kernel.org
+Fixes: b74a120bcf50 ("mm/damon/core: implement DAMOS_QUOTA_NODE_MEMCG_USED_BP")
+Signed-off-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.19.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/core.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/damon/core.c
++++ b/mm/damon/core.c
+@@ -2112,6 +2112,13 @@ static unsigned long damos_get_node_memc
+ unsigned long used_pages, numerator;
+ struct sysinfo i;
+
++ if (invalid_mem_node(goal->nid)) {
++ if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
++ return 0;
++ else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */
++ return 10000;
++ }
++
+ memcg = mem_cgroup_get_from_id(goal->memcg_id);
+ if (!memcg) {
+ if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP)
--- /dev/null
+From e04ed278d25bf15769800bf6e35c6737f137186f Mon Sep 17 00:00:00 2001
+From: Jackie Liu <liuyun01@kylinos.cn>
+Date: Tue, 31 Mar 2026 18:15:53 +0800
+Subject: mm/damon/stat: fix memory leak on damon_start() failure in damon_stat_start()
+
+From: Jackie Liu <liuyun01@kylinos.cn>
+
+commit e04ed278d25bf15769800bf6e35c6737f137186f upstream.
+
+Destroy the DAMON context and reset the global pointer when damon_start()
+fails. Otherwise, the context allocated by damon_stat_build_ctx() is
+leaked, and the stale damon_stat_context pointer will be overwritten on
+the next enable attempt, making the old allocation permanently
+unreachable.
+
+Link: https://lore.kernel.org/20260331101553.88422-1-liu.yun@linux.dev
+Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module")
+Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org> # 6.17.x
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/damon/stat.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/mm/damon/stat.c
++++ b/mm/damon/stat.c
+@@ -255,8 +255,11 @@ static int damon_stat_start(void)
+ if (!damon_stat_context)
+ return -ENOMEM;
+ err = damon_start(&damon_stat_context, 1, true);
+- if (err)
++ if (err) {
++ damon_destroy_ctx(damon_stat_context);
++ damon_stat_context = NULL;
+ return err;
++ }
+
+ damon_stat_last_refresh_jiffies = jiffies;
+ call_control.data = damon_stat_context;
--- /dev/null
+From 3538f90ab89aaf302782b4b073a0aae66904cd67 Mon Sep 17 00:00:00 2001
+From: Chenghao Duan <duanchenghao@kylinos.cn>
+Date: Thu, 26 Mar 2026 16:47:25 +0800
+Subject: mm/memfd_luo: fix physical address conversion in put_folios cleanup
+
+From: Chenghao Duan <duanchenghao@kylinos.cn>
+
+commit 3538f90ab89aaf302782b4b073a0aae66904cd67 upstream.
+
+In memfd_luo_retrieve_folios()'s put_folios cleanup path:
+
+1. kho_restore_folio() expects a phys_addr_t (physical address) but
+ receives a raw PFN (pfolio->pfn). This causes kho_restore_page() to
+ check the wrong physical address (pfn << PAGE_SHIFT instead of the
+ actual physical address).
+
+2. This loop lacks the !pfolio->pfn check that exists in the main
+ retrieval loop and memfd_luo_discard_folios(), which could
+ incorrectly process sparse file holes where pfn=0.
+
+Fix by converting PFN to physical address with PFN_PHYS() and adding
+the !pfolio->pfn check, matching the pattern used elsewhere in this file.
+
+This issue was identified by the AI review.
+https://sashiko.dev/#/patchset/20260323110747.193569-1-duanchenghao@kylinos.cn
+
+Link: https://lore.kernel.org/20260326084727.118437-6-duanchenghao@kylinos.cn
+Fixes: b3749f174d68 ("mm: memfd_luo: allow preserving memfd")
+Signed-off-by: Chenghao Duan <duanchenghao@kylinos.cn>
+Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
+Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
+Cc: Haoran Jiang <jianghaoran@kylinos.cn>
+Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memfd_luo.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/mm/memfd_luo.c
++++ b/mm/memfd_luo.c
+@@ -466,8 +466,13 @@ put_folios:
+ */
+ for (long j = i + 1; j < nr_folios; j++) {
+ const struct memfd_luo_folio_ser *pfolio = &folios_ser[j];
++ phys_addr_t phys;
+
+- folio = kho_restore_folio(pfolio->pfn);
++ if (!pfolio->pfn)
++ continue;
++
++ phys = PFN_PHYS(pfolio->pfn);
++ folio = kho_restore_folio(phys);
+ if (folio)
+ folio_put(folio);
+ }
--- /dev/null
+From 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 Mon Sep 17 00:00:00 2001
+From: Jackie Liu <liuyun01@kylinos.cn>
+Date: Wed, 1 Apr 2026 08:57:02 +0800
+Subject: mm/mempolicy: fix memory leaks in weighted_interleave_auto_store()
+
+From: Jackie Liu <liuyun01@kylinos.cn>
+
+commit 6fae274ce0e3109cbbc4c18b354eaace1f0af7d7 upstream.
+
+weighted_interleave_auto_store() fetches old_wi_state inside the if
+(!input) block only. This causes two memory leaks:
+
+1. When a user writes "false" and the current mode is already manual,
+ the function returns early without freeing the freshly allocated
+ new_wi_state.
+
+2. When a user writes "true", old_wi_state stays NULL because the
+ fetch is skipped entirely. The old state is then overwritten by
+ rcu_assign_pointer() but never freed, since the cleanup path is
+ gated on old_wi_state being non-NULL. A user can trigger this
+ repeatedly by writing "1" in a loop.
+
+Fix both leaks by moving the old_wi_state fetch before the input check,
+making it unconditional. This also allows a unified early return for both
+"true" and "false" when the requested mode matches the current mode.
+
+Link: https://lore.kernel.org/20260401005702.7096-1-liu.yun@linux.dev
+Link: https://sashiko.dev/#/patchset/20260331100740.84906-1-liu.yun@linux.dev
+Fixes: e341f9c3c841 ("mm/mempolicy: Weighted Interleave Auto-tuning")
+Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
+Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
+Reviewed-by: Donet Tom <donettom@linux.ibm.com>
+Cc: Gregory Price <gourry@gourry.net>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Byungchul Park <byungchul@sk.com>
+Cc: David Hildenbrand <david@kernel.org>
+Cc: <stable@vger.kernel.org> # v6.16+
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/mempolicy.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+--- a/mm/mempolicy.c
++++ b/mm/mempolicy.c
+@@ -3706,18 +3706,19 @@ static ssize_t weighted_interleave_auto_
+ new_wi_state->iw_table[i] = 1;
+
+ mutex_lock(&wi_state_lock);
+- if (!input) {
+- old_wi_state = rcu_dereference_protected(wi_state,
+- lockdep_is_held(&wi_state_lock));
+- if (!old_wi_state)
+- goto update_wi_state;
+- if (input == old_wi_state->mode_auto) {
+- mutex_unlock(&wi_state_lock);
+- return count;
+- }
++ old_wi_state = rcu_dereference_protected(wi_state,
++ lockdep_is_held(&wi_state_lock));
++
++ if (old_wi_state && input == old_wi_state->mode_auto) {
++ mutex_unlock(&wi_state_lock);
++ kfree(new_wi_state);
++ return count;
++ }
+
+- memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
+- nr_node_ids * sizeof(u8));
++ if (!input) {
++ if (old_wi_state)
++ memcpy(new_wi_state->iw_table, old_wi_state->iw_table,
++ nr_node_ids * sizeof(u8));
+ goto update_wi_state;
+ }
+
--- /dev/null
+From ec05f51f1e65bce95528543eb73fda56fd201d94 Mon Sep 17 00:00:00 2001
+From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Date: Mon, 13 Apr 2026 21:26:46 +0200
+Subject: mm/vmalloc: take vmap_purge_lock in shrinker
+
+From: Uladzislau Rezki (Sony) <urezki@gmail.com>
+
+commit ec05f51f1e65bce95528543eb73fda56fd201d94 upstream.
+
+decay_va_pool_node() can be invoked concurrently from two paths:
+__purge_vmap_area_lazy() when pools are being purged, and the shrinker via
+vmap_node_shrink_scan().
+
+However, decay_va_pool_node() is not safe to run concurrently, and the
+shrinker path currently lacks serialization, leading to races and possible
+leaks.
+
+Protect decay_va_pool_node() by taking vmap_purge_lock in the shrinker
+path to ensure serialization with purge users.
+
+Link: https://lore.kernel.org/20260413192646.14683-1-urezki@gmail.com
+Fixes: 7679ba6b36db ("mm: vmalloc: add a shrinker to drain vmap pools")
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Reviewed-by: Baoquan He <baoquan.he@linux.dev>
+Cc: chenyichong <chenyichong@uniontech.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -5416,6 +5416,7 @@ vmap_node_shrink_scan(struct shrinker *s
+ {
+ struct vmap_node *vn;
+
++ guard(mutex)(&vmap_purge_lock);
+ for_each_vmap_node(vn)
+ decay_va_pool_node(vn, true);
+
--- /dev/null
+From c7c6d4f5103864f73ee3a78bfd6da241f84197dd Mon Sep 17 00:00:00 2001
+From: Bin Liu <b-liu@ti.com>
+Date: Wed, 25 Mar 2026 08:49:47 -0500
+Subject: mmc: block: use single block write in retry
+
+From: Bin Liu <b-liu@ti.com>
+
+commit c7c6d4f5103864f73ee3a78bfd6da241f84197dd upstream.
+
+Due to errata i2493[0], multi-block writes can still fail during retries.
+
+With i2493, the MMC interface is prone to write failures when issuing
+multi-block writes in HS200 mode under excessive IO supply noise.
+
+While the errata provides guidance on hardware design and layout to
+minimize the IO supply noise, in theory the write failure cannot be
+resolved in hardware. The software solution to ensure data integrity
+is to add a minimum 5us delay between block writes. Single-block write
+is the practical way to introduce that delay.
+
+This patch reuses the recovery_mode flag and switches to single-block
+write on retry when a multi-block write fails. It covers both CQE and
+non-CQE cases.
+
+[0] https://www.ti.com/lit/pdf/sprz582
+Cc: stable@vger.kernel.org
+Suggested-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Bin Liu <b-liu@ti.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/core/block.c | 12 ++++++++++--
+ drivers/mmc/core/queue.h | 3 +++
+ 2 files changed, 13 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/core/block.c
++++ b/drivers/mmc/core/block.c
+@@ -1401,6 +1401,9 @@ static void mmc_blk_data_prep(struct mmc
+ rq_data_dir(req) == WRITE &&
+ (md->flags & MMC_BLK_REL_WR);
+
++ if (mqrq->flags & MQRQ_XFER_SINGLE_BLOCK)
++ recovery_mode = 1;
++
+ memset(brq, 0, sizeof(struct mmc_blk_request));
+
+ mmc_crypto_prepare_req(mqrq);
+@@ -1540,10 +1543,13 @@ static void mmc_blk_cqe_complete_rq(stru
+ err = 0;
+
+ if (err) {
+- if (mqrq->retries++ < MMC_CQE_RETRIES)
++ if (mqrq->retries++ < MMC_CQE_RETRIES) {
++ if (rq_data_dir(req) == WRITE)
++ mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+ blk_mq_requeue_request(req, true);
+- else
++ } else {
+ blk_mq_end_request(req, BLK_STS_IOERR);
++ }
+ } else if (mrq->data) {
+ if (blk_update_request(req, BLK_STS_OK, mrq->data->bytes_xfered))
+ blk_mq_requeue_request(req, true);
+@@ -2085,6 +2091,8 @@ static void mmc_blk_mq_complete_rq(struc
+ } else if (!blk_rq_bytes(req)) {
+ __blk_mq_end_request(req, BLK_STS_IOERR);
+ } else if (mqrq->retries++ < MMC_MAX_RETRIES) {
++ if (rq_data_dir(req) == WRITE)
++ mqrq->flags |= MQRQ_XFER_SINGLE_BLOCK;
+ blk_mq_requeue_request(req, true);
+ } else {
+ if (mmc_card_removed(mq->card))
+--- a/drivers/mmc/core/queue.h
++++ b/drivers/mmc/core/queue.h
+@@ -61,6 +61,8 @@ enum mmc_drv_op {
+ MMC_DRV_OP_GET_EXT_CSD,
+ };
+
++#define MQRQ_XFER_SINGLE_BLOCK BIT(0)
++
+ struct mmc_queue_req {
+ struct mmc_blk_request brq;
+ struct scatterlist *sg;
+@@ -69,6 +71,7 @@ struct mmc_queue_req {
+ void *drv_op_data;
+ unsigned int ioc_count;
+ int retries;
++ u32 flags;
+ };
+
+ struct mmc_queue {
--- /dev/null
+From 6546a49bbe656981d99a389195560999058c89c4 Mon Sep 17 00:00:00 2001
+From: Shawn Lin <shawn.lin@rock-chips.com>
+Date: Wed, 8 Apr 2026 15:18:49 +0800
+Subject: mmc: sdhci-of-dwcmshc: Disable clock before DLL configuration
+
+From: Shawn Lin <shawn.lin@rock-chips.com>
+
+commit 6546a49bbe656981d99a389195560999058c89c4 upstream.
+
+According to the ASIC design recommendations, the clock must be
+disabled before operating the DLL to prevent glitches that could
+affect the internal digital logic. In extreme cases, failing to
+do so may cause the controller to malfunction completely.
+
+Add a step to disable the clock before DLL configuration and
+re-enable it at the end.
+
+Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-of-dwcmshc.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+--- a/drivers/mmc/host/sdhci-of-dwcmshc.c
++++ b/drivers/mmc/host/sdhci-of-dwcmshc.c
+@@ -738,12 +738,15 @@ static void dwcmshc_rk3568_set_clock(str
+ extra |= BIT(4);
+ sdhci_writel(host, extra, reg);
+
++ /* Disable clock while config DLL */
++ sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
++
+ if (clock <= 52000000) {
+ if (host->mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
+ host->mmc->ios.timing == MMC_TIMING_MMC_HS400) {
+ dev_err(mmc_dev(host->mmc),
+ "Can't reduce the clock below 52MHz in HS200/HS400 mode");
+- return;
++ goto enable_clk;
+ }
+
+ /*
+@@ -763,7 +766,7 @@ static void dwcmshc_rk3568_set_clock(str
+ DLL_STRBIN_DELAY_NUM_SEL |
+ DLL_STRBIN_DELAY_NUM_DEFAULT << DLL_STRBIN_DELAY_NUM_OFFSET;
+ sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
+- return;
++ goto enable_clk;
+ }
+
+ /* Reset DLL */
+@@ -790,7 +793,7 @@ static void dwcmshc_rk3568_set_clock(str
+ 500 * USEC_PER_MSEC);
+ if (err) {
+ dev_err(mmc_dev(host->mmc), "DLL lock timeout!\n");
+- return;
++ goto enable_clk;
+ }
+
+ extra = 0x1 << 16 | /* tune clock stop en */
+@@ -823,6 +826,16 @@ static void dwcmshc_rk3568_set_clock(str
+ DLL_STRBIN_TAPNUM_DEFAULT |
+ DLL_STRBIN_TAPNUM_FROM_SW;
+ sdhci_writel(host, extra, DWCMSHC_EMMC_DLL_STRBIN);
++
++enable_clk:
++ /*
++ * The sdclk frequency select bits in SDHCI_CLOCK_CONTROL are not functional
++ * on Rockchip's SDHCI implementation. Instead, the clock frequency is fully
++ * controlled via external clk provider by calling clk_set_rate(). Consequently,
++ * passing 0 to sdhci_enable_clk() only re-enables the already-configured clock,
++ * which matches the hardware's actual behavior.
++ */
++ sdhci_enable_clk(host, 0);
+ }
+
+ static void rk35xx_sdhci_reset(struct sdhci_host *host, u8 mask)
--- /dev/null
+From 3962c24f2d14e8a7f8a23f56b7ce320523947342 Mon Sep 17 00:00:00 2001
+From: "Viorel Suman (OSS)" <viorel.suman@oss.nxp.com>
+Date: Wed, 11 Mar 2026 14:33:09 +0200
+Subject: pwm: imx-tpm: Count the number of enabled channels in probe
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Viorel Suman (OSS) <viorel.suman@oss.nxp.com>
+
+commit 3962c24f2d14e8a7f8a23f56b7ce320523947342 upstream.
+
+Across a soft reset the TPM PWM IP may preserve its internal state from
+the previous runtime, so on a subsequent OS boot and driver probe the
+"enable_count" value and the TPM PWM IP internal channel "enabled" states
+may get out of sync. As a consequence, on a suspend/resume cycle the call
+"if (--tpm->enable_count == 0)" may lead to an "enable_count" overflow,
+leaving the system blocked from entering suspend due to:
+
+ if (tpm->enable_count > 0)
+ return -EBUSY;
+
+Fix the problem by counting the enabled channels in the probe function.
+
+Signed-off-by: Viorel Suman (OSS) <viorel.suman@oss.nxp.com>
+Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support")
+Link: https://patch.msgid.link/20260311123309.348904-1-viorel.suman@oss.nxp.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Uwe Kleine-König <ukleinek@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/pwm/pwm-imx-tpm.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/drivers/pwm/pwm-imx-tpm.c
++++ b/drivers/pwm/pwm-imx-tpm.c
+@@ -352,7 +352,7 @@ static int pwm_imx_tpm_probe(struct plat
+ struct clk *clk;
+ void __iomem *base;
+ int ret;
+- unsigned int npwm;
++ unsigned int i, npwm;
+ u32 val;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+@@ -382,6 +382,13 @@ static int pwm_imx_tpm_probe(struct plat
+
+ mutex_init(&tpm->lock);
+
++ /* count the enabled channels */
++ for (i = 0; i < npwm; ++i) {
++ val = readl(base + PWM_IMX_TPM_CnSC(i));
++ if (FIELD_GET(PWM_IMX_TPM_CnSC_ELS, val))
++ ++tpm->enable_count;
++ }
++
+ ret = devm_pwmchip_add(&pdev->dev, chip);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
--- /dev/null
+From 37beb42560165869838e7d91724f3e629db64129 Mon Sep 17 00:00:00 2001
+From: Ryan Roberts <ryan.roberts@arm.com>
+Date: Tue, 3 Mar 2026 15:08:38 +0000
+Subject: randomize_kstack: Maintain kstack_offset per task
+
+From: Ryan Roberts <ryan.roberts@arm.com>
+
+commit 37beb42560165869838e7d91724f3e629db64129 upstream.
+
+kstack_offset was previously maintained per-cpu, but this caused a
+couple of issues. So let's instead make it per-task.
+
+Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
+were expected and required to be called with interrupts and preemption
+disabled so that they could manipulate per-cpu state. But arm64, loongarch
+and risc-v are calling them with interrupts and preemption enabled. I
+don't _think_ this causes any functional issues, but it's certainly
+unexpected and could lead to manipulating the wrong cpu's state, which
+could cause a minor performance degradation due to bouncing the cache
+lines. By maintaining the state per-task those functions can safely be
+called in preemptible context.
+
+Issue 2: add_random_kstack_offset() is called before executing the
+syscall and expands the stack using a previously chosen random offset.
+choose_random_kstack_offset() is called after executing the syscall and
+chooses and stores a new random offset for the next syscall. With
+per-cpu storage for this offset, an attacker could force cpu migration
+during the execution of the syscall and prevent the offset from being
+updated for the original cpu such that it is predictable for the next
+syscall on that cpu. By maintaining the state per-task, this problem
+goes away because the per-task random offset is updated after the
+syscall regardless of which cpu it is executing on.
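+
+An illustrative timeline of the per-cpu staleness window (sketch, not
+code from this patch):
+
+ /* syscall N: task T enters on CPU0 */
+ add_random_kstack_offset();        /* consumes CPU0's stored offset */
+ /* attacker forces a migration to CPU1 mid-syscall */
+ choose_random_kstack_offset(rand); /* per-cpu: updates CPU1 only, so
+                                       CPU0's offset stays predictable */
+ /* per-task: the offset lives in current->kstack_offset and is always
+    refreshed for task T, regardless of where the syscall exits */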
+
+Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
+Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
+Cc: stable@vger.kernel.org
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
+Link: https://patch.msgid.link/20260303150840.3789438-2-ryan.roberts@arm.com
+Signed-off-by: Kees Cook <kees@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/randomize_kstack.h | 26 +++++++++++++++-----------
+ include/linux/sched.h | 4 ++++
+ init/main.c | 1 -
+ kernel/fork.c | 2 ++
+ 4 files changed, 21 insertions(+), 12 deletions(-)
+
+--- a/include/linux/randomize_kstack.h
++++ b/include/linux/randomize_kstack.h
+@@ -9,7 +9,6 @@
+
+ DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+ randomize_kstack_offset);
+-DECLARE_PER_CPU(u32, kstack_offset);
+
+ /*
+ * Do not use this anywhere else in the kernel. This is used here because
+@@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ * add_random_kstack_offset - Increase stack utilization by previously
+ * chosen random offset
+ *
+- * This should be used in the syscall entry path when interrupts and
+- * preempt are disabled, and after user registers have been stored to
+- * the stack. For testing the resulting entropy, please see:
+- * tools/testing/selftests/lkdtm/stack-entropy.sh
++ * This should be used in the syscall entry path after user registers have been
++ * stored to the stack. Preemption may be enabled. For testing the resulting
++ * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
+ */
+ #define add_random_kstack_offset() do { \
+ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+ &randomize_kstack_offset)) { \
+- u32 offset = raw_cpu_read(kstack_offset); \
++ u32 offset = current->kstack_offset; \
+ u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
+ /* Keep allocation even after "ptr" loses scope. */ \
+ asm volatile("" :: "r"(ptr) : "memory"); \
+@@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ * choose_random_kstack_offset - Choose the random offset for the next
+ * add_random_kstack_offset()
+ *
+- * This should only be used during syscall exit when interrupts and
+- * preempt are disabled. This position in the syscall flow is done to
+- * frustrate attacks from userspace attempting to learn the next offset:
++ * This should only be used during syscall exit. Preemption may be enabled. This
++ * position in the syscall flow is done to frustrate attacks from userspace
++ * attempting to learn the next offset:
+ * - Maximize the timing uncertainty visible from userspace: if the
+ * offset is chosen at syscall entry, userspace has much more control
+ * over the timing between choosing offsets. "How long will we be in
+@@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
+ #define choose_random_kstack_offset(rand) do { \
+ if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
+ &randomize_kstack_offset)) { \
+- u32 offset = raw_cpu_read(kstack_offset); \
++ u32 offset = current->kstack_offset; \
+ offset = ror32(offset, 5) ^ (rand); \
+- raw_cpu_write(kstack_offset, offset); \
++ current->kstack_offset = offset; \
+ } \
+ } while (0)
++
++static inline void random_kstack_task_init(struct task_struct *tsk)
++{
++ tsk->kstack_offset = 0;
++}
+ #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+ #define add_random_kstack_offset() do { } while (0)
+ #define choose_random_kstack_offset(rand) do { } while (0)
++#define random_kstack_task_init(tsk) do { } while (0)
+ #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
+
+ #endif
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1592,6 +1592,10 @@ struct task_struct {
+ unsigned long prev_lowest_stack;
+ #endif
+
++#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
++ u32 kstack_offset;
++#endif
++
+ #ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
+ __u64 mce_kflags;
+--- a/init/main.c
++++ b/init/main.c
+@@ -833,7 +833,6 @@ static inline void initcall_debug_enable
+ #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+ DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
+ randomize_kstack_offset);
+-DEFINE_PER_CPU(u32, kstack_offset);
+
+ static int __init early_randomize_kstack_offset(char *buf)
+ {
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -95,6 +95,7 @@
+ #include <linux/thread_info.h>
+ #include <linux/kstack_erase.h>
+ #include <linux/kasan.h>
++#include <linux/randomize_kstack.h>
+ #include <linux/scs.h>
+ #include <linux/io_uring.h>
+ #include <linux/io_uring_types.h>
+@@ -2233,6 +2234,7 @@ __latent_entropy struct task_struct *cop
+ if (retval)
+ goto bad_fork_cleanup_io;
+
++ random_kstack_task_init(p);
+ stackleak_task_init(p);
+
+ if (pid != &init_struct_pid) {
--- /dev/null
+From 30c4d2f26bb3538c328035cea2e6265c8320539e Mon Sep 17 00:00:00 2001
+From: Johan Hovold <johan@kernel.org>
+Date: Tue, 7 Apr 2026 14:27:17 +0200
+Subject: rtc: ntxec: fix OF node reference imbalance
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Johan Hovold <johan@kernel.org>
+
+commit 30c4d2f26bb3538c328035cea2e6265c8320539e upstream.
+
+The driver reuses the OF node of the parent multi-function device but
+fails to take another reference to balance the one dropped by the
+platform bus code when unbinding the MFD and deregistering the child
+devices.
+
+Fix this by using the intended helper for reusing OF nodes.
+
+Fixes: 435af89786c6 ("rtc: New driver for RTC in Netronix embedded controller")
+Cc: stable@vger.kernel.org # 5.13
+Cc: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Link: https://patch.msgid.link/20260407122717.2676774-1-johan@kernel.org
+Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/rtc/rtc-ntxec.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/rtc/rtc-ntxec.c
++++ b/drivers/rtc/rtc-ntxec.c
+@@ -110,7 +110,7 @@ static int ntxec_rtc_probe(struct platfo
+ struct rtc_device *dev;
+ struct ntxec_rtc *rtc;
+
+- pdev->dev.of_node = pdev->dev.parent->of_node;
++ device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+
+ rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+ if (!rtc)
fs-prepare-for-adding-lsm-blob-to-backing_file.patch
lsm-add-backing_file-lsm-hooks.patch
selinux-fix-overlayfs-mmap-and-mprotect-access-checks.patch
+hwmon-pt5161l-fix-bugs-in-pt5161l_read_block_data.patch
+randomize_kstack-maintain-kstack_offset-per-task.patch
+mmc-block-use-single-block-write-in-retry.patch
+mmc-sdhci-of-dwcmshc-disable-clock-before-dll-configuration.patch
+arm64-dts-ti-am62-verdin-enable-pullup-for-emmc-data-pins.patch
+crypto-qat-fix-irq-cleanup-on-6xxx-probe-failure.patch
+xfs-start-gc-on-zonegc_low_space-attribute-updates.patch
+xfs-fix-a-resource-leak-in-xfs_alloc_buftarg.patch
+firmware-google-framebuffer-do-not-unregister-platform-device.patch
+firmware-exynos-acpm-drop-fake-const-on-handle-pointer.patch
+crypto-talitos-fix-sec1-32k-ahash-request-limitation.patch
+crypto-talitos-rename-first-last-to-first_desc-last_desc.patch
+pwm-imx-tpm-count-the-number-of-enabled-channels-in-probe.patch
+tpm2-sessions-fix-missing-tpm_buf_destroy-in-tpm2_read_public.patch
+tpm-fix-auth-session-leak-in-tpm2_get_random-error-path.patch
+tpm-use-kfree_sensitive-to-free-auth-session-in-tpm_dev_release.patch
+tpm-tpm_tis-add-error-logging-for-data-transfer.patch
+tpm-tpm_tis-stop-transmit-if-retries-are-exhausted.patch
+rtc-ntxec-fix-of-node-reference-imbalance.patch
+mm-vmalloc-take-vmap_purge_lock-in-shrinker.patch
+mm-memfd_luo-fix-physical-address-conversion-in-put_folios-cleanup.patch
+mm-mempolicy-fix-memory-leaks-in-weighted_interleave_auto_store.patch
+mm-damon-stat-fix-memory-leak-on-damon_start-failure-in-damon_stat_start.patch
+mm-damon-core-validate-damos_quota_goal-nid-for-node_mem_-used-free-_bp.patch
+mm-damon-core-validate-damos_quota_goal-nid-for-node_memcg_-used-free-_bp.patch
+mm-damon-core-use-time_in_range_open-for-damos-quota-window-start.patch
+mm-damon-core-disallow-time-quota-setting-zero-esz.patch
+mm-damon-core-disallow-non-power-of-two-min_region_sz-on-damon_start.patch
+userfaultfd-allow-registration-of-ranges-below-mmap_min_addr.patch
+loongarch-kvm-use-csr_crmd_plv-in-kvm_arch_vcpu_in_kernel.patch
+kvm-x86-defer-non-architectural-deliver-of-exception-payload-to-userspace-read.patch
+kvm-nsvm-mark-all-of-vmcb02-dirty-when-restoring-nested-state.patch
+kvm-nsvm-sync-nextrip-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-nsvm-sync-interrupt-shadow-to-cached-vmcb12-after-vmrun-of-l2.patch
+kvm-svm-inject-ud-for-invlpga-if-efer.svme-0.patch
+kvm-svm-explicitly-mark-vmcb01-dirty-after-modifying-vmcb-intercepts.patch
+kvm-nsvm-ensure-avic-is-inhibited-when-restoring-a-vcpu-to-guest-mode.patch
+kvm-nsvm-always-use-nextrip-as-vmcb02-s-nextrip-after-first-l2-vmrun.patch
+kvm-nsvm-delay-stuffing-l2-s-current-rip-into-nextrip-until-vcpu-run.patch
+kvm-nsvm-use-vcpu-arch.cr2-when-updating-vmcb12-on-nested-vmexit.patch
+kvm-arm64-account-for-resx-bits-in-__compute_fgt.patch
+kvm-nsvm-avoid-clearing-vmcb_lbr-in-vmcb12.patch
+kvm-nsvm-delay-setting-soft-irq-rip-tracking-fields-until-vcpu-run.patch
+kvm-svm-switch-svm_copy_lbrs-to-a-macro.patch
+kvm-svm-add-missing-save-restore-handling-of-lbr-msrs.patch
+kvm-nsvm-always-inject-a-gp-if-mapping-vmcb12-fails-on-nested-vmrun.patch
+kvm-nsvm-refactor-checking-lbrv-enablement-in-vmcb12-into-a-helper.patch
+kvm-nsvm-refactor-writing-vmcb12-on-nested-vmexit-as-a-helper.patch
+kvm-nsvm-triple-fault-if-restore-host-cr3-fails-on-nested-vmexit.patch
+kvm-nsvm-triple-fault-if-mapping-vmcb12-fails-on-nested-vmexit.patch
+kvm-nsvm-clear-gif-on-nested-vmexit-invalid.patch
+kvm-nsvm-clear-eventinj-fields-in-vmcb12-on-nested-vmexit.patch
+kvm-nsvm-clear-tracking-of-l1-l2-nmi-and-soft-irq-on-nested-vmexit.patch
+kvm-nsvm-add-missing-consistency-check-for-efer-cr0-cr4-and-cs.patch
+kvm-nsvm-drop-the-non-architectural-consistency-check-for-np_enable.patch
+kvm-nsvm-add-missing-consistency-check-for-ncr3-validity.patch
+kvm-nsvm-raise-ud-if-unhandled-vmmcall-isn-t-intercepted-by-l1.patch
+kvm-nsvm-always-intercept-vmmcall-when-l2-is-active.patch
--- /dev/null
+From 666c1a2ca603d8314231200bf8bbb3a81bd64c6b Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Wed, 8 Apr 2026 12:00:27 +0300
+Subject: tpm: Fix auth session leak in tpm2_get_random() error path
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit 666c1a2ca603d8314231200bf8bbb3a81bd64c6b upstream.
+
+When tpm_buf_fill_hmac_session() fails inside the do-while loop in
+tpm2_get_random(), the function returns directly after destroying the
+buffer, without ending the auth session via tpm2_end_auth_session().
+
+This leaks the TPM auth session resource. All other error paths within
+the loop correctly reach the 'out' label which calls both
+tpm_buf_destroy() and tpm2_end_auth_session().
+
+Fix this by replacing the early return with a goto to the existing 'out'
+label, which already handles both cleanup operations. The redundant
+tpm_buf_destroy() call is removed since 'out' takes care of it.
+
+Cc: stable@vger.kernel.org # v6.19+
+Fixes: 6e9722e9a7bf ("tpm2-sessions: Fix out of range indexing in name_size")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-cmd.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -295,10 +295,8 @@ int tpm2_get_random(struct tpm_chip *chi
+ }
+ tpm_buf_append_u16(&buf, num_bytes);
+ err = tpm_buf_fill_hmac_session(chip, &buf);
+- if (err) {
+- tpm_buf_destroy(&buf);
+- return err;
+- }
++ if (err)
++ goto out;
+
+ err = tpm_transmit_cmd(chip, &buf,
+ offsetof(struct tpm2_get_random_out,
--- /dev/null
+From 0471921e2d1043dcc6de5cffb49dd37709521abe Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:05 +0000
+Subject: tpm: tpm_tis: add error logging for data transfer
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 0471921e2d1043dcc6de5cffb49dd37709521abe upstream.
+
+Add logging to make it easier to determine the reason for a transmit
+failure.
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-2-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -471,6 +471,8 @@ static int tpm_tis_send_data(struct tpm_
+ status = tpm_tis_status(chip);
+ if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) {
+ rc = -EIO;
++ dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be set. sts = 0x%08x\n",
++ status);
+ goto out_err;
+ }
+ }
+@@ -491,6 +493,8 @@ static int tpm_tis_send_data(struct tpm_
+ status = tpm_tis_status(chip);
+ if (!itpm && (status & TPM_STS_DATA_EXPECT) != 0) {
+ rc = -EIO;
++ dev_err(&chip->dev, "TPM_STS_DATA_EXPECT should be unset. sts = 0x%08x\n",
++ status);
+ goto out_err;
+ }
+
--- /dev/null
+From 949692da7211572fac419b2986b6abc0cd1aeb76 Mon Sep 17 00:00:00 2001
+From: Jacqueline Wong <jacqwong@google.com>
+Date: Wed, 15 Apr 2026 16:00:06 +0000
+Subject: tpm: tpm_tis: stop transmit if retries are exhausted
+
+From: Jacqueline Wong <jacqwong@google.com>
+
+commit 949692da7211572fac419b2986b6abc0cd1aeb76 upstream.
+
+tpm_tis_send_main() will attempt to retry sending data TPM_RETRY times.
+Currently, if those retries are exhausted, the driver still goes on to
+execute the command. The TPM will be in the wrong state, leading to the
+operation simply timing out.
+
+Instead, if there is still an error after retries are exhausted, return
+that error immediately.
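+
+Sketching the fixed send path (condensed from the diff below; unrelated
+details omitted):
+
+    for (try = 0; try < TPM_RETRY; try++) {
+        rc = tpm_tis_send_data(chip, buf, len);
+        if (rc >= 0)
+            break;                  /* sent successfully */
+        if (rc != -EAGAIN && rc != -EIO)
+            goto out_err;           /* not recoverable */
+        usleep_range(priv->timeout_min, priv->timeout_max);
+    }
+
+    if (rc == -EAGAIN || rc == -EIO) {
+        dev_err(&chip->dev, "Exhausted %d tpm_tis_send_data retries\n",
+                TPM_RETRY);
+        goto out_err;               /* do not issue TPM_STS_GO */
+    }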
+
+Cc: stable@vger.kernel.org # v6.6+
+Fixes: 280db21e153d8 ("tpm_tis: Resend command to recover from data transfer errors")
+Signed-off-by: Jacqueline Wong <jacqwong@google.com>
+Signed-off-by: Jordan Hand <jhand@google.com>
+Link: https://lore.kernel.org/r/20260415160006.2275325-3-jacqwong@google.com
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm_tis_core.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm_tis_core.c
++++ b/drivers/char/tpm/tpm_tis_core.c
+@@ -556,11 +556,16 @@ static int tpm_tis_send_main(struct tpm_
+ break;
+ else if (rc != -EAGAIN && rc != -EIO)
+ /* Data transfer failed, not recoverable */
+- return rc;
++ goto out_err;
+
+ usleep_range(priv->timeout_min, priv->timeout_max);
+ }
+
++ if (rc == -EAGAIN || rc == -EIO) {
++ dev_err(&chip->dev, "Exhausted %d tpm_tis_send_data retries\n", TPM_RETRY);
++ goto out_err;
++ }
++
+ /* go and do it */
+ rc = tpm_tis_write8(priv, TPM_STS(priv->locality), TPM_STS_GO);
+ if (rc < 0)
--- /dev/null
+From c424d2664f08c77f08b4580b5f0cbaabf7c229b2 Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Thu, 9 Apr 2026 17:20:54 +0000
+Subject: tpm: Use kfree_sensitive() to free auth session in tpm_dev_release()
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit c424d2664f08c77f08b4580b5f0cbaabf7c229b2 upstream.
+
+tpm_dev_release() uses plain kfree() to free chip->auth, which contains
+sensitive cryptographic material including HMAC session keys, nonces,
+and passphrase data (struct tpm2_auth).
+
+Every other code path that frees this structure uses kfree_sensitive()
+to zero the memory before releasing it: both tpm2_end_auth_session()
+and tpm_buf_check_hmac_response() do so. The tpm_dev_release() path
+is the only one that does not, leaving key material in freed slab
+memory until it is eventually overwritten.
+
+Use kfree_sensitive() for consistency with the rest of the driver and
+to ensure session keys are scrubbed during device teardown.
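+
+For reference, kfree_sensitive() scrubs the allocation before freeing
+it, roughly along these lines (a simplified sketch; the real helper in
+mm/slab_common.c additionally deals with KASAN poisoning and the
+zero-size pointer):
+
+    void kfree_sensitive(const void *p)
+    {
+        size_t ks = ksize(p);
+
+        if (ks)
+            memzero_explicit((void *)p, ks);
+        kfree(p);
+    }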
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 699e3efd6c64 ("tpm: Add HMAC session start and end functions")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm-chip.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm-chip.c
++++ b/drivers/char/tpm/tpm-chip.c
+@@ -247,7 +247,7 @@ static void tpm_dev_release(struct devic
+ kfree(chip->work_space.context_buf);
+ kfree(chip->work_space.session_buf);
+ #ifdef CONFIG_TCG_TPM2_HMAC
+- kfree(chip->auth);
++ kfree_sensitive(chip->auth);
+ #endif
+ kfree(chip);
+ }
--- /dev/null
+From f0f75a3d98b7959a8677b6363e23190f3018636b Mon Sep 17 00:00:00 2001
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+Date: Wed, 15 Apr 2026 03:00:03 +0300
+Subject: tpm2-sessions: Fix missing tpm_buf_destroy() in tpm2_read_public()
+
+From: Gunnar Kudrjavets <gunnarku@amazon.com>
+
+commit f0f75a3d98b7959a8677b6363e23190f3018636b upstream.
+
+tpm2_read_public() calls tpm_buf_init() but fails to call
+tpm_buf_destroy() on two exit paths, leaking a page allocation:
+
+1. When name_size() returns an error (unrecognized hash algorithm),
+ the function returns directly without destroying the buffer.
+
+2. On the success path, the buffer is never destroyed before
+ returning.
+
+All other error paths in the function correctly call
+tpm_buf_destroy() before returning.
+
+Fix both by adding the missing tpm_buf_destroy() calls.
+
+Cc: stable@vger.kernel.org # v6.19+
+Fixes: bda1cbf73c6e ("tpm2-sessions: Fix tpm2_read_public range checks")
+Signed-off-by: Gunnar Kudrjavets <gunnarku@amazon.com>
+Reviewed-by: Justinien Bouron <jbouron@amazon.com>
+Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/char/tpm/tpm2-sessions.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -203,8 +203,10 @@ static int tpm2_read_public(struct tpm_c
+ rc = tpm_buf_read_u16(&buf, &offset);
+ name_size_alg = name_size(&buf.data[offset]);
+
+- if (name_size_alg < 0)
++ if (name_size_alg < 0) {
++ tpm_buf_destroy(&buf);
+ return name_size_alg;
++ }
+
+ if (rc != name_size_alg) {
+ tpm_buf_destroy(&buf);
+@@ -217,6 +219,7 @@ static int tpm2_read_public(struct tpm_c
+ }
+
+ memcpy(name, &buf.data[offset], rc);
++ tpm_buf_destroy(&buf);
+ return name_size_alg;
+ }
+ #endif /* CONFIG_TCG_TPM2_HMAC */
--- /dev/null
+From 161ce69c2c89781784b945d8e281ff2da9dede9c Mon Sep 17 00:00:00 2001
+From: "Denis M. Karpov" <komlomal@gmail.com>
+Date: Thu, 9 Apr 2026 13:33:45 +0300
+Subject: userfaultfd: allow registration of ranges below mmap_min_addr
+
+From: Denis M. Karpov <komlomal@gmail.com>
+
+commit 161ce69c2c89781784b945d8e281ff2da9dede9c upstream.
+
+The current implementation of validate_range() in fs/userfaultfd.c
+performs a hard check against mmap_min_addr. This is redundant because
+UFFDIO_REGISTER operates on memory ranges that must already be backed by a
+VMA.
+
+Enforcing mmap_min_addr or capability checks again in userfaultfd is
+unnecessary and prevents applications like binary compilers from using
+UFFD for valid memory regions mapped by the application.
+
+Remove the redundant check for mmap_min_addr.
+
+We started using UFFD instead of the classic mprotect approach in the
+binary translator to track application writes. During development, we
+encountered this bug. The translator cannot control where the translated
+application chooses to map its memory, and if the app requires a
+low-address area, UFFD fails, whereas mprotect would work just fine. I
+believe this is a genuine logic bug rather than an improvement, and I
+would appreciate including the fix in stable.
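+
+As a userspace illustration, registering a low mapping now works as
+long as the range is actually backed by a VMA (a minimal sketch with
+error handling trimmed; mapping at a fixed low address is itself only
+possible where vm.mmap_min_addr permits it):
+
+    /* needs <fcntl.h>, <sys/mman.h>, <sys/syscall.h>,
+     * <sys/ioctl.h>, <linux/userfaultfd.h> */
+    int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+    struct uffdio_api api = { .api = UFFD_API };
+
+    ioctl(uffd, UFFDIO_API, &api);
+
+    /* low mapping, e.g. created by the translated application */
+    void *p = mmap((void *)0x1000, 4096, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
+                   -1, 0);
+
+    struct uffdio_register reg = {
+        .range = { .start = (unsigned long)p, .len = 4096 },
+        .mode  = UFFDIO_REGISTER_MODE_MISSING,
+    };
+
+    ioctl(uffd, UFFDIO_REGISTER, &reg);  /* previously failed with EINVAL */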
+
+Link: https://lore.kernel.org/20260409103345.15044-1-komlomal@gmail.com
+Fixes: 86039bd3b4e6 ("userfaultfd: add new syscall to provide memory externalization")
+Signed-off-by: Denis M. Karpov <komlomal@gmail.com>
+Reviewed-by: Lorenzo Stoakes <ljs@kernel.org>
+Acked-by: Harry Yoo (Oracle) <harry@kernel.org>
+Reviewed-by: Pedro Falcato <pfalcato@suse.de>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Jann Horn <jannh@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/userfaultfd.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -1238,8 +1238,6 @@ static __always_inline int validate_unal
+ return -EINVAL;
+ if (!len)
+ return -EINVAL;
+- if (start < mmap_min_addr)
+- return -EINVAL;
+ if (start >= task_size)
+ return -EINVAL;
+ if (len > task_size - start)
--- /dev/null
+From 29a7b2614357393b176ef06ba5bc3ff5afc8df69 Mon Sep 17 00:00:00 2001
+From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+Date: Wed, 1 Apr 2026 12:02:41 +0800
+Subject: xfs: fix a resource leak in xfs_alloc_buftarg()
+
+From: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+
+commit 29a7b2614357393b176ef06ba5bc3ff5afc8df69 upstream.
+
+xfs_alloc_buftarg() takes a reference on the DAX device but never
+drops it in the error path. Call fs_put_dax() there to release the
+reference.
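+
+The reference is taken earlier in xfs_alloc_buftarg() via
+fs_dax_get_by_bdev(); with the fix, the error path releases it before
+freeing the buftarg, roughly (a condensed sketch of the function's
+tail; the holder-ops argument name is from the surrounding code, not
+this diff):
+
+    btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev,
+            &btp->bt_dax_part_off, mp, &xfs_dax_holder_operations);
+    ...
+  error_free:
+    fs_put_dax(btp->bt_daxdev, mp);
+    kfree(btp);
+    return ERR_PTR(error);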
+
+Fixes: 6f643c57d57c ("xfs: implement ->notify_failure() for XFS")
+Cc: stable@vger.kernel.org
+Signed-off-by: Haoxiang Li <lihaoxiang@isrc.iscas.ac.cn>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1831,6 +1831,7 @@ xfs_alloc_buftarg(
+ return btp;
+
+ error_free:
++ fs_put_dax(btp->bt_daxdev, mp);
+ kfree(btp);
+ return ERR_PTR(error);
+ }
--- /dev/null
+From 181ea4e2de422aa0a66f355bd59bccccdd169826 Mon Sep 17 00:00:00 2001
+From: Hans Holmberg <hans.holmberg@wdc.com>
+Date: Wed, 25 Mar 2026 13:43:12 +0100
+Subject: xfs: start gc on zonegc_low_space attribute updates
+
+From: Hans Holmberg <hans.holmberg@wdc.com>
+
+commit 181ea4e2de422aa0a66f355bd59bccccdd169826 upstream.
+
+Start gc if the aggressiveness of zone garbage collection is changed
+by the user (if the file system is not read-only).
+
+Without this change, the new setting will not be taken into account
+until the gc thread is woken up by e.g. a write.
+
+Cc: stable@vger.kernel.org # v6.15
+Fixes: 845abeb1f06a8a ("xfs: add tunable threshold parameter for triggering zone GC")
+Signed-off-by: Hans Holmberg <hans.holmberg@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_sysfs.c | 7 ++++++-
+ fs/xfs/xfs_zone_alloc.h | 4 ++++
+ fs/xfs/xfs_zone_gc.c | 17 +++++++++++++++++
+ 3 files changed, 27 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_sysfs.c
++++ b/fs/xfs/xfs_sysfs.c
+@@ -14,6 +14,7 @@
+ #include "xfs_log_priv.h"
+ #include "xfs_mount.h"
+ #include "xfs_zones.h"
++#include "xfs_zone_alloc.h"
+
+ struct xfs_sysfs_attr {
+ struct attribute attr;
+@@ -724,6 +725,7 @@ zonegc_low_space_store(
+ const char *buf,
+ size_t count)
+ {
++ struct xfs_mount *mp = zoned_to_mp(kobj);
+ int ret;
+ unsigned int val;
+
+@@ -734,7 +736,10 @@ zonegc_low_space_store(
+ if (val > 100)
+ return -EINVAL;
+
+- zoned_to_mp(kobj)->m_zonegc_low_space = val;
++ if (mp->m_zonegc_low_space != val) {
++ mp->m_zonegc_low_space = val;
++ xfs_zone_gc_wakeup(mp);
++ }
+
+ return count;
+ }
+--- a/fs/xfs/xfs_zone_alloc.h
++++ b/fs/xfs/xfs_zone_alloc.h
+@@ -51,6 +51,7 @@ int xfs_mount_zones(struct xfs_mount *mp
+ void xfs_unmount_zones(struct xfs_mount *mp);
+ void xfs_zone_gc_start(struct xfs_mount *mp);
+ void xfs_zone_gc_stop(struct xfs_mount *mp);
++void xfs_zone_gc_wakeup(struct xfs_mount *mp);
+ #else
+ static inline int xfs_mount_zones(struct xfs_mount *mp)
+ {
+@@ -65,6 +66,9 @@ static inline void xfs_zone_gc_start(str
+ static inline void xfs_zone_gc_stop(struct xfs_mount *mp)
+ {
+ }
++static inline void xfs_zone_gc_wakeup(struct xfs_mount *mp)
++{
++}
+ #endif /* CONFIG_XFS_RT */
+
+ #endif /* _XFS_ZONE_ALLOC_H */
+--- a/fs/xfs/xfs_zone_gc.c
++++ b/fs/xfs/xfs_zone_gc.c
+@@ -1159,6 +1159,23 @@ xfs_zone_gc_stop(
+ kthread_park(mp->m_zone_info->zi_gc_thread);
+ }
+
++void
++xfs_zone_gc_wakeup(
++ struct xfs_mount *mp)
++{
++ struct super_block *sb = mp->m_super;
++
++ /*
++ * If we are unmounting the file system we must not try to
++ * wake gc as m_zone_info might have been freed already.
++ */
++ if (down_read_trylock(&sb->s_umount)) {
++ if (!xfs_is_readonly(mp))
++ wake_up_process(mp->m_zone_info->zi_gc_thread);
++ up_read(&sb->s_umount);
++ }
++}
++
+ int
+ xfs_zone_gc_mount(
+ struct xfs_mount *mp)