--- /dev/null
+From 492be12d4a21f5882cc0a4f4f87162a9ff3a6cd5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 14:31:52 +0200
+Subject: Revert "ptp: add testptp mask test"
+
+From: Petr Machata <petrm@nvidia.com>
+
+This reverts commit 8510559c0fa1e228b18fcf77cfbcf5b970793a8a, which is
+commit 26285e689c6cd2cf3849568c83b2ebe53f467143 upstream.
+
+The reverted commit extends the selftest to test timestamp event queue mask
+manipulation in testptp. It exercises masks PTP_MASK_CLEAR_ALL and
+PTP_MASK_EN_SINGLE, introduced in commit c5a445b1e934 ("ptp: support event
+queue reader channel masks"), which is not on this stable branch. The test
+case thus cannot be built against this tree's own UAPI headers.
+
+The reverted commit was introduced to resolve a missing dependency of
+commit bef3a83a9a67 ("testptp: Add option to open PHC in readonly mode"),
+which is 76868642e427 upstream. The only conflict between the two is the
+getopt string, and there is otherwise no direct dependency between the two.
+
+This patch therefore reverts the cited commit, hand-resolving the
+getopt string to include 'r' (as introduced by bef3a83a9a67), but not
+'F' (introduced by 8510559c0fa1).
+
+Reported-by: Yong Wang <yongwang@nvidia.com>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/ptp/testptp.c | 19 +------------------
+ 1 file changed, 1 insertion(+), 18 deletions(-)
+
+diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
+index d78d52f028ab52..84e86898f4b409 100644
+--- a/tools/testing/selftests/ptp/testptp.c
++++ b/tools/testing/selftests/ptp/testptp.c
+@@ -121,7 +121,6 @@ static void usage(char *progname)
+ " -d name device to open\n"
+ " -e val read 'val' external time stamp events\n"
+ " -f val adjust the ptp clock frequency by 'val' ppb\n"
+- " -F chan Enable single channel mask and keep device open for debugfs verification.\n"
+ " -g get the ptp clock time\n"
+ " -h prints this message\n"
+ " -i val index for event/trigger\n"
+@@ -190,7 +189,6 @@ int main(int argc, char *argv[])
+ int seconds = 0;
+ int readonly = 0;
+ int settime = 0;
+- int channel = -1;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+@@ -200,7 +198,7 @@ int main(int argc, char *argv[])
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
++ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
+ switch (c) {
+ case 'c':
+ capabilities = 1;
+@@ -214,9 +212,6 @@ int main(int argc, char *argv[])
+ case 'f':
+ adjfreq = atoi(optarg);
+ break;
+- case 'F':
+- channel = atoi(optarg);
+- break;
+ case 'g':
+ gettime = 1;
+ break;
+@@ -616,18 +611,6 @@ int main(int argc, char *argv[])
+ free(xts);
+ }
+
+- if (channel >= 0) {
+- if (ioctl(fd, PTP_MASK_CLEAR_ALL)) {
+- perror("PTP_MASK_CLEAR_ALL");
+- } else if (ioctl(fd, PTP_MASK_EN_SINGLE, (unsigned int *)&channel)) {
+- perror("PTP_MASK_EN_SINGLE");
+- } else {
+- printf("Channel %d exclusively enabled. Check on debugfs.\n", channel);
+- printf("Press any key to continue\n.");
+- getchar();
+- }
+- }
+-
+ close(fd);
+ return 0;
+ }
+--
+2.53.0
+
--- /dev/null
+From fc21c6a729f8798693461df430e637220889e2a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 14:31:51 +0200
+Subject: Revert "selftest/ptp: update ptp selftest to exercise the gettimex
+ options"
+
+From: Petr Machata <petrm@nvidia.com>
+
+This reverts commit 6b32d042aa8255e964ebed860e24adccb204fcbc, which is
+commit 3d07b691ee707c00afaf365440975e81bb96cd9b upstream.
+
+The cited commit allows testptp to set a configurable clock_id. That is
+done via a PTP_SYS_OFFSET_EXTENDED ioctl call, whose argument is struct
+ptp_sys_offset_extended, where the clock_id is set. However, this Linux
+version does not support the ptp_sys_offset_extended.clockid field, and
+the test case cannot be built against this tree's own UAPI headers.
+
+The reverted commit was introduced to resolve a missing dependency of
+commit bef3a83a9a67 ("testptp: Add option to open PHC in readonly mode"),
+which is 76868642e427 upstream. My suspicion is that the only conflict
+between the two is the getopt string, and there is otherwise no direct
+dependency between the two.
+
+This patch therefore reverts the cited commit, hand-resolving the
+getopt string to include 'r' (as introduced by bef3a83a9a67), but not
+'y' (introduced by 6b32d042aa82).
+
+Reported-by: Yong Wang <yongwang@nvidia.com>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/ptp/testptp.c | 62 +++------------------------
+ 1 file changed, 5 insertions(+), 57 deletions(-)
+
+diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
+index 89b4f43a7ba459..d78d52f028ab52 100644
+--- a/tools/testing/selftests/ptp/testptp.c
++++ b/tools/testing/selftests/ptp/testptp.c
+@@ -147,7 +147,6 @@ static void usage(char *progname)
+ " -T val set the ptp clock time to 'val' seconds\n"
+ " -x val get an extended ptp clock time with the desired number of samples (up to %d)\n"
+ " -X get a ptp clock cross timestamp\n"
+- " -y val pre/post tstamp timebase to use {realtime|monotonic|monotonic-raw}\n"
+ " -z test combinations of rising/falling external time stamp flags\n",
+ progname, PTP_MAX_SAMPLES);
+ }
+@@ -192,7 +191,6 @@ int main(int argc, char *argv[])
+ int readonly = 0;
+ int settime = 0;
+ int channel = -1;
+- clockid_t ext_clockid = CLOCK_REALTIME;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+@@ -202,7 +200,7 @@ int main(int argc, char *argv[])
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
++ while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
+ switch (c) {
+ case 'c':
+ capabilities = 1;
+@@ -285,21 +283,6 @@ int main(int argc, char *argv[])
+ case 'X':
+ getcross = 1;
+ break;
+- case 'y':
+- if (!strcasecmp(optarg, "realtime"))
+- ext_clockid = CLOCK_REALTIME;
+- else if (!strcasecmp(optarg, "monotonic"))
+- ext_clockid = CLOCK_MONOTONIC;
+- else if (!strcasecmp(optarg, "monotonic-raw"))
+- ext_clockid = CLOCK_MONOTONIC_RAW;
+- else {
+- fprintf(stderr,
+- "type needs to be realtime, monotonic or monotonic-raw; was given %s\n",
+- optarg);
+- return -1;
+- }
+- break;
+-
+ case 'z':
+ flagtest = 1;
+ break;
+@@ -590,7 +573,6 @@ int main(int argc, char *argv[])
+ }
+
+ soe->n_samples = getextended;
+- soe->clockid = ext_clockid;
+
+ if (ioctl(fd, PTP_SYS_OFFSET_EXTENDED, soe)) {
+ perror("PTP_SYS_OFFSET_EXTENDED");
+@@ -599,46 +581,12 @@ int main(int argc, char *argv[])
+ getextended);
+
+ for (i = 0; i < getextended; i++) {
+- switch (ext_clockid) {
+- case CLOCK_REALTIME:
+- printf("sample #%2d: real time before: %lld.%09u\n",
+- i, soe->ts[i][0].sec,
+- soe->ts[i][0].nsec);
+- break;
+- case CLOCK_MONOTONIC:
+- printf("sample #%2d: monotonic time before: %lld.%09u\n",
+- i, soe->ts[i][0].sec,
+- soe->ts[i][0].nsec);
+- break;
+- case CLOCK_MONOTONIC_RAW:
+- printf("sample #%2d: monotonic-raw time before: %lld.%09u\n",
+- i, soe->ts[i][0].sec,
+- soe->ts[i][0].nsec);
+- break;
+- default:
+- break;
+- }
++ printf("sample #%2d: system time before: %lld.%09u\n",
++ i, soe->ts[i][0].sec, soe->ts[i][0].nsec);
+ printf(" phc time: %lld.%09u\n",
+ soe->ts[i][1].sec, soe->ts[i][1].nsec);
+- switch (ext_clockid) {
+- case CLOCK_REALTIME:
+- printf(" real time after: %lld.%09u\n",
+- soe->ts[i][2].sec,
+- soe->ts[i][2].nsec);
+- break;
+- case CLOCK_MONOTONIC:
+- printf(" monotonic time after: %lld.%09u\n",
+- soe->ts[i][2].sec,
+- soe->ts[i][2].nsec);
+- break;
+- case CLOCK_MONOTONIC_RAW:
+- printf(" monotonic-raw time after: %lld.%09u\n",
+- soe->ts[i][2].sec,
+- soe->ts[i][2].nsec);
+- break;
+- default:
+- break;
+- }
++ printf(" system time after: %lld.%09u\n",
++ soe->ts[i][2].sec, soe->ts[i][2].nsec);
+ }
+ }
+
+--
+2.53.0
+
--- /dev/null
+revert-selftest-ptp-update-ptp-selftest-to-exercise-.patch
+revert-ptp-add-testptp-mask-test.patch
--- /dev/null
+From 820505951b2c8f7274028e90218cc6f94419f324 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:24:36 +0200
+Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected role
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 81ccda30b4e83d8f5cc4fd50503c44e3a33abfeb upstream.
+
+Commit 0cb2af2ea66ad ("KVM: x86: Fix shadow paging use-after-free due
+to unexpected GFN") fixed a shadow paging mismatch between stored and
+computed GFNs; the bug could be triggered by changing a PDE mapping from
+outside the guest, and then deleting a memslot. The rmap_remove()
+call would miss entries created after the PDE change because the GFN
+of the leaf SPTE does not match the GFN of the struct kvm_mmu_page.
+
+A similar hole however remains if the modified PDE points to a non-leaf
+page. In this case the gfn can be made to match, but the role does not
+match: the original large 2MB page creates a kvm_mmu_page with direct=1,
+while the new 4KB needs a kvm_mmu_page with direct=0. However,
+kvm_mmu_get_child_sp() does not compare the role, and therefore reuses
+the page.
+
+The next step is installing a leaf (4KB) SPTE on the new path which
+records an rmap entry under the gfn resolved by the walk. But when
+that child is zapped its parent kvm_mmu_page has direct=1 and
+kvm_mmu_page_get_gfn() computes the gfn for the 4KB page as
+sp->gfn + index instead of using sp->shadowed_translation[] (or sp->gfns[]
+in older kernels). It therefore fails to remove the recorded entry.
+
+When the memslot is dropped the shadow page is freed but the rmap
+entry survives, as in the scenario that was already fixed. Code that
+later walks that gfn (dirty logging, MMU notifier invalidation, and
+so on) dereferences an sptep that lies in the freed page, causing the
+use-after-free.
+
+Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages")
+Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 58d67e5ab2c583..9edfc812423766 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2239,13 +2239,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+ u64 *sptep, gfn_t gfn,
+ bool direct, unsigned int access)
+ {
+- union kvm_mmu_page_role role;
++ union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access);
+
+- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) &&
+- spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn)
++ if (is_shadow_present_pte(*sptep) &&
++ !is_large_pte(*sptep) &&
++ spte_to_child_sp(*sptep) &&
++ spte_to_child_sp(*sptep)->gfn == gfn &&
++ spte_to_child_sp(*sptep)->role.word == role.word)
+ return ERR_PTR(-EEXIST);
+
+- role = kvm_mmu_child_role(sptep, direct, access);
+ return kvm_mmu_get_shadow_page(vcpu, gfn, role);
+ }
+
+--
+2.53.0
+
--- /dev/null
+From 521ac6fe6afdb644708691075886d39b2211c6a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:24:37 +0200
+Subject: KVM: x86/mmu: Ensure hugepage is in slot before checking max
+ mapping level
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ef057cbf825e03b63f6edf5980f96abf3c53089d upstream.
+
+When recovering hugepages in the shadow MMU, verify that the base gfn of
+the shadow page is actually contained within the target memslot, *before*
+querying the max mapping level given the shadow page's gfn. Failure to
+pre-check the validity of the gfn can lead to an out-of-bounds access to
+the slot's lpage_info (which typically manifests as a host #PF because the
+lpage_info is vmalloc'd) if the guest creates a hugepage mapping (in its
+PTEs) that extends "below" the bounds of a memslot.
+
+When faulting in memory for a guest, and the size of the guest mapping is
+greater than KVM's (current) max mapping, then KVM will create a "direct"
+shadow page (direct in that there are no gPTEs to shadow, and so the target
+gfn is a direct calculation given the base gfn of the shadow page). The
+hugepage recovery flow looks for such direct shadow pages, as forcing 4KiB
+mappings when dirty logging generates the guest > host mapping size case.
+When the 4KiB restriction is lifted, then KVM can replace the shadow page
+with a hugepage.
+
+But if KVM originally used a smaller mapping than the guest because the
+range of memory covered by the guest hugepage exceeds the bounds of a
+memslot, then KVM will link a direct shadow page with a gfn that is outside
+the bounds of the memslot being used to fault in memory. The rmap entry
+added for the leaf mapping is correct and within bounds, but the gfn of the
+leaf SPTE's parent shadow page will be out of bounds.
+
+ BUG: unable to handle page fault for address: ffffc90000806ffc
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 100000067 P4D 100000067 PUD 1002a7067 PMD 10612f067 PTE 0
+ Oops: Oops: 0000 [#1] SMP
+ CPU: 13 UID: 1000 PID: 757 Comm: mmu_stress_test Not tainted 7.1.0-rc1-48ce1e26eace-x86_pir_to_irr_comments-vm #341 PREEMPT
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+ RIP: 0010:kvm_mmu_max_mapping_level+0x79/0x2b0 [kvm]
+ Call Trace:
+ <TASK>
+ kvm_mmu_recover_huge_pages+0x21b/0x320 [kvm]
+ kvm_set_memslot+0x1ee/0x590 [kvm]
+ kvm_set_memory_region.part.0+0x3a1/0x4d0 [kvm]
+ kvm_vm_ioctl+0x9bf/0x15d0 [kvm]
+ __x64_sys_ioctl+0x8a/0xd0
+ do_syscall_64+0xb7/0xbb0
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ RIP: 0033:0x7f21c0f1a9bf
+ </TASK>
+
+Don't bother pre-checking the bounds of the potential hugepage, i.e. don't
+check that e.g. sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level + 1) is also
+within the memslot, as the checks performed by kvm_mmu_max_mapping_level()
+are a superset of the basic bounds checks. I.e. pre-checking the full
+range would be a dubious micro-optimization.
+
+Fixes: 9eba50f8d7fc ("KVM: x86/mmu: Consult max mapping level when zapping collapsible SPTEs")
+Cc: stable@vger.kernel.org
+Cc: David Matlack <dmatlack@google.com>
+Cc: James Houghton <jthoughton@google.com>
+Cc: Alexander Bulekov <bkov@amazon.com>
+Cc: Fred Griffoul <fgriffo@amazon.co.uk>
+Cc: Alexander Graf <graf@amazon.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Filippo Sironi <sironi@amazon.de>
+Cc: Ivan Orlov <iorlov@amazon.co.uk>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 18 ++++++++++++------
+ include/linux/kvm_host.h | 7 ++++++-
+ 2 files changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 9edfc812423766..7785da8f0ad339 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -6417,13 +6417,19 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+ sp = sptep_to_sp(sptep);
+
+ /*
+- * We cannot do huge page mapping for indirect shadow pages,
+- * which are found on the last rmap (level = 1) when not using
+- * tdp; such shadow pages are synced with the page table in
+- * the guest, and the guest page table is using 4K page size
+- * mapping if the indirect sp has level = 1.
++ * Direct shadow page can be replaced by a hugepage if the host
++ * mapping level allows it and the memslot maps all of the host
++ * hugepage. Note! If the memslot maps only part of the
++ * hugepage, sp->gfn may be below slot->base_gfn, and querying
++ * the max mapping level would cause an out-of-bounds lpage_info
++ * access. So the gfn bounds check *must* be done first.
++ *
++ * Indirect shadow pages are created when the guest page tables
++ * are using 4K pages. Since the host mapping is always
++ * constrained by the page size in the guest, indirect shadow
++ * pages are never collapsible.
+ */
+- if (sp->role.direct &&
++ if (sp->role.direct && is_gfn_in_memslot(slot, sp->gfn) &&
+ sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
+ PG_LEVEL_NUM)) {
+ kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 04fac4cdcfbcef..624380c4fef809 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1621,6 +1621,11 @@ int kvm_request_irq_source_id(struct kvm *kvm);
+ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+
++static inline bool is_gfn_in_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
++{
++ return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages;
++}
++
+ /*
+ * Returns a pointer to the memslot if it contains gfn.
+ * Otherwise returns NULL.
+@@ -1631,7 +1636,7 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+ if (!slot)
+ return NULL;
+
+- if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
++ if (is_gfn_in_memslot(slot, gfn))
+ return slot;
+ else
+ return NULL;
+--
+2.53.0
+
--- /dev/null
+From dfb507cfad4d73f87e54272ee099e1b9c8fd6f03 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 23:27:51 +0200
+Subject: mm/mglru: skip special VMAs in lru_gen_look_around()
+
+From: Yu Zhao <yuzhao@google.com>
+
+[ Upstream commit c28ac3c7eb945fee6e20f47d576af68fdff1392a ]
+
+Special VMAs like VM_PFNMAP can contain anon pages from COW. There isn't
+much profit in doing lookaround on them. Besides, they can trigger the
+pte_special() warning in get_pte_pfn().
+
+Skip them in lru_gen_look_around().
+
+Link: https://lkml.kernel.org/r/20231223045647.1566043-1-yuzhao@google.com
+Fixes: 018ee47f1489 ("mm: multi-gen LRU: exploit locality in rmap")
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Reported-by: syzbot+03fd9b3f71641f0ebf2d@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/000000000000f9ff00060d14c256@google.com/
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[fix conflicts with variable declarations and vma pointer usage]
+Signed-off-by: Jakov Novak <jakovnovak30@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/vmscan.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 1f7a90ecc7007d..f6f8c18dc45f57 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -4622,6 +4622,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ struct lru_gen_mm_walk *walk;
+ int young = 0;
+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
++ struct vm_area_struct *vma = pvmw->vma;
+ struct folio *folio = pfn_folio(pvmw->pfn);
+ struct mem_cgroup *memcg = folio_memcg(folio);
+ struct pglist_data *pgdat = folio_pgdat(folio);
+@@ -4635,11 +4636,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ if (spin_is_contended(pvmw->ptl))
+ return;
+
++ /* exclude special VMAs containing anon pages from COW */
++ if (vma->vm_flags & VM_SPECIAL)
++ return;
++
+ /* avoid taking the LRU lock under the PTL when possible */
+ walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
+
+- start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
+- end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
++ start = max(pvmw->address & PMD_MASK, vma->vm_start);
++ end = min(pvmw->address | ~PMD_MASK, vma->vm_end - 1) + 1;
+
+ if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
+ if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
+@@ -4660,7 +4665,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
+ unsigned long pfn;
+
+- pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
++ pfn = get_pte_pfn(pte[i], vma, addr);
+ if (pfn == -1)
+ continue;
+
+@@ -4671,7 +4676,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ if (!folio)
+ continue;
+
+- if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
++ if (!ptep_test_and_clear_young(vma, addr, pte + i))
+ VM_WARN_ON_ONCE(true);
+
+ young++;
+--
+2.53.0
+
--- /dev/null
+From 217f6fbbb3c73e8855307cf712ab66b736fac020 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 13:32:21 +0000
+Subject: perf bench: Avoid NDEBUG warning
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit d1babea9c38282b58a6f822ab95027cba3165a42 ]
+
+With NDEBUG set, the asserts are compiled out. This yields
+"unused-but-set-variable" warnings. Move these variables behind
+#ifndef NDEBUG to avoid the warning.
+
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sean Christopherson <seanjc@google.com>
+Link: https://lore.kernel.org/r/20230330183827.1412303-1-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: 616b14b47a86 ("perf build: Conditionally define NDEBUG")
+Signed-off-by: Simon Liebold <simonlie@amazon.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/bench/find-bit-bench.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
+index 22b5cfe9702370..80f051f9c20fd9 100644
+--- a/tools/perf/bench/find-bit-bench.c
++++ b/tools/perf/bench/find-bit-bench.c
+@@ -61,7 +61,6 @@ static int do_for_each_set_bit(unsigned int num_bits)
+ double time_average, time_stddev;
+ unsigned int bit, i, j;
+ unsigned int set_bits, skip;
+- unsigned int old;
+
+ init_stats(&fb_time_stats);
+ init_stats(&tb_time_stats);
+@@ -73,7 +72,10 @@ static int do_for_each_set_bit(unsigned int num_bits)
+ set_bit(i, to_test);
+
+ for (i = 0; i < outer_iterations; i++) {
+- old = accumulator;
++#ifndef NDEBUG
++ unsigned int old = accumulator;
++#endif
++
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for_each_set_bit(bit, to_test, num_bits)
+@@ -85,7 +87,9 @@ static int do_for_each_set_bit(unsigned int num_bits)
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&fb_time_stats, runtime_us);
+
++#ifndef NDEBUG
+ old = accumulator;
++#endif
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for (bit = 0; bit < num_bits; bit++) {
+--
+2.53.0
+
--- /dev/null
+From cab8b53bc49101e02d2ad8320e9a1828d9ae2624 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 13:32:22 +0000
+Subject: perf block-range: Move debug code behind ifndef NDEBUG
+
+From: Ian Rogers <irogers@google.com>
+
+[ Upstream commit 984a785f25e5b5db5fa673130b60dca6ca794406 ]
+
+Make good on a comment and avoid an unused-but-set-variable warning.
+
+Signed-off-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sean Christopherson <seanjc@google.com>
+Link: https://lore.kernel.org/r/20230330183827.1412303-1-irogers@google.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: 616b14b47a86 ("perf build: Conditionally define NDEBUG")
+Signed-off-by: Simon Liebold <simonlie@amazon.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/block-range.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
+index 1be43265750137..680e92774d0cde 100644
+--- a/tools/perf/util/block-range.c
++++ b/tools/perf/util/block-range.c
+@@ -11,11 +11,7 @@ struct {
+
+ static void block_range__debug(void)
+ {
+- /*
+- * XXX still paranoid for now; see if we can make this depend on
+- * DEBUG=1 builds.
+- */
+-#if 1
++#ifndef NDEBUG
+ struct rb_node *rb;
+ u64 old = 0; /* NULL isn't executable */
+
+--
+2.53.0
+
--- /dev/null
+From 6ac4e808e2168de2361ad605d0e16df87c2fb2d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 14:31:21 +0200
+Subject: Revert "ptp: add testptp mask test"
+
+From: Petr Machata <petrm@nvidia.com>
+
+This reverts commit c1c50689799d0343598ab6ccb7209819bcef248d, which is
+commit 26285e689c6cd2cf3849568c83b2ebe53f467143 upstream.
+
+The reverted commit extends the selftest to test timestamp event queue mask
+manipulation in testptp. It exercises masks PTP_MASK_CLEAR_ALL and
+PTP_MASK_EN_SINGLE, introduced in commit c5a445b1e934 ("ptp: support event
+queue reader channel masks"), which is not on this stable branch. The test
+case thus cannot be built against this tree's own UAPI headers.
+
+The reverted commit was introduced to resolve a missing dependency of
+commit c6dc458227a3 ("testptp: Add option to open PHC in readonly mode"),
+which is 76868642e427 upstream. The only conflict between the two is the
+getopt string, and there is otherwise no direct dependency between the two.
+
+This patch therefore reverts the cited commit, hand-resolving the
+getopt string to include 'r' (as introduced by c6dc458227a3), but not
+'F' (introduced by c1c50689799d).
+
+Reported-by: Yong Wang <yongwang@nvidia.com>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/ptp/testptp.c | 19 +------------------
+ 1 file changed, 1 insertion(+), 18 deletions(-)
+
+diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
+index 7030bae8e5e07e..14b975594c88e7 100644
+--- a/tools/testing/selftests/ptp/testptp.c
++++ b/tools/testing/selftests/ptp/testptp.c
+@@ -121,7 +121,6 @@ static void usage(char *progname)
+ " -d name device to open\n"
+ " -e val read 'val' external time stamp events\n"
+ " -f val adjust the ptp clock frequency by 'val' ppb\n"
+- " -F chan Enable single channel mask and keep device open for debugfs verification.\n"
+ " -g get the ptp clock time\n"
+ " -h prints this message\n"
+ " -i val index for event/trigger\n"
+@@ -190,7 +189,6 @@ int main(int argc, char *argv[])
+ int seconds = 0;
+ int readonly = 0;
+ int settime = 0;
+- int channel = -1;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+@@ -200,7 +198,7 @@ int main(int argc, char *argv[])
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
++ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
+ switch (c) {
+ case 'c':
+ capabilities = 1;
+@@ -214,9 +212,6 @@ int main(int argc, char *argv[])
+ case 'f':
+ adjfreq = atoi(optarg);
+ break;
+- case 'F':
+- channel = atoi(optarg);
+- break;
+ case 'g':
+ gettime = 1;
+ break;
+@@ -616,18 +611,6 @@ int main(int argc, char *argv[])
+ free(xts);
+ }
+
+- if (channel >= 0) {
+- if (ioctl(fd, PTP_MASK_CLEAR_ALL)) {
+- perror("PTP_MASK_CLEAR_ALL");
+- } else if (ioctl(fd, PTP_MASK_EN_SINGLE, (unsigned int *)&channel)) {
+- perror("PTP_MASK_EN_SINGLE");
+- } else {
+- printf("Channel %d exclusively enabled. Check on debugfs.\n", channel);
+- printf("Press any key to continue\n.");
+- getchar();
+- }
+- }
+-
+ close(fd);
+ return 0;
+ }
+--
+2.53.0
+
--- /dev/null
+perf-bench-avoid-ndebug-warning.patch
+perf-block-range-move-debug-code-behind-ifndef-ndebu.patch
+kvm-x86-fix-shadow-paging-use-after-free-due-to-unex.patch
+kvm-x86-mmu-ensure-hugepage-is-in-by-slot-before-che.patch
+revert-ptp-add-testptp-mask-test.patch
+mm-mglru-skip-special-vmas-in-lru_gen_look_around.patch
--- /dev/null
+From e5fafa4b1a7e592a8bcd2ff828db676f68f4dc81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:24:04 +0200
+Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected role
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 81ccda30b4e83d8f5cc4fd50503c44e3a33abfeb upstream.
+
+Commit 0cb2af2ea66ad ("KVM: x86: Fix shadow paging use-after-free due
+to unexpected GFN") fixed a shadow paging mismatch between stored and
+computed GFNs; the bug could be triggered by changing a PDE mapping from
+outside the guest, and then deleting a memslot. The rmap_remove()
+call would miss entries created after the PDE change because the GFN
+of the leaf SPTE does not match the GFN of the struct kvm_mmu_page.
+
+A similar hole however remains if the modified PDE points to a non-leaf
+page. In this case the gfn can be made to match, but the role does not
+match: the original large 2MB page creates a kvm_mmu_page with direct=1,
+while the new 4KB needs a kvm_mmu_page with direct=0. However,
+kvm_mmu_get_child_sp() does not compare the role, and therefore reuses
+the page.
+
+The next step is installing a leaf (4KB) SPTE on the new path which
+records an rmap entry under the gfn resolved by the walk. But when
+that child is zapped its parent kvm_mmu_page has direct=1 and
+kvm_mmu_page_get_gfn() computes the gfn for the 4KB page as
+sp->gfn + index instead of using sp->shadowed_translation[] (or sp->gfns[]
+in older kernels). It therefore fails to remove the recorded entry.
+
+When the memslot is dropped the shadow page is freed but the rmap
+entry survives, as in the scenario that was already fixed. Code that
+later walks that gfn (dirty logging, MMU notifier invalidation, and
+so on) dereferences an sptep that lies in the freed page, causing the
+use-after-free.
+
+Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages")
+Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index d288c60ae200ba..a67d013fff4d91 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2329,13 +2329,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+ u64 *sptep, gfn_t gfn,
+ bool direct, unsigned int access)
+ {
+- union kvm_mmu_page_role role;
++ union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access);
+
+- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) &&
+- spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn)
++ if (is_shadow_present_pte(*sptep) &&
++ !is_large_pte(*sptep) &&
++ spte_to_child_sp(*sptep) &&
++ spte_to_child_sp(*sptep)->gfn == gfn &&
++ spte_to_child_sp(*sptep)->role.word == role.word)
+ return ERR_PTR(-EEXIST);
+
+- role = kvm_mmu_child_role(sptep, direct, access);
+ return kvm_mmu_get_shadow_page(vcpu, gfn, role);
+ }
+
+--
+2.53.0
+
--- /dev/null
+From 49df398f6a3114653555bd0876213293dc761a65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:24:05 +0200
+Subject: KVM: x86/mmu: Ensure hugepage is in slot before checking max
+ mapping level
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ef057cbf825e03b63f6edf5980f96abf3c53089d upstream.
+
+When recovering hugepages in the shadow MMU, verify that the base gfn of
+the shadow page is actually contained within the target memslot, *before*
+querying the max mapping level given the shadow page's gfn. Failure to
+pre-check the validity of the gfn can lead to an out-of-bounds access to
+the slot's lpage_info (which typically manifests as a host #PF because the
+lpage_info is vmalloc'd) if the guest creates a hugepage mapping (in its
+PTEs) that extends "below" the bounds of a memslot.
+
+When faulting in memory for a guest, and the size of the guest mapping is
+greater than KVM's (current) max mapping, then KVM will create a "direct"
+shadow page (direct in that there are no gPTEs to shadow, and so the target
+gfn is a direct calculation given the base gfn of the shadow page). The
+hugepage recovery flow looks for such direct shadow pages, as forcing 4KiB
+mappings when dirty logging generates the guest > host mapping size case.
+When the 4KiB restriction is lifted, then KVM can replace the shadow page
+with a hugepage.
+
+But if KVM originally used a smaller mapping than the guest because the
+range of memory covered by the guest hugepage exceeds the bounds of a
+memslot, then KVM will link a direct shadow page with a gfn that is outside
+the bounds of the memslot being used to fault in memory. The rmap entry
+added for the leaf mapping is correct and within bounds, but the gfn of the
+leaf SPTE's parent shadow page will be out of bounds.
+
+ BUG: unable to handle page fault for address: ffffc90000806ffc
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 100000067 P4D 100000067 PUD 1002a7067 PMD 10612f067 PTE 0
+ Oops: Oops: 0000 [#1] SMP
+ CPU: 13 UID: 1000 PID: 757 Comm: mmu_stress_test Not tainted 7.1.0-rc1-48ce1e26eace-x86_pir_to_irr_comments-vm #341 PREEMPT
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+ RIP: 0010:kvm_mmu_max_mapping_level+0x79/0x2b0 [kvm]
+ Call Trace:
+ <TASK>
+ kvm_mmu_recover_huge_pages+0x21b/0x320 [kvm]
+ kvm_set_memslot+0x1ee/0x590 [kvm]
+ kvm_set_memory_region.part.0+0x3a1/0x4d0 [kvm]
+ kvm_vm_ioctl+0x9bf/0x15d0 [kvm]
+ __x64_sys_ioctl+0x8a/0xd0
+ do_syscall_64+0xb7/0xbb0
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ RIP: 0033:0x7f21c0f1a9bf
+ </TASK>
+
+Don't bother pre-checking the bounds of the potential hugepage, i.e. don't
+check that e.g. sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level + 1) is also
+within the memslot, as the checks performed by kvm_mmu_max_mapping_level()
+are a superset of the basic bounds checks. I.e. pre-checking the full
+range would be a dubious micro-optimization.
+
+Fixes: 9eba50f8d7fc ("KVM: x86/mmu: Consult max mapping level when zapping collapsible SPTEs")
+Cc: stable@vger.kernel.org
+Cc: David Matlack <dmatlack@google.com>
+Cc: James Houghton <jthoughton@google.com>
+Cc: Alexander Bulekov <bkov@amazon.com>
+Cc: Fred Griffoul <fgriffo@amazon.co.uk>
+Cc: Alexander Graf <graf@amazon.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Filippo Sironi <sironi@amazon.de>
+Cc: Ivan Orlov <iorlov@amazon.co.uk>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 18 ++++++++++++------
+ include/linux/kvm_host.h | 7 ++++++-
+ 2 files changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index a67d013fff4d91..aab26f90c28551 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -6952,13 +6952,19 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+ sp = sptep_to_sp(sptep);
+
+ /*
+- * We cannot do huge page mapping for indirect shadow pages,
+- * which are found on the last rmap (level = 1) when not using
+- * tdp; such shadow pages are synced with the page table in
+- * the guest, and the guest page table is using 4K page size
+- * mapping if the indirect sp has level = 1.
++ * Direct shadow page can be replaced by a hugepage if the host
++ * mapping level allows it and the memslot maps all of the host
++ * hugepage. Note! If the memslot maps only part of the
++ * hugepage, sp->gfn may be below slot->base_gfn, and querying
++ * the max mapping level would cause an out-of-bounds lpage_info
++ * access. So the gfn bounds check *must* be done first.
++ *
++ * Indirect shadow pages are created when the guest page tables
++ * are using 4K pages. Since the host mapping is always
++ * constrained by the page size in the guest, indirect shadow
++ * pages are never collapsible.
+ */
+- if (sp->role.direct &&
++ if (sp->role.direct && is_gfn_in_memslot(slot, sp->gfn) &&
+ sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
+ PG_LEVEL_NUM)) {
+ kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index 04b81e2166d5dc..b4235e99f0a9d0 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1745,6 +1745,11 @@ int kvm_request_irq_source_id(struct kvm *kvm);
+ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+
++static inline bool is_gfn_in_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
++{
++ return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages;
++}
++
+ /*
+ * Returns a pointer to the memslot if it contains gfn.
+ * Otherwise returns NULL.
+@@ -1755,7 +1760,7 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+ if (!slot)
+ return NULL;
+
+- if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
++ if (is_gfn_in_memslot(slot, gfn))
+ return slot;
+ else
+ return NULL;
+--
+2.53.0
+
--- /dev/null
+kvm-x86-fix-shadow-paging-use-after-free-due-to-unex.patch
+kvm-x86-mmu-ensure-hugepage-is-in-by-slot-before-che.patch
--- /dev/null
+From 5172dc086d930aaf62c5b03b49199990dd1a0dcc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:23:15 +0200
+Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected role
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 81ccda30b4e83d8f5cc4fd50503c44e3a33abfeb upstream.
+
+Commit 0cb2af2ea66ad ("KVM: x86: Fix shadow paging use-after-free due
+to unexpected GFN") fixed a shadow paging mismatch between stored and
+computed GFNs; the bug could be triggered by changing a PDE mapping from
+outside the guest, and then deleting a memslot. The rmap_remove()
+call would miss entries created after the PDE change because the GFN
+of the leaf SPTE does not match the GFN of the struct kvm_mmu_page.
+
+A similar hole however remains if the modified PDE points to a non-leaf
+page. In this case the gfn can be made to match, but the role does not
+match: the original large 2MB page creates a kvm_mmu_page with direct=1,
+while the new 4KB needs a kvm_mmu_page with direct=0. However,
+kvm_mmu_get_child_sp() does not compare the role, and therefore reuses
+the page.
+
+The next step is installing a leaf (4KB) SPTE on the new path which
+records an rmap entry under the gfn resolved by the walk. But when
+that child is zapped its parent kvm_mmu_page has direct=1 and
+kvm_mmu_page_get_gfn() computes the gfn for the 4KB page as
+sp->gfn + index instead of using sp->shadowed_translation[] (or sp->gfns[]
+in older kernels). It therefore fails to remove the recorded entry.
+
+When the memslot is dropped the shadow page is freed but the rmap
+entry survives, as in the scenario that was already fixed. Code that
+later walks that gfn (dirty logging, MMU notifier invalidation, and
+so on) dereferences an sptep that lies in the freed page, causing the
+use-after-free.
+
+Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages")
+Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 0bd0cb8992c9fd..541e199feb9981 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2453,13 +2453,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+ u64 *sptep, gfn_t gfn,
+ bool direct, unsigned int access)
+ {
+- union kvm_mmu_page_role role;
++ union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access);
+
+- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) &&
+- spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn)
++ if (is_shadow_present_pte(*sptep) &&
++ !is_large_pte(*sptep) &&
++ spte_to_child_sp(*sptep) &&
++ spte_to_child_sp(*sptep)->gfn == gfn &&
++ spte_to_child_sp(*sptep)->role.word == role.word)
+ return ERR_PTR(-EEXIST);
+
+- role = kvm_mmu_child_role(sptep, direct, access);
+ return kvm_mmu_get_shadow_page(vcpu, gfn, role);
+ }
+
+--
+2.53.0
+
--- /dev/null
+From d0a894862fd2a3c4c2557c6e2b341d2193d8ff9f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 15:50:34 +0800
+Subject: lsm: add backing_file LSM hooks
+
+From: Paul Moore <paul@paul-moore.com>
+
+[ Upstream commit 6af36aeb147a06dea47c49859cd6ca5659aeb987 ]
+
+Stacked filesystems such as overlayfs do not currently provide the
+necessary mechanisms for LSMs to properly enforce access controls on the
+mmap() and mprotect() operations. In order to resolve this gap, a LSM
+security blob is being added to the backing_file struct and the following
+new LSM hooks are being created:
+
+ security_backing_file_alloc()
+ security_backing_file_free()
+ security_mmap_backing_file()
+
+The first two hooks are to manage the lifecycle of the LSM security blob
+in the backing_file struct, while the third provides a new mmap() access
+control point for the underlying backing file. It is also expected that
+LSMs will likely want to update their security_file_mprotect() callback
+to address issues with their mprotect() controls, but that does not
+require a change to the security_file_mprotect() LSM hook.
+
+There are three other small changes to support these new LSM hooks:
+* Pass the user file associated with a backing file down to
+alloc_empty_backing_file() so it can be included in the
+security_backing_file_alloc() hook.
+* Add getter and setter functions for the backing_file struct LSM blob
+as the backing_file struct remains private to fs/file_table.c.
+* Constify the file struct field in the LSM common_audit_data struct to
+better support LSMs that need to pass a const file struct pointer into
+the common LSM audit code.
+
+Thanks to Arnd Bergmann for identifying the missing EXPORT_SYMBOL_GPL()
+and supplying a fixup.
+
+Cc: stable@vger.kernel.org
+Cc: linux-fsdevel@vger.kernel.org
+Cc: linux-unionfs@vger.kernel.org
+Cc: linux-erofs@lists.ozlabs.org
+Reviewed-by: Amir Goldstein <amir73il@gmail.com>
+Reviewed-by: Serge Hallyn <serge@hallyn.com>
+Reviewed-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+[Mainline declares lsm_backing_file_cache in security/lsm.h. Linux 6.18.y
+does not have security/lsm_init.c or security/lsm.h; the cache variable
+is defined locally as static struct kmem_cache *lsm_backing_file_cache in
+security/security.c.]
+Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/backing-file.c | 17 ++++--
+ fs/file_table.c | 27 +++++++--
+ fs/fuse/passthrough.c | 2 +-
+ fs/internal.h | 3 +-
+ fs/overlayfs/dir.c | 2 +-
+ fs/overlayfs/file.c | 2 +-
+ include/linux/backing-file.h | 4 +-
+ include/linux/fs.h | 13 ++++
+ include/linux/lsm_audit.h | 2 +-
+ include/linux/lsm_hook_defs.h | 5 ++
+ include/linux/lsm_hooks.h | 1 +
+ include/linux/security.h | 22 +++++++
+ security/security.c | 109 ++++++++++++++++++++++++++++++++++
+ 13 files changed, 194 insertions(+), 15 deletions(-)
+
+diff --git a/fs/backing-file.c b/fs/backing-file.c
+index 15a7f80310848d..e049a627d78fb9 100644
+--- a/fs/backing-file.c
++++ b/fs/backing-file.c
+@@ -12,6 +12,7 @@
+ #include <linux/backing-file.h>
+ #include <linux/splice.h>
+ #include <linux/mm.h>
++#include <linux/security.h>
+
+ #include "internal.h"
+
+@@ -29,14 +30,15 @@
+ * returned file into a container structure that also stores the stacked
+ * file's path, which can be retrieved using backing_file_user_path().
+ */
+-struct file *backing_file_open(const struct path *user_path, int flags,
++struct file *backing_file_open(const struct file *user_file, int flags,
+ const struct path *real_path,
+ const struct cred *cred)
+ {
++ const struct path *user_path = &user_file->f_path;
+ struct file *f;
+ int error;
+
+- f = alloc_empty_backing_file(flags, cred);
++ f = alloc_empty_backing_file(flags, cred, user_file);
+ if (IS_ERR(f))
+ return f;
+
+@@ -52,15 +54,16 @@ struct file *backing_file_open(const struct path *user_path, int flags,
+ }
+ EXPORT_SYMBOL_GPL(backing_file_open);
+
+-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
++struct file *backing_tmpfile_open(const struct file *user_file, int flags,
+ const struct path *real_parentpath,
+ umode_t mode, const struct cred *cred)
+ {
+ struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
++ const struct path *user_path = &user_file->f_path;
+ struct file *f;
+ int error;
+
+- f = alloc_empty_backing_file(flags, cred);
++ f = alloc_empty_backing_file(flags, cred, user_file);
+ if (IS_ERR(f))
+ return f;
+
+@@ -339,6 +342,12 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
+ vma_set_file(vma, file);
+
+ old_cred = override_creds(ctx->cred);
++ ret = security_mmap_backing_file(vma, file, user_file);
++ if (ret) {
++ revert_creds(old_cred);
++ return ret;
++ }
++
+ ret = vfs_mmap(vma->vm_file, vma);
+ revert_creds(old_cred);
+
+diff --git a/fs/file_table.c b/fs/file_table.c
+index 762f03dcbcd778..987e01da993894 100644
+--- a/fs/file_table.c
++++ b/fs/file_table.c
+@@ -50,6 +50,9 @@ struct backing_file {
+ struct path user_path;
+ freeptr_t bf_freeptr;
+ };
++#ifdef CONFIG_SECURITY
++ void *security;
++#endif
+ };
+
+ #define backing_file(f) container_of(f, struct backing_file, file)
+@@ -66,8 +69,21 @@ void backing_file_set_user_path(struct file *f, const struct path *path)
+ }
+ EXPORT_SYMBOL_GPL(backing_file_set_user_path);
+
++#ifdef CONFIG_SECURITY
++void *backing_file_security(const struct file *f)
++{
++ return backing_file(f)->security;
++}
++
++void backing_file_set_security(struct file *f, void *security)
++{
++ backing_file(f)->security = security;
++}
++#endif /* CONFIG_SECURITY */
++
+ static inline void backing_file_free(struct backing_file *ff)
+ {
++ security_backing_file_free(&ff->file);
+ path_put(&ff->user_path);
+ kmem_cache_free(bfilp_cachep, ff);
+ }
+@@ -288,10 +304,12 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+ return f;
+ }
+
+-static int init_backing_file(struct backing_file *ff)
++static int init_backing_file(struct backing_file *ff,
++ const struct file *user_file)
+ {
+ memset(&ff->user_path, 0, sizeof(ff->user_path));
+- return 0;
++ backing_file_set_security(&ff->file, NULL);
++ return security_backing_file_alloc(&ff->file, user_file);
+ }
+
+ /*
+@@ -301,7 +319,8 @@ static int init_backing_file(struct backing_file *ff)
+ * This is only for kernel internal use, and the allocate file must not be
+ * installed into file tables or such.
+ */
+-struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
++struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
++ const struct file *user_file)
+ {
+ struct backing_file *ff;
+ int error;
+@@ -318,7 +337,7 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+
+ /* The f_mode flags must be set before fput(). */
+ ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
+- error = init_backing_file(ff);
++ error = init_backing_file(ff, user_file);
+ if (unlikely(error)) {
+ fput(&ff->file);
+ return ERR_PTR(error);
+diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
+index 72de97c03d0eeb..f2d08ac2459b7e 100644
+--- a/fs/fuse/passthrough.c
++++ b/fs/fuse/passthrough.c
+@@ -167,7 +167,7 @@ struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id)
+ goto out;
+
+ /* Allocate backing file per fuse file to store fuse path */
+- backing_file = backing_file_open(&file->f_path, file->f_flags,
++ backing_file = backing_file_open(file, file->f_flags,
+ &fb->file->f_path, fb->cred);
+ err = PTR_ERR(backing_file);
+ if (IS_ERR(backing_file)) {
+diff --git a/fs/internal.h b/fs/internal.h
+index 9b2b4d11688023..51107fd515145b 100644
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -100,7 +100,8 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
+ */
+ struct file *alloc_empty_file(int flags, const struct cred *cred);
+ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
+-struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
++struct file *alloc_empty_backing_file(int flags, const struct cred *cred,
++ const struct file *user_file);
+ void backing_file_set_user_path(struct file *f, const struct path *path);
+
+ static inline void file_put_write_access(struct file *file)
+diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
+index a5e9ddf3023b39..e924321b64025e 100644
+--- a/fs/overlayfs/dir.c
++++ b/fs/overlayfs/dir.c
+@@ -1355,7 +1355,7 @@ static int ovl_create_tmpfile(struct file *file, struct dentry *dentry,
+ }
+
+ ovl_path_upper(dentry->d_parent, &realparentpath);
+- realfile = backing_tmpfile_open(&file->f_path, flags, &realparentpath,
++ realfile = backing_tmpfile_open(file, flags, &realparentpath,
+ mode, current_cred());
+ err = PTR_ERR_OR_ZERO(realfile);
+ pr_debug("tmpfile/open(%pd2, 0%o) = %i\n", realparentpath.dentry, mode, err);
+diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
+index 7ab2c9daffd017..3fedfdddfa7584 100644
+--- a/fs/overlayfs/file.c
++++ b/fs/overlayfs/file.c
+@@ -48,7 +48,7 @@ static struct file *ovl_open_realfile(const struct file *file,
+ if (!inode_owner_or_capable(real_idmap, realinode))
+ flags &= ~O_NOATIME;
+
+- realfile = backing_file_open(file_user_path(file),
++ realfile = backing_file_open(file,
+ flags, realpath, current_cred());
+ }
+ ovl_revert_creds(old_cred);
+diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
+index 1476a6ed1bfd77..c939cd222730c4 100644
+--- a/include/linux/backing-file.h
++++ b/include/linux/backing-file.h
+@@ -18,10 +18,10 @@ struct backing_file_ctx {
+ void (*end_write)(struct kiocb *iocb, ssize_t);
+ };
+
+-struct file *backing_file_open(const struct path *user_path, int flags,
++struct file *backing_file_open(const struct file *user_file, int flags,
+ const struct path *real_path,
+ const struct cred *cred);
+-struct file *backing_tmpfile_open(const struct path *user_path, int flags,
++struct file *backing_tmpfile_open(const struct file *user_file, int flags,
+ const struct path *real_parentpath,
+ umode_t mode, const struct cred *cred);
+ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 014cb04eefbe6c..f3e798184a58e8 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2890,6 +2890,19 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+ const struct cred *cred);
+ const struct path *backing_file_user_path(const struct file *f);
+
++#ifdef CONFIG_SECURITY
++void *backing_file_security(const struct file *f);
++void backing_file_set_security(struct file *f, void *security);
++#else
++static inline void *backing_file_security(const struct file *f)
++{
++ return NULL;
++}
++static inline void backing_file_set_security(struct file *f, void *security)
++{
++}
++#endif /* CONFIG_SECURITY */
++
+ /*
+ * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
+ * stored in ->vm_file is a backing file whose f_inode is on the underlying
+diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
+index 382c56a97bba1d..584db296e43b20 100644
+--- a/include/linux/lsm_audit.h
++++ b/include/linux/lsm_audit.h
+@@ -94,7 +94,7 @@ struct common_audit_data {
+ #endif
+ char *kmod_name;
+ struct lsm_ioctlop_audit *op;
+- struct file *file;
++ const struct file *file;
+ struct lsm_ibpkey_audit *ibpkey;
+ struct lsm_ibendport_audit *ibendport;
+ int reason;
+diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
+index 8c42b4bde09c07..b4958167e38196 100644
+--- a/include/linux/lsm_hook_defs.h
++++ b/include/linux/lsm_hook_defs.h
+@@ -191,6 +191,9 @@ LSM_HOOK(int, 0, file_permission, struct file *file, int mask)
+ LSM_HOOK(int, 0, file_alloc_security, struct file *file)
+ LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file)
+ LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file)
++LSM_HOOK(int, 0, backing_file_alloc, struct file *backing_file,
++ const struct file *user_file)
++LSM_HOOK(void, LSM_RET_VOID, backing_file_free, struct file *backing_file)
+ LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd,
+ unsigned long arg)
+ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
+@@ -198,6 +201,8 @@ LSM_HOOK(int, 0, file_ioctl_compat, struct file *file, unsigned int cmd,
+ LSM_HOOK(int, 0, mmap_addr, unsigned long addr)
+ LSM_HOOK(int, 0, mmap_file, struct file *file, unsigned long reqprot,
+ unsigned long prot, unsigned long flags)
++LSM_HOOK(int, 0, mmap_backing_file, struct vm_area_struct *vma,
++ struct file *backing_file, struct file *user_file)
+ LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot)
+ LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
+diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
+index 79ec5a2bdcca7a..ea4b0f5ca7f0ff 100644
+--- a/include/linux/lsm_hooks.h
++++ b/include/linux/lsm_hooks.h
+@@ -104,6 +104,7 @@ struct security_hook_list {
+ struct lsm_blob_sizes {
+ int lbs_cred;
+ int lbs_file;
++ int lbs_backing_file;
+ int lbs_ib;
+ int lbs_inode;
+ int lbs_sock;
+diff --git a/include/linux/security.h b/include/linux/security.h
+index b64598e5d65d75..e540253624268d 100644
+--- a/include/linux/security.h
++++ b/include/linux/security.h
+@@ -473,11 +473,17 @@ int security_file_permission(struct file *file, int mask);
+ int security_file_alloc(struct file *file);
+ void security_file_release(struct file *file);
+ void security_file_free(struct file *file);
++int security_backing_file_alloc(struct file *backing_file,
++ const struct file *user_file);
++void security_backing_file_free(struct file *backing_file);
+ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+ int security_file_ioctl_compat(struct file *file, unsigned int cmd,
+ unsigned long arg);
+ int security_mmap_file(struct file *file, unsigned long prot,
+ unsigned long flags);
++int security_mmap_backing_file(struct vm_area_struct *vma,
++ struct file *backing_file,
++ struct file *user_file);
+ int security_mmap_addr(unsigned long addr);
+ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
+ unsigned long prot);
+@@ -1142,6 +1148,15 @@ static inline void security_file_release(struct file *file)
+ static inline void security_file_free(struct file *file)
+ { }
+
++static inline int security_backing_file_alloc(struct file *backing_file,
++ const struct file *user_file)
++{
++ return 0;
++}
++
++static inline void security_backing_file_free(struct file *backing_file)
++{ }
++
+ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+ {
+@@ -1161,6 +1176,13 @@ static inline int security_mmap_file(struct file *file, unsigned long prot,
+ return 0;
+ }
+
++static inline int security_mmap_backing_file(struct vm_area_struct *vma,
++ struct file *backing_file,
++ struct file *user_file)
++{
++ return 0;
++}
++
+ static inline int security_mmap_addr(unsigned long addr)
+ {
+ return cap_mmap_addr(addr);
+diff --git a/security/security.c b/security/security.c
+index 603c3c6d5635d8..9285909908ab8a 100644
+--- a/security/security.c
++++ b/security/security.c
+@@ -94,6 +94,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX + 1] = {
+ static BLOCKING_NOTIFIER_HEAD(blocking_lsm_notifier_chain);
+
+ static struct kmem_cache *lsm_file_cache;
++static struct kmem_cache *lsm_backing_file_cache;
+ static struct kmem_cache *lsm_inode_cache;
+
+ char *lsm_names;
+@@ -265,6 +266,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
+
+ lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
+ lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
++ lsm_set_blob_size(&needed->lbs_backing_file, &blob_sizes.lbs_backing_file);
+ lsm_set_blob_size(&needed->lbs_ib, &blob_sizes.lbs_ib);
+ /*
+ * The inode blob gets an rcu_head in addition to
+@@ -470,6 +472,7 @@ static void __init ordered_lsm_init(void)
+
+ init_debug("cred blob size = %d\n", blob_sizes.lbs_cred);
+ init_debug("file blob size = %d\n", blob_sizes.lbs_file);
++ init_debug("lsm_backing_file_cache = %d\n", blob_sizes.lbs_backing_file);
+ init_debug("ib blob size = %d\n", blob_sizes.lbs_ib);
+ init_debug("inode blob size = %d\n", blob_sizes.lbs_inode);
+ init_debug("ipc blob size = %d\n", blob_sizes.lbs_ipc);
+@@ -495,6 +498,11 @@ static void __init ordered_lsm_init(void)
+ lsm_file_cache = kmem_cache_create("lsm_file_cache",
+ blob_sizes.lbs_file, 0,
+ SLAB_PANIC, NULL);
++ if (blob_sizes.lbs_backing_file)
++ lsm_backing_file_cache = kmem_cache_create(
++ "lsm_backing_file_cache",
++ blob_sizes.lbs_backing_file,
++ 0, SLAB_PANIC, NULL);
+ if (blob_sizes.lbs_inode)
+ lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
+ blob_sizes.lbs_inode, 0,
+@@ -671,6 +679,30 @@ int unregister_blocking_lsm_notifier(struct notifier_block *nb)
+ }
+ EXPORT_SYMBOL(unregister_blocking_lsm_notifier);
+
++/**
++ * lsm_backing_file_alloc - allocate a composite backing file blob
++ * @backing_file: the backing file
++ *
++ * Allocate the backing file blob for all the modules.
++ *
++ * Returns 0, or -ENOMEM if memory can't be allocated.
++ */
++static int lsm_backing_file_alloc(struct file *backing_file)
++{
++ void *blob;
++
++ if (!lsm_backing_file_cache) {
++ backing_file_set_security(backing_file, NULL);
++ return 0;
++ }
++
++ blob = kmem_cache_zalloc(lsm_backing_file_cache, GFP_KERNEL);
++ backing_file_set_security(backing_file, blob);
++ if (!blob)
++ return -ENOMEM;
++ return 0;
++}
++
+ /**
+ * lsm_blob_alloc - allocate a composite blob
+ * @dest: the destination for the blob
+@@ -2965,6 +2997,57 @@ void security_file_free(struct file *file)
+ }
+ }
+
++/**
++ * security_backing_file_alloc() - Allocate and setup a backing file blob
++ * @backing_file: the backing file
++ * @user_file: the associated user visible file
++ *
++ * Allocate a backing file LSM blob and perform any necessary initialization of
++ * the LSM blob. There will be some operations where the LSM will not have
++ * access to @user_file after this point, so any important state associated
++ * with @user_file that is important to the LSM should be captured in the
++ * backing file's LSM blob.
++ *
++ * LSMs should avoid taking a reference to @user_file in this hook as it will
++ * result in problems later when the system attempts to drop/put the file
++ * references due to a circular dependency.
++ *
++ * Return: Return 0 if the hook is successful, negative values otherwise.
++ */
++int security_backing_file_alloc(struct file *backing_file,
++ const struct file *user_file)
++{
++ int rc;
++
++ rc = lsm_backing_file_alloc(backing_file);
++ if (rc)
++ return rc;
++ rc = call_int_hook(backing_file_alloc, backing_file, user_file);
++ if (unlikely(rc))
++ security_backing_file_free(backing_file);
++
++ return rc;
++}
++
++/**
++ * security_backing_file_free() - Free a backing file blob
++ * @backing_file: the backing file
++ *
++ * Free any LSM state associated with a backing file's LSM blob, including the
++ * blob itself.
++ */
++void security_backing_file_free(struct file *backing_file)
++{
++ void *blob = backing_file_security(backing_file);
++
++ call_void_hook(backing_file_free, backing_file);
++
++ if (blob) {
++ backing_file_set_security(backing_file, NULL);
++ kmem_cache_free(lsm_backing_file_cache, blob);
++ }
++}
++
+ /**
+ * security_file_ioctl() - Check if an ioctl is allowed
+ * @file: associated file
+@@ -3053,6 +3136,32 @@ int security_mmap_file(struct file *file, unsigned long prot,
+ flags);
+ }
+
++/**
++ * security_mmap_backing_file - Check if mmap'ing a backing file is allowed
++ * @vma: the vm_area_struct for the mmap'd region
++ * @backing_file: the backing file being mmap'd
++ * @user_file: the user file being mmap'd
++ *
++ * Check permissions for a mmap operation on a stacked filesystem. This hook
++ * is called after the security_mmap_file() and is responsible for authorizing
++ * the mmap on @backing_file. It is important to note that the mmap operation
++ * on @user_file has already been authorized and the @vma->vm_file has been
++ * set to @backing_file.
++ *
++ * Return: Returns 0 if permission is granted.
++ */
++int security_mmap_backing_file(struct vm_area_struct *vma,
++ struct file *backing_file,
++ struct file *user_file)
++{
++ /* recommended by the stackable filesystem devs */
++ if (WARN_ON_ONCE(!(backing_file->f_mode & FMODE_BACKING)))
++ return -EIO;
++
++ return call_int_hook(mmap_backing_file, vma, backing_file, user_file);
++}
++EXPORT_SYMBOL_GPL(security_mmap_backing_file);
++
+ /**
+ * security_mmap_addr() - Check if mmap'ing an address is allowed
+ * @addr: address
+--
+2.53.0
+
--- /dev/null
+From 57ddf33f22f73696788287ee86f64be3332ed730 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 15:50:35 +0800
+Subject: selinux: fix overlayfs mmap() and mprotect() access checks
+
+From: Paul Moore <paul@paul-moore.com>
+
+[ Upstream commit 82544d36b1729153c8aeb179e84750f0c085d3b1 ]
+
+The existing SELinux security model for overlayfs is to allow access if
+the current task is able to access the top level file (the "user" file)
+and the mounter's credentials are sufficient to access the lower
+level file (the "backing" file). Unfortunately, the current code does
+not properly enforce these access controls for both mmap() and mprotect()
+operations on overlayfs filesystems.
+
+This patch makes use of the newly created security_mmap_backing_file()
+LSM hook to provide the missing backing file enforcement for mmap()
+operations, and leverages the backing file API and new LSM blob to
+provide the necessary information to properly enforce the mprotect()
+access controls.
+
+Cc: stable@vger.kernel.org
+Acked-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Cai Xinchen <caixinchen1@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ security/selinux/hooks.c | 242 ++++++++++++++++++++++--------
+ security/selinux/include/objsec.h | 11 ++
+ 2 files changed, 189 insertions(+), 64 deletions(-)
+
+diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
+index 3da3017ad2ca06..f96ee8f372e3b2 100644
+--- a/security/selinux/hooks.c
++++ b/security/selinux/hooks.c
+@@ -1739,49 +1739,72 @@ static inline int file_path_has_perm(const struct cred *cred,
+ static int bpf_fd_pass(const struct file *file, u32 sid);
+ #endif
+
+-/* Check whether a task can use an open file descriptor to
+- access an inode in a given way. Check access to the
+- descriptor itself, and then use dentry_has_perm to
+- check a particular permission to the file.
+- Access to the descriptor is implicitly granted if it
+- has the same SID as the process. If av is zero, then
+- access to the file is not checked, e.g. for cases
+- where only the descriptor is affected like seek. */
+-static int file_has_perm(const struct cred *cred,
+- struct file *file,
+- u32 av)
++static int __file_has_perm(const struct cred *cred, const struct file *file,
++ u32 av, bool bf_user_file)
++
+ {
+- struct file_security_struct *fsec = selinux_file(file);
+- struct inode *inode = file_inode(file);
+ struct common_audit_data ad;
+- u32 sid = cred_sid(cred);
++ struct inode *inode;
++ u32 ssid = cred_sid(cred);
++ u32 tsid_fd;
+ int rc;
+
+- ad.type = LSM_AUDIT_DATA_FILE;
+- ad.u.file = file;
++ if (bf_user_file) {
++ struct backing_file_security_struct *bfsec;
++ const struct path *path;
+
+- if (sid != fsec->sid) {
+- rc = avc_has_perm(sid, fsec->sid,
+- SECCLASS_FD,
+- FD__USE,
+- &ad);
++ if (WARN_ON(!(file->f_mode & FMODE_BACKING)))
++ return -EIO;
++
++ bfsec = selinux_backing_file(file);
++ path = backing_file_user_path(file);
++ tsid_fd = bfsec->uf_sid;
++ inode = d_inode(path->dentry);
++
++ ad.type = LSM_AUDIT_DATA_PATH;
++ ad.u.path = *path;
++ } else {
++ struct file_security_struct *fsec = selinux_file(file);
++
++ tsid_fd = fsec->sid;
++ inode = file_inode(file);
++
++ ad.type = LSM_AUDIT_DATA_FILE;
++ ad.u.file = file;
++ }
++
++ if (ssid != tsid_fd) {
++ rc = avc_has_perm(ssid, tsid_fd, SECCLASS_FD, FD__USE, &ad);
+ if (rc)
+- goto out;
++ return rc;
+ }
+
+ #ifdef CONFIG_BPF_SYSCALL
+- rc = bpf_fd_pass(file, cred_sid(cred));
++ /* regardless of backing vs user file, use the underlying file here */
++ rc = bpf_fd_pass(file, ssid);
+ if (rc)
+ return rc;
+ #endif
+
+ /* av is zero if only checking access to the descriptor. */
+- rc = 0;
+ if (av)
+- rc = inode_has_perm(cred, inode, av, &ad);
++ return inode_has_perm(cred, inode, av, &ad);
+
+-out:
+- return rc;
++ return 0;
++}
++
++/* Check whether a task can use an open file descriptor to
++ access an inode in a given way. Check access to the
++ descriptor itself, and then use dentry_has_perm to
++ check a particular permission to the file.
++ Access to the descriptor is implicitly granted if it
++ has the same SID as the process. If av is zero, then
++ access to the file is not checked, e.g. for cases
++ where only the descriptor is affected like seek. */
++static inline int file_has_perm(const struct cred *cred,
++ const struct file *file, u32 av)
++{
++ return __file_has_perm(cred, file, av, false);
+ }
+
+ /*
+@@ -3799,6 +3822,17 @@ static int selinux_file_alloc_security(struct file *file)
+ return 0;
+ }
+
++static int selinux_backing_file_alloc(struct file *backing_file,
++ const struct file *user_file)
++{
++ struct backing_file_security_struct *bfsec;
++
++ bfsec = selinux_backing_file(backing_file);
++ bfsec->uf_sid = selinux_file(user_file)->sid;
++
++ return 0;
++}
++
+ /*
+ * Check whether a task has the ioctl permission and cmd
+ * operation to an inode.
+@@ -3916,42 +3950,55 @@ static int selinux_file_ioctl_compat(struct file *file, unsigned int cmd,
+
+ static int default_noexec __ro_after_init;
+
+-static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
++static int __file_map_prot_check(const struct cred *cred,
++ const struct file *file, unsigned long prot,
++ bool shared, bool bf_user_file)
+ {
+- const struct cred *cred = current_cred();
+- u32 sid = cred_sid(cred);
+- int rc = 0;
++ struct inode *inode = NULL;
++ bool prot_exec = prot & PROT_EXEC;
++ bool prot_write = prot & PROT_WRITE;
++
++ if (file) {
++ if (bf_user_file)
++ inode = d_inode(backing_file_user_path(file)->dentry);
++ else
++ inode = file_inode(file);
++ }
++
++ if (default_noexec && prot_exec &&
++ (!file || IS_PRIVATE(inode) || (!shared && prot_write))) {
++ int rc;
++ u32 sid = cred_sid(cred);
+
+- if (default_noexec &&
+- (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) ||
+- (!shared && (prot & PROT_WRITE)))) {
+ /*
+- * We are making executable an anonymous mapping or a
+- * private file mapping that will also be writable.
+- * This has an additional check.
++ * We are making executable an anonymous mapping or a private
++ * file mapping that will also be writable.
+ */
+- rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+- PROCESS__EXECMEM, NULL);
++ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM,
++ NULL);
+ if (rc)
+- goto error;
++ return rc;
+ }
+
+ if (file) {
+- /* read access is always possible with a mapping */
++ /* "read" always possible, "write" only if shared */
+ u32 av = FILE__READ;
+-
+- /* write access only matters if the mapping is shared */
+- if (shared && (prot & PROT_WRITE))
++ if (shared && prot_write)
+ av |= FILE__WRITE;
+-
+- if (prot & PROT_EXEC)
++ if (prot_exec)
+ av |= FILE__EXECUTE;
+
+- return file_has_perm(cred, file, av);
++ return __file_has_perm(cred, file, av, bf_user_file);
+ }
+
+-error:
+- return rc;
++ return 0;
++}
++
++static inline int file_map_prot_check(const struct cred *cred,
++ const struct file *file,
++ unsigned long prot, bool shared)
++{
++ return __file_map_prot_check(cred, file, prot, shared, false);
+ }
+
+ static int selinux_mmap_addr(unsigned long addr)
+@@ -3967,36 +4014,80 @@ static int selinux_mmap_addr(unsigned long addr)
+ return rc;
+ }
+
+-static int selinux_mmap_file(struct file *file,
+- unsigned long reqprot __always_unused,
+- unsigned long prot, unsigned long flags)
++static int selinux_mmap_file_common(const struct cred *cred, struct file *file,
++ unsigned long prot, bool shared)
+ {
+- struct common_audit_data ad;
+- int rc;
+-
+ if (file) {
++ int rc;
++ struct common_audit_data ad;
++
+ ad.type = LSM_AUDIT_DATA_FILE;
+ ad.u.file = file;
+- rc = inode_has_perm(current_cred(), file_inode(file),
+- FILE__MAP, &ad);
++ rc = inode_has_perm(cred, file_inode(file), FILE__MAP, &ad);
+ if (rc)
+ return rc;
+ }
+
+- return file_map_prot_check(file, prot,
+- (flags & MAP_TYPE) == MAP_SHARED);
++ return file_map_prot_check(cred, file, prot, shared);
++}
++
++static int selinux_mmap_file(struct file *file,
++ unsigned long reqprot __always_unused,
++ unsigned long prot, unsigned long flags)
++{
++ return selinux_mmap_file_common(current_cred(), file, prot,
++ (flags & MAP_TYPE) == MAP_SHARED);
++}
++
++/**
++ * selinux_mmap_backing_file - Check mmap permissions on a backing file
++ * @vma: memory region
++ * @backing_file: stacked filesystem backing file
++ * @user_file: user visible file
++ *
++ * This is called after selinux_mmap_file() on stacked filesystems, and it
++ * is this function's responsibility to verify access to @backing_file and
++ * setup the SELinux state for possible later use in the mprotect() code path.
++ *
++ * By the time this function is called, mmap() access to @user_file has already
++ * been authorized and @vma->vm_file has been set to point to @backing_file.
++ *
++ * Return zero on success, negative values otherwise.
++ */
++static int selinux_mmap_backing_file(struct vm_area_struct *vma,
++ struct file *backing_file,
++ struct file *user_file __always_unused)
++{
++ unsigned long prot = 0;
++
++ /* translate vma->vm_flags perms into PROT perms */
++ if (vma->vm_flags & VM_READ)
++ prot |= PROT_READ;
++ if (vma->vm_flags & VM_WRITE)
++ prot |= PROT_WRITE;
++ if (vma->vm_flags & VM_EXEC)
++ prot |= PROT_EXEC;
++
++ return selinux_mmap_file_common(backing_file->f_cred, backing_file,
++ prot, vma->vm_flags & VM_SHARED);
+ }
+
+ static int selinux_file_mprotect(struct vm_area_struct *vma,
+ unsigned long reqprot __always_unused,
+ unsigned long prot)
+ {
++ int rc;
+ const struct cred *cred = current_cred();
+ u32 sid = cred_sid(cred);
++ const struct file *file = vma->vm_file;
++ bool backing_file;
++ bool shared = vma->vm_flags & VM_SHARED;
++
++ /* check if we need to trigger the "backing files are awful" mode */
++ backing_file = file && (file->f_mode & FMODE_BACKING);
+
+ if (default_noexec &&
+ (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) {
+- int rc = 0;
+ /*
+ * We don't use the vma_is_initial_heap() helper as it has
+ * a history of problems and is currently broken on systems
+@@ -4010,11 +4101,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
+ vma->vm_end <= vma->vm_mm->brk) {
+ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ PROCESS__EXECHEAP, NULL);
+- } else if (!vma->vm_file && (vma_is_initial_stack(vma) ||
++ if (rc)
++ return rc;
++ } else if (!file && (vma_is_initial_stack(vma) ||
+ vma_is_stack_for_current(vma))) {
+ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ PROCESS__EXECSTACK, NULL);
+- } else if (vma->vm_file && vma->anon_vma) {
++ if (rc)
++ return rc;
++ } else if (file && vma->anon_vma) {
+ /*
+ * We are making executable a file mapping that has
+ * had some COW done. Since pages might have been
+@@ -4022,13 +4117,29 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
+ * modified content. This typically should only
+ * occur for text relocations.
+ */
+- rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD);
++ rc = __file_has_perm(cred, file, FILE__EXECMOD,
++ backing_file);
++ if (rc)
++ return rc;
++ if (backing_file) {
++ rc = file_has_perm(file->f_cred, file,
++ FILE__EXECMOD);
++ if (rc)
++ return rc;
++ }
+ }
++ }
++
++ rc = __file_map_prot_check(cred, file, prot, shared, backing_file);
++ if (rc)
++ return rc;
++ if (backing_file) {
++ rc = file_map_prot_check(file->f_cred, file, prot, shared);
+ if (rc)
+ return rc;
+ }
+
+- return file_map_prot_check(vma->vm_file, prot, vma->vm_flags&VM_SHARED);
++ return 0;
+ }
+
+ static int selinux_file_lock(struct file *file, unsigned int cmd)
+@@ -7140,6 +7251,7 @@ struct lsm_blob_sizes selinux_blob_sizes __ro_after_init = {
+ .lbs_cred = sizeof(struct cred_security_struct),
+ .lbs_task = sizeof(struct task_security_struct),
+ .lbs_file = sizeof(struct file_security_struct),
++ .lbs_backing_file = sizeof(struct backing_file_security_struct),
+ .lbs_inode = sizeof(struct inode_security_struct),
+ .lbs_ipc = sizeof(struct ipc_security_struct),
+ .lbs_key = sizeof(struct key_security_struct),
+@@ -7363,9 +7475,11 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
+
+ LSM_HOOK_INIT(file_permission, selinux_file_permission),
+ LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
++ LSM_HOOK_INIT(backing_file_alloc, selinux_backing_file_alloc),
+ LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
+ LSM_HOOK_INIT(file_ioctl_compat, selinux_file_ioctl_compat),
+ LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
++ LSM_HOOK_INIT(mmap_backing_file, selinux_mmap_backing_file),
+ LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
+ LSM_HOOK_INIT(file_mprotect, selinux_file_mprotect),
+ LSM_HOOK_INIT(file_lock, selinux_file_lock),
+diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
+index 816fde5a5896c1..fcb46793898f5e 100644
+--- a/security/selinux/include/objsec.h
++++ b/security/selinux/include/objsec.h
+@@ -86,6 +86,10 @@ struct file_security_struct {
+ u32 pseqno; /* Policy seqno at the time of file open */
+ };
+
++struct backing_file_security_struct {
++ u32 uf_sid; /* associated user file fsec->sid */
++};
++
+ struct superblock_security_struct {
+ u32 sid; /* SID of file system superblock */
+ u32 def_sid; /* default SID for labeling */
+@@ -190,6 +194,13 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
+ return file->f_security + selinux_blob_sizes.lbs_file;
+ }
+
++static inline struct backing_file_security_struct *
++selinux_backing_file(const struct file *backing_file)
++{
++ void *blob = backing_file_security(backing_file);
++ return blob + selinux_blob_sizes.lbs_backing_file;
++}
++
+ static inline struct inode_security_struct *
+ selinux_inode(const struct inode *inode)
+ {
+--
+2.53.0
+
--- /dev/null
+kvm-x86-fix-shadow-paging-use-after-free-due-to-unex.patch
+lsm-add-backing_file-lsm-hooks.patch
+selinux-fix-overlayfs-mmap-and-mprotect-access-check.patch
--- /dev/null
+From 01314afe47db7d0abc07cfbc9a851f92ed98f451 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:14:00 +0800
+Subject: eventpoll: drop vestigial __ prefix from ep_remove_{file,epi}()
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 0feaf644f7180c4a91b6b405a881afbfd958f1cf ]
+
+With __ep_remove() gone, the double-underscore on __ep_remove_file()
+and __ep_remove_epi() no longer contrasts with a __-less parent and
+just reads as noise. Rename both to ep_remove_file() and
+ep_remove_epi(). No functional change.
+
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Stable-dep-of: a6dc643c6931 ("eventpoll: fix ep_remove struct eventpoll / struct file UAF")
+Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 766716c2fd92a0..0a54a42263575f 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -719,7 +719,7 @@ static void ep_free(struct eventpoll *ep)
+ * Called with &file->f_lock held,
+ * returns with it released
+ */
+-static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi,
++static void ep_remove_file(struct eventpoll *ep, struct epitem *epi,
+ struct file *file)
+ {
+ struct epitems_head *to_free = NULL;
+@@ -743,7 +743,7 @@ static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi,
+ free_ephead(to_free);
+ }
+
+-static bool __ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
++static bool ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
+ {
+ lockdep_assert_held(&ep->mtx);
+
+@@ -789,9 +789,9 @@ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
+ spin_unlock(&file->f_lock);
+ return;
+ }
+- __ep_remove_file(ep, epi, file);
++ ep_remove_file(ep, epi, file);
+
+- if (__ep_remove_epi(ep, epi))
++ if (ep_remove_epi(ep, epi))
+ WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
+ }
+
+@@ -1013,8 +1013,8 @@ void eventpoll_release_file(struct file *file)
+ ep_unregister_pollwait(ep, epi);
+
+ spin_lock(&file->f_lock);
+- __ep_remove_file(ep, epi, file);
+- dispose = __ep_remove_epi(ep, epi);
++ ep_remove_file(ep, epi, file);
++ dispose = ep_remove_epi(ep, epi);
+
+ mutex_unlock(&ep->mtx);
+
+--
+2.53.0
+
--- /dev/null
+From 23c26dd42ccf99e2d0574dfc93569898693ac979 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:14:03 +0800
+Subject: eventpoll: fix ep_remove struct eventpoll / struct file UAF
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit a6dc643c69311677c574a0f17a3f4d66a5f3744b ]
+
+ep_remove() (via ep_remove_file()) cleared file->f_ep under
+file->f_lock but then kept using @file inside the critical section
+(is_file_epoll(), hlist_del_rcu() through the head, spin_unlock).
+A concurrent __fput() taking the eventpoll_release() fastpath in
+that window observed the transient NULL, skipped
+eventpoll_release_file() and ran to f_op->release / file_free().
+
+For the epoll-watches-epoll case, f_op->release is
+ep_eventpoll_release() -> ep_clear_and_put() -> ep_free(), which
+kfree()s the watched struct eventpoll. Its embedded ->refs
+hlist_head is exactly where epi->fllink.pprev points, so the
+subsequent hlist_del_rcu()'s "*pprev = next" scribbles into freed
+kmalloc-192 memory.
+
+In addition, struct file is SLAB_TYPESAFE_BY_RCU, so the slot
+backing @file could be recycled by alloc_empty_file() --
+reinitializing f_lock and f_ep -- while ep_remove() is still
+nominally inside that lock. The upshot is an attacker-controllable
+kmem_cache_free() against the wrong slab cache.
+
+Pin @file via epi_fget() at the top of ep_remove() and gate the
+critical section on the pin succeeding. With the pin held @file
+cannot reach refcount zero, which holds __fput() off and
+transitively keeps the watched struct eventpoll alive across the
+hlist_del_rcu() and the f_lock use, closing both UAFs.
+
+If the pin fails @file has already reached refcount zero and its
+__fput() is in flight. Because we bailed before clearing f_ep,
+that path takes the eventpoll_release() slow path into
+eventpoll_release_file() and blocks on ep->mtx until the waiter
+side's ep_clear_and_put() drops it. The bailed epi's share of
+ep->refcount stays intact, so the trailing ep_refcount_dec_and_test()
+in ep_clear_and_put() cannot free the eventpoll out from under
+eventpoll_release_file(); the orphaned epi is then cleaned up
+there.
+
+A successful pin also proves we are not racing
+eventpoll_release_file() on this epi, so drop the now-redundant
+re-check of epi->dying under f_lock. The cheap lockless
+READ_ONCE(epi->dying) fast-path bailout stays.
+
+Fixes: 58c9b016e128 ("epoll: use refcount to reduce ep_mutex contention")
+Reported-by: Jaeyoung Chung <jjy600901@snu.ac.kr>
+Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-6-2470f9eec0f5@kernel.org
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+(cherry picked from commit a6dc643c69311677c574a0f17a3f4d66a5f3744b)
+Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 16 ++++++++++------
+ 1 file changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index fc4668a403c9d3..0e09bddea16a5f 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -801,22 +801,26 @@ static bool ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
+ */
+ static void ep_remove(struct eventpoll *ep, struct epitem *epi)
+ {
+- struct file *file = epi->ffd.file;
++ struct file *file __free(fput) = NULL;
+
+ lockdep_assert_irqs_enabled();
+ lockdep_assert_held(&ep->mtx);
+
+ ep_unregister_pollwait(ep, epi);
+
+- /* sync with eventpoll_release_file() */
++ /* cheap sync with eventpoll_release_file() */
+ if (unlikely(READ_ONCE(epi->dying)))
+ return;
+
+- spin_lock(&file->f_lock);
+- if (epi->dying) {
+- spin_unlock(&file->f_lock);
++ /*
++ * If we manage to grab a reference it means we're not in
++ * eventpoll_release_file() and aren't going to be.
++ */
++ file = epi_fget(epi);
++ if (!file)
+ return;
+- }
++
++ spin_lock(&file->f_lock);
+ ep_remove_file(ep, epi, file);
+
+ if (ep_remove_epi(ep, epi))
+--
+2.53.0
+
--- /dev/null
+From 703f69e2424badc5012bd77ea32f68a917a59063 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:13:59 +0800
+Subject: eventpoll: kill __ep_remove()
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit e9e5cd40d7c403e19f21d0f7b8b8ba3a76b58330 ]
+
+Remove the boolean conditional in __ep_remove() and restructure the code
+so the checks for racing with eventpoll_release_file() are only done in
+the ep_remove_safe() path where they belong.
+
+Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-3-2470f9eec0f5@kernel.org
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Stable-dep-of: a6dc643c6931 ("eventpoll: fix ep_remove struct eventpoll / struct file UAF")
+Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 67 ++++++++++++++++++++++----------------------------
+ 1 file changed, 30 insertions(+), 37 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index ae9cb82764482c..766716c2fd92a0 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -715,49 +715,18 @@ static void ep_free(struct eventpoll *ep)
+ kfree_rcu(ep, rcu);
+ }
+
+-static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi, struct file *file);
+-static bool __ep_remove_epi(struct eventpoll *ep, struct epitem *epi);
+-
+-/*
+- * Removes a "struct epitem" from the eventpoll RB tree and deallocates
+- * all the associated resources. Must be called with "mtx" held.
+- * If the dying flag is set, do the removal only if force is true.
+- * This prevents ep_clear_and_put() from dropping all the ep references
+- * while running concurrently with eventpoll_release_file().
+- * Returns true if the eventpoll can be disposed.
+- */
+-static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
+-{
+- struct file *file = epi->ffd.file;
+-
+- lockdep_assert_irqs_enabled();
+-
+- /*
+- * Removes poll wait queue hooks.
+- */
+- ep_unregister_pollwait(ep, epi);
+-
+- /* Remove the current item from the list of epoll hooks */
+- spin_lock(&file->f_lock);
+- if (epi->dying && !force) {
+- spin_unlock(&file->f_lock);
+- return false;
+- }
+-
+- __ep_remove_file(ep, epi, file);
+- return __ep_remove_epi(ep, epi);
+-}
+-
+ /*
+ * Called with &file->f_lock held,
+ * returns with it released
+ */
+-static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi, struct file *file)
++static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi,
++ struct file *file)
+ {
+ struct epitems_head *to_free = NULL;
+ struct hlist_head *head = file->f_ep;
+
+ lockdep_assert_held(&ep->mtx);
++ lockdep_assert_held(&file->f_lock);
+
+ if (hlist_is_singular_node(&epi->fllink, head)) {
+ /* See eventpoll_release() for details. */
+@@ -804,7 +773,25 @@ static bool __ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
+ */
+ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
+ {
+- if (__ep_remove(ep, epi, false))
++ struct file *file = epi->ffd.file;
++
++ lockdep_assert_irqs_enabled();
++ lockdep_assert_held(&ep->mtx);
++
++ ep_unregister_pollwait(ep, epi);
++
++ /* sync with eventpoll_release_file() */
++ if (unlikely(READ_ONCE(epi->dying)))
++ return;
++
++ spin_lock(&file->f_lock);
++ if (epi->dying) {
++ spin_unlock(&file->f_lock);
++ return;
++ }
++ __ep_remove_file(ep, epi, file);
++
++ if (__ep_remove_epi(ep, epi))
+ WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
+ }
+
+@@ -1013,7 +1000,7 @@ void eventpoll_release_file(struct file *file)
+ spin_lock(&file->f_lock);
+ if (file->f_ep && file->f_ep->first) {
+ epi = hlist_entry(file->f_ep->first, struct epitem, fllink);
+- epi->dying = true;
++ WRITE_ONCE(epi->dying, true);
+ spin_unlock(&file->f_lock);
+
+ /*
+@@ -1022,7 +1009,13 @@ void eventpoll_release_file(struct file *file)
+ */
+ ep = epi->ep;
+ mutex_lock(&ep->mtx);
+- dispose = __ep_remove(ep, epi, true);
++
++ ep_unregister_pollwait(ep, epi);
++
++ spin_lock(&file->f_lock);
++ __ep_remove_file(ep, epi, file);
++ dispose = __ep_remove_epi(ep, epi);
++
+ mutex_unlock(&ep->mtx);
+
+ if (dispose && ep_refcount_dec_and_test(ep))
+--
+2.53.0
+
--- /dev/null
+From ca3b789c9d69a0caf13143935a98832cf0e8f59f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:14:02 +0800
+Subject: eventpoll: move epi_fget() up
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 86e87059e6d1fd5115a31949726450ed03c1073b ]
+
+We'll need it when removing files so move it up. No functional change.
+
+Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-5-2470f9eec0f5@kernel.org
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Stable-dep-of: a6dc643c6931 ("eventpoll: fix ep_remove struct eventpoll / struct file UAF")
+[file_ref_get(&file->f_ref) from original commit left as
+ atomic_long_inc_not_zero(&file->f_count) due to v6.12.y missing commit
+ 90ee6ed776c0 ("fs: port files to file_ref") and its prerequisite commit
+ 08ef26ea9ab3 ("fs: add file_ref")]
+Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 56 +++++++++++++++++++++++++-------------------------
+ 1 file changed, 28 insertions(+), 28 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index db5d7c1d726c83..fc4668a403c9d3 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -715,6 +715,34 @@ static void ep_free(struct eventpoll *ep)
+ kfree_rcu(ep, rcu);
+ }
+
++/*
++ * The ffd.file pointer may be in the process of being torn down due to
++ * being closed, but we may not have finished eventpoll_release() yet.
++ *
++ * Normally, even with the atomic_long_inc_not_zero, the file may have
++ * been free'd and then gotten re-allocated to something else (since
++ * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
++ *
++ * But for epoll, users hold the ep->mtx mutex, and as such any file in
++ * the process of being free'd will block in eventpoll_release_file()
++ * and thus the underlying file allocation will not be free'd, and the
++ * file re-use cannot happen.
++ *
++ * For the same reason we can avoid a rcu_read_lock() around the
++ * operation - 'ffd.file' cannot go away even if the refcount has
++ * reached zero (but we must still not call out to ->poll() functions
++ * etc).
++ */
++static struct file *epi_fget(const struct epitem *epi)
++{
++ struct file *file;
++
++ file = epi->ffd.file;
++ if (!atomic_long_inc_not_zero(&file->f_count))
++ file = NULL;
++ return file;
++}
++
+ /*
+ * Called with &file->f_lock held,
+ * returns with it released
+@@ -886,34 +914,6 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
+ return res;
+ }
+
+-/*
+- * The ffd.file pointer may be in the process of being torn down due to
+- * being closed, but we may not have finished eventpoll_release() yet.
+- *
+- * Normally, even with the atomic_long_inc_not_zero, the file may have
+- * been free'd and then gotten re-allocated to something else (since
+- * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
+- *
+- * But for epoll, users hold the ep->mtx mutex, and as such any file in
+- * the process of being free'd will block in eventpoll_release_file()
+- * and thus the underlying file allocation will not be free'd, and the
+- * file re-use cannot happen.
+- *
+- * For the same reason we can avoid a rcu_read_lock() around the
+- * operation - 'ffd.file' cannot go away even if the refcount has
+- * reached zero (but we must still not call out to ->poll() functions
+- * etc).
+- */
+-static struct file *epi_fget(const struct epitem *epi)
+-{
+- struct file *file;
+-
+- file = epi->ffd.file;
+- if (!atomic_long_inc_not_zero(&file->f_count))
+- file = NULL;
+- return file;
+-}
+-
+ /*
+ * Differs from ep_eventpoll_poll() in that internal callers already have
+ * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
+--
+2.53.0
+
--- /dev/null
+From 177b4d0273b7f3dc4ecb1d9005d98b7e840e1229 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:14:01 +0800
+Subject: eventpoll: rename ep_remove_safe() back to ep_remove()
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 0bade234723e40e4937be912e105785d6a51464e ]
+
+The current name is just confusing and doesn't clarify anything.
+
+Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-4-2470f9eec0f5@kernel.org
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Stable-dep-of: a6dc643c6931 ("eventpoll: fix ep_remove struct eventpoll / struct file UAF")
+Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 0a54a42263575f..db5d7c1d726c83 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -771,7 +771,7 @@ static bool ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
+ /*
+ * ep_remove variant for callers owing an additional reference to the ep
+ */
+-static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
++static void ep_remove(struct eventpoll *ep, struct epitem *epi)
+ {
+ struct file *file = epi->ffd.file;
+
+@@ -818,7 +818,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
+
+ /*
+ * Walks through the whole tree and try to free each "struct epitem".
+- * Note that ep_remove_safe() will not remove the epitem in case of a
++ * Note that ep_remove() will not remove the epitem in case of a
+ * racing eventpoll_release_file(); the latter will do the removal.
+ * At this point we are sure no poll callbacks will be lingering around.
+ * Since we still own a reference to the eventpoll struct, the loop can't
+@@ -827,7 +827,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
+ for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) {
+ next = rb_next(rbp);
+ epi = rb_entry(rbp, struct epitem, rbn);
+- ep_remove_safe(ep, epi);
++ ep_remove(ep, epi);
+ cond_resched();
+ }
+
+@@ -1497,21 +1497,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
+ mutex_unlock(&tep->mtx);
+
+ /*
+- * ep_remove_safe() calls in the later error paths can't lead to
++ * ep_remove() calls in the later error paths can't lead to
+ * ep_free() as the ep file itself still holds an ep reference.
+ */
+ ep_get(ep);
+
+ /* now check if we've created too many backpaths */
+ if (unlikely(full_check && reverse_path_check())) {
+- ep_remove_safe(ep, epi);
++ ep_remove(ep, epi);
+ return -EINVAL;
+ }
+
+ if (epi->event.events & EPOLLWAKEUP) {
+ error = ep_create_wakeup_source(epi);
+ if (error) {
+- ep_remove_safe(ep, epi);
++ ep_remove(ep, epi);
+ return error;
+ }
+ }
+@@ -1535,7 +1535,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
+ * high memory pressure.
+ */
+ if (unlikely(!epq.epi)) {
+- ep_remove_safe(ep, epi);
++ ep_remove(ep, epi);
+ return -ENOMEM;
+ }
+
+@@ -2227,7 +2227,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+ * The eventpoll itself is still alive: the refcount
+ * can't go to zero here.
+ */
+- ep_remove_safe(ep, epi);
++ ep_remove(ep, epi);
+ error = 0;
+ } else {
+ error = -ENOENT;
+--
+2.53.0
+
--- /dev/null
+From efc3e478b693c4904b75b800ca605fb623b6510e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:13:58 +0800
+Subject: eventpoll: split __ep_remove()
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 0f7bdfd413000985de09fc39eb9efa1e091a3ce0 ]
+
+Split __ep_remove() to delineate file removal from epoll item removal.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-2-2470f9eec0f5@kernel.org
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Stable-dep-of: a6dc643c6931 ("eventpoll: fix ep_remove struct eventpoll / struct file UAF")
+Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 4f05d12a05031a..ae9cb82764482c 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -715,6 +715,9 @@ static void ep_free(struct eventpoll *ep)
+ kfree_rcu(ep, rcu);
+ }
+
++static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi, struct file *file);
++static bool __ep_remove_epi(struct eventpoll *ep, struct epitem *epi);
++
+ /*
+ * Removes a "struct epitem" from the eventpoll RB tree and deallocates
+ * all the associated resources. Must be called with "mtx" held.
+@@ -726,8 +729,6 @@ static void ep_free(struct eventpoll *ep)
+ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
+ {
+ struct file *file = epi->ffd.file;
+- struct epitems_head *to_free;
+- struct hlist_head *head;
+
+ lockdep_assert_irqs_enabled();
+
+@@ -743,8 +744,21 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
+ return false;
+ }
+
+- to_free = NULL;
+- head = file->f_ep;
++ __ep_remove_file(ep, epi, file);
++ return __ep_remove_epi(ep, epi);
++}
++
++/*
++ * Called with &file->f_lock held,
++ * returns with it released
++ */
++static void __ep_remove_file(struct eventpoll *ep, struct epitem *epi, struct file *file)
++{
++ struct epitems_head *to_free = NULL;
++ struct hlist_head *head = file->f_ep;
++
++ lockdep_assert_held(&ep->mtx);
++
+ if (hlist_is_singular_node(&epi->fllink, head)) {
+ /* See eventpoll_release() for details. */
+ WRITE_ONCE(file->f_ep, NULL);
+@@ -758,6 +772,11 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
+ hlist_del_rcu(&epi->fllink);
+ spin_unlock(&file->f_lock);
+ free_ephead(to_free);
++}
++
++static bool __ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
++{
++ lockdep_assert_held(&ep->mtx);
+
+ rb_erase_cached(&epi->rbn, &ep->rbr);
+
+--
+2.53.0
+
--- /dev/null
+From 7de44d9622e8a472637993fbec6b4869a01d0a1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:13:57 +0800
+Subject: eventpoll: use hlist_is_singular_node() in __ep_remove()
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 3d9fd0abc94d8cd430cc7cd7d37ce5e5aae2cd2b ]
+
+Replace the open-coded "epi is the only entry in file->f_ep" check
+with hlist_is_singular_node(). Same semantics, and the helper avoids
+the head-cacheline access in the common false case.
+
+Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-1-2470f9eec0f5@kernel.org
+Signed-off-by: Christian Brauner (Amutable) <brauner@kernel.org>
+Stable-dep-of: a6dc643c6931 ("eventpoll: fix ep_remove struct eventpoll / struct file UAF")
+Signed-off-by: Quentin Schulz <quentin.schulz@cherry.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 8a556560a5b2f2..4f05d12a05031a 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -745,7 +745,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
+
+ to_free = NULL;
+ head = file->f_ep;
+- if (head->first == &epi->fllink && !epi->fllink.next) {
++ if (hlist_is_singular_node(&epi->fllink, head)) {
+ /* See eventpoll_release() for details. */
+ WRITE_ONCE(file->f_ep, NULL);
+ if (!is_file_epoll(file)) {
+--
+2.53.0
+
--- /dev/null
+From 931684e9bb54f5fbfb5a4e7270852d54b8efbf32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 12:13:56 +0800
+Subject: file: add fput() cleanup helper
+
+From: Christian Brauner <brauner@kernel.org>
+
+[ Upstream commit 257b1c2c78c25643526609dee0c15f1544eb3252 ]
+
+Add a simple helper to put a file reference.
+
+Link: https://lore.kernel.org/r/20240719-work-mount-namespace-v1-4-834113cab0d2@kernel.org
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+(cherry picked from commit 257b1c2c78c25643526609dee0c15f1544eb3252)
+Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/file.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/include/linux/file.h b/include/linux/file.h
+index 6e9099d2934368..221ba0888107a0 100644
+--- a/include/linux/file.h
++++ b/include/linux/file.h
+@@ -11,6 +11,7 @@
+ #include <linux/posix_types.h>
+ #include <linux/errno.h>
+ #include <linux/cleanup.h>
++#include <linux/err.h>
+
+ struct file;
+
+@@ -93,6 +94,7 @@ extern void put_unused_fd(unsigned int fd);
+
+ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
+ get_unused_fd_flags(flags), unsigned flags)
++DEFINE_FREE(fput, struct file *, if (!IS_ERR_OR_NULL(_T)) fput(_T))
+
+ extern void fd_install(unsigned int fd, struct file *file);
+
+--
+2.53.0
+
--- /dev/null
+From b0d2e80edac5f4cf1d3626e24167f5606e36d335 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:24:24 +0200
+Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected role
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 81ccda30b4e83d8f5cc4fd50503c44e3a33abfeb upstream.
+
+Commit 0cb2af2ea66ad ("KVM: x86: Fix shadow paging use-after-free due
+to unexpected GFN") fixed a shadow paging mismatch between stored and
+computed GFNs; the bug could be triggered by changing a PDE mapping from
+outside the guest, and then deleting a memslot. The rmap_remove()
+call would miss entries created after the PDE change because the GFN
+of the leaf SPTE does not match the GFN of the struct kvm_mmu_page.
+
+A similar hole, however, remains if the modified PDE points to a non-leaf
+page. In this case the gfn can be made to match, but the role does not
+match: the original large 2MB page creates a kvm_mmu_page with direct=1,
+while the new 4KB mapping needs a kvm_mmu_page with direct=0. However,
+kvm_mmu_get_child_sp() does not compare the role, and therefore reuses
+the page.
+
+The next step is installing a leaf (4KB) SPTE on the new path which
+records an rmap entry under the gfn resolved by the walk. But when
+that child is zapped its parent kvm_mmu_page has direct=1 and
+kvm_mmu_page_get_gfn() computes the gfn for the 4KB page as
+sp->gfn + index instead of using sp->shadowed_translation[] (or sp->gfns[]
+in older kernels). It therefore fails to remove the recorded entry.
+
+When the memslot is dropped the shadow page is freed but the rmap
+entry survives, as in the scenario that was already fixed. Code that
+later walks that gfn (dirty logging, MMU notifier invalidation, and
+so on) dereferences an sptep that lies in the freed page, causing the
+use-after-free.
+
+Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages")
+Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 774bc26b8235e3..8e9ba7eaeaf3a0 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2337,13 +2337,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+ u64 *sptep, gfn_t gfn,
+ bool direct, unsigned int access)
+ {
+- union kvm_mmu_page_role role;
++ union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access);
+
+- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) &&
+- spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn)
++ if (is_shadow_present_pte(*sptep) &&
++ !is_large_pte(*sptep) &&
++ spte_to_child_sp(*sptep) &&
++ spte_to_child_sp(*sptep)->gfn == gfn &&
++ spte_to_child_sp(*sptep)->role.word == role.word)
+ return ERR_PTR(-EEXIST);
+
+- role = kvm_mmu_child_role(sptep, direct, access);
+ return kvm_mmu_get_shadow_page(vcpu, gfn, role);
+ }
+
+--
+2.53.0
+
--- /dev/null
+From 84d6c1556b8cbc87fbc8b37bbeb34e49a55c77ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:24:25 +0200
+Subject: KVM: x86/mmu: Ensure hugepage is in by slot before checking max
+ mapping level
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ef057cbf825e03b63f6edf5980f96abf3c53089d upstream.
+
+When recovering hugepages in the shadow MMU, verify that the base gfn of
+the shadow page is actually contained within the target memslot, *before*
+querying the max mapping level given the shadow page's gfn. Failure to
+pre-check the validity of the gfn can lead to an out-of-bounds access to
+the slot's lpage_info (which typically manifests as a host #PF because the
+lpage_info is vmalloc'd) if the guest creates a hugepage mapping (in its
+PTEs) that extends "below" the bounds of a memslot.
+
+When faulting in memory for a guest, and the size of the guest mapping is
+greater than KVM's (current) max mapping, then KVM will create a "direct"
+shadow page (direct in that there are no gPTEs to shadow, and so the target
+gfn is a direct calculation given the base gfn of the shadow page). The
+hugepage recovery flow looks for such direct shadow pages, as forcing 4KiB
+mappings when dirty logging generates the guest > host mapping size case.
+When the 4KiB restriction is lifted, then KVM can replace the shadow page
+with a hugepage.
+
+But if KVM originally used a smaller mapping than the guest because the
+range of memory covered by the guest hugepage exceeds the bounds of a
+memslot, then KVM will link a direct shadow page with a gfn that is outside
+the bounds of the memslot being used to fault in memory. The rmap entry
+added for the leaf mapping is correct and within bounds, but the gfn of the
+leaf SPTE's parent shadow page will be out of bounds.
+
+ BUG: unable to handle page fault for address: ffffc90000806ffc
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not-present page
+ PGD 100000067 P4D 100000067 PUD 1002a7067 PMD 10612f067 PTE 0
+ Oops: Oops: 0000 [#1] SMP
+ CPU: 13 UID: 1000 PID: 757 Comm: mmu_stress_test Not tainted 7.1.0-rc1-48ce1e26eace-x86_pir_to_irr_comments-vm #341 PREEMPT
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+ RIP: 0010:kvm_mmu_max_mapping_level+0x79/0x2b0 [kvm]
+ Call Trace:
+ <TASK>
+ kvm_mmu_recover_huge_pages+0x21b/0x320 [kvm]
+ kvm_set_memslot+0x1ee/0x590 [kvm]
+ kvm_set_memory_region.part.0+0x3a1/0x4d0 [kvm]
+ kvm_vm_ioctl+0x9bf/0x15d0 [kvm]
+ __x64_sys_ioctl+0x8a/0xd0
+ do_syscall_64+0xb7/0xbb0
+ entry_SYSCALL_64_after_hwframe+0x4b/0x53
+ RIP: 0033:0x7f21c0f1a9bf
+ </TASK>
+
+Don't bother pre-checking the bounds of the potential hugepage, i.e. don't
+check that e.g. sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level + 1) is also
+within the memslot, as the checks performed by kvm_mmu_max_mapping_level()
+are a superset of the basic bounds checks. I.e. pre-checking the full
+range would be a dubious micro-optimization.
+
+Fixes: 9eba50f8d7fc ("KVM: x86/mmu: Consult max mapping level when zapping collapsible SPTEs")
+Cc: stable@vger.kernel.org
+Cc: David Matlack <dmatlack@google.com>
+Cc: James Houghton <jthoughton@google.com>
+Cc: Alexander Bulekov <bkov@amazon.com>
+Cc: Fred Griffoul <fgriffo@amazon.co.uk>
+Cc: Alexander Graf <graf@amazon.de>
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Filippo Sironi <sironi@amazon.de>
+Cc: Ivan Orlov <iorlov@amazon.co.uk>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 18 ++++++++++++------
+ include/linux/kvm_host.h | 7 ++++++-
+ 2 files changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 8e9ba7eaeaf3a0..2453524ea4a1fc 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -6582,13 +6582,19 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+ sp = sptep_to_sp(sptep);
+
+ /*
+- * We cannot do huge page mapping for indirect shadow pages,
+- * which are found on the last rmap (level = 1) when not using
+- * tdp; such shadow pages are synced with the page table in
+- * the guest, and the guest page table is using 4K page size
+- * mapping if the indirect sp has level = 1.
++ * Direct shadow page can be replaced by a hugepage if the host
++ * mapping level allows it and the memslot maps all of the host
++ * hugepage. Note! If the memslot maps only part of the
++ * hugepage, sp->gfn may be below slot->base_gfn, and querying
++ * the max mapping level would cause an out-of-bounds lpage_info
++ * access. So the gfn bounds check *must* be done first.
++ *
++ * Indirect shadow pages are created when the guest page tables
++ * are using 4K pages. Since the host mapping is always
++ * constrained by the page size in the guest, indirect shadow
++ * pages are never collapsible.
+ */
+- if (sp->role.direct &&
++ if (sp->role.direct && is_gfn_in_memslot(slot, sp->gfn) &&
+ sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
+ PG_LEVEL_NUM)) {
+ kvm_zap_one_rmap_spte(kvm, rmap_head, sptep);
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index ab09b08967bba4..57c7b4009f5758 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1650,6 +1650,11 @@ int kvm_request_irq_source_id(struct kvm *kvm);
+ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
+ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+
++static inline bool is_gfn_in_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
++{
++ return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages;
++}
++
+ /*
+ * Returns a pointer to the memslot if it contains gfn.
+ * Otherwise returns NULL.
+@@ -1660,7 +1665,7 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+ if (!slot)
+ return NULL;
+
+- if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
++ if (is_gfn_in_memslot(slot, gfn))
+ return slot;
+ else
+ return NULL;
+--
+2.53.0
+
--- /dev/null
+From 47bc6f0db8f685814a14df5e3d28ed384b5107df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 25 Jun 2026 14:30:39 +0200
+Subject: Revert "ptp: add testptp mask test"
+
+From: Petr Machata <petrm@nvidia.com>
+
+This reverts commit 59ac47a0275fcd5a7637c3d5da20b0905563c7f5, which is
+commit 26285e689c6cd2cf3849568c83b2ebe53f467143 upstream.
+
+The reverted commit extends the selftest to test timestamp event queue mask
+manipulation in testptp. It exercises masks PTP_MASK_CLEAR_ALL and
+PTP_MASK_EN_SINGLE, introduced in commit c5a445b1e934 ("ptp: support event
+queue reader channel masks"), which is not on this stable branch. The test
+case thus cannot be built against this tree's own UAPI headers.
+
+The reverted commit was introduced to resolve a missing dependency of
+commit 8d9f22c570ba ("testptp: Add option to open PHC in readonly mode"),
+which is 76868642e427 upstream. The only conflict between the two is the
+getopt string, and there is otherwise no direct dependency between the two.
+
+This patch therefore reverts the cited commit, hand-resolving the
+getopt string to include 'r' (as introduced by c6dc458227a3), but not
+'F' (introduced by c1c50689799d).
+
+Reported-by: Yong Wang <yongwang@nvidia.com>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/ptp/testptp.c | 19 +------------------
+ 1 file changed, 1 insertion(+), 18 deletions(-)
+
+diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
+index e0aed424fe42d5..8f05212f82329a 100644
+--- a/tools/testing/selftests/ptp/testptp.c
++++ b/tools/testing/selftests/ptp/testptp.c
+@@ -121,7 +121,6 @@ static void usage(char *progname)
+ " -d name device to open\n"
+ " -e val read 'val' external time stamp events\n"
+ " -f val adjust the ptp clock frequency by 'val' ppb\n"
+- " -F chan Enable single channel mask and keep device open for debugfs verification.\n"
+ " -g get the ptp clock time\n"
+ " -h prints this message\n"
+ " -i val index for event/trigger\n"
+@@ -190,7 +189,6 @@ int main(int argc, char *argv[])
+ int seconds = 0;
+ int readonly = 0;
+ int settime = 0;
+- int channel = -1;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+@@ -200,7 +198,7 @@ int main(int argc, char *argv[])
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
++ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xz"))) {
+ switch (c) {
+ case 'c':
+ capabilities = 1;
+@@ -214,9 +212,6 @@ int main(int argc, char *argv[])
+ case 'f':
+ adjfreq = atoi(optarg);
+ break;
+- case 'F':
+- channel = atoi(optarg);
+- break;
+ case 'g':
+ gettime = 1;
+ break;
+@@ -618,18 +613,6 @@ int main(int argc, char *argv[])
+ free(xts);
+ }
+
+- if (channel >= 0) {
+- if (ioctl(fd, PTP_MASK_CLEAR_ALL)) {
+- perror("PTP_MASK_CLEAR_ALL");
+- } else if (ioctl(fd, PTP_MASK_EN_SINGLE, (unsigned int *)&channel)) {
+- perror("PTP_MASK_EN_SINGLE");
+- } else {
+- printf("Channel %d exclusively enabled. Check on debugfs.\n", channel);
+- printf("Press any key to continue\n.");
+- getchar();
+- }
+- }
+-
+ close(fd);
+ return 0;
+ }
+--
+2.53.0
+
--- /dev/null
+file-add-fput-cleanup-helper.patch
+eventpoll-use-hlist_is_singular_node-in-__ep_remove.patch
+eventpoll-split-__ep_remove.patch
+eventpoll-kill-__ep_remove.patch
+eventpoll-drop-vestigial-__-prefix-from-ep_remove_-f.patch
+eventpoll-rename-ep_remove_safe-back-to-ep_remove.patch
+eventpoll-move-epi_fget-up.patch
+eventpoll-fix-ep_remove-struct-eventpoll-struct-file.patch
+kvm-x86-fix-shadow-paging-use-after-free-due-to-unex.patch
+kvm-x86-mmu-ensure-hugepage-is-in-by-slot-before-che.patch
+revert-ptp-add-testptp-mask-test.patch
--- /dev/null
+From b0d49a30728d6df9f36ca076f38160317e784a1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:22:50 +0200
+Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected role
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 81ccda30b4e83d8f5cc4fd50503c44e3a33abfeb upstream.
+
+Commit 0cb2af2ea66ad ("KVM: x86: Fix shadow paging use-after-free due
+to unexpected GFN") fixed a shadow paging mismatch between stored and
+computed GFNs; the bug could be triggered by changing a PDE mapping from
+outside the guest, and then deleting a memslot. The rmap_remove()
+call would miss entries created after the PDE change because the GFN
+of the leaf SPTE does not match the GFN of the struct kvm_mmu_page.
+
+A similar hole, however, remains if the modified PDE points to a non-leaf
+page. In this case the gfn can be made to match, but the role does not
+match: the original large 2MB page creates a kvm_mmu_page with direct=1,
+while the new 4KB mapping needs a kvm_mmu_page with direct=0. However,
+kvm_mmu_get_child_sp() does not compare the role, and therefore reuses
+the page.
+
+The next step is installing a leaf (4KB) SPTE on the new path which
+records an rmap entry under the gfn resolved by the walk. But when
+that child is zapped its parent kvm_mmu_page has direct=1 and
+kvm_mmu_page_get_gfn() computes the gfn for the 4KB page as
+sp->gfn + index instead of using sp->shadowed_translation[] (or sp->gfns[]
+in older kernels). It therefore fails to remove the recorded entry.
+
+When the memslot is dropped the shadow page is freed but the rmap
+entry survives, as in the scenario that was already fixed. Code that
+later walks that gfn (dirty logging, MMU notifier invalidation, and
+so on) dereferences an sptep that lies in the freed page, causing the
+use-after-free.
+
+Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages")
+Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 729240bc00a269..3e1218abbbb757 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2453,13 +2453,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+ u64 *sptep, gfn_t gfn,
+ bool direct, unsigned int access)
+ {
+- union kvm_mmu_page_role role;
++ union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access);
+
+- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) &&
+- spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn)
++ if (is_shadow_present_pte(*sptep) &&
++ !is_large_pte(*sptep) &&
++ spte_to_child_sp(*sptep) &&
++ spte_to_child_sp(*sptep)->gfn == gfn &&
++ spte_to_child_sp(*sptep)->role.word == role.word)
+ return ERR_PTR(-EEXIST);
+
+- role = kvm_mmu_child_role(sptep, direct, access);
+ return kvm_mmu_get_shadow_page(vcpu, gfn, role);
+ }
+
+--
+2.53.0
+
--- /dev/null
+kvm-x86-fix-shadow-paging-use-after-free-due-to-unex.patch
--- /dev/null
+From 0559cf5a1599f89cb2255b2eef4d1ad721059e30 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Jun 2026 13:22:32 +0200
+Subject: KVM: x86: Fix shadow paging use-after-free due to unexpected role
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 81ccda30b4e83d8f5cc4fd50503c44e3a33abfeb upstream.
+
+Commit 0cb2af2ea66ad ("KVM: x86: Fix shadow paging use-after-free due
+to unexpected GFN") fixed a shadow paging mismatch between stored and
+computed GFNs; the bug could be triggered by changing a PDE mapping from
+outside the guest, and then deleting a memslot. The rmap_remove()
+call would miss entries created after the PDE change because the GFN
+of the leaf SPTE does not match the GFN of the struct kvm_mmu_page.
+
+A similar hole, however, remains if the modified PDE points to a non-leaf
+page. In this case the gfn can be made to match, but the role does not
+match: the original large 2MB page creates a kvm_mmu_page with direct=1,
+while the new 4KB mapping needs a kvm_mmu_page with direct=0. However,
+kvm_mmu_get_child_sp() does not compare the role, and therefore reuses
+the page.
+
+The next step is installing a leaf (4KB) SPTE on the new path which
+records an rmap entry under the gfn resolved by the walk. But when
+that child is zapped its parent kvm_mmu_page has direct=1 and
+kvm_mmu_page_get_gfn() computes the gfn for the 4KB page as
+sp->gfn + index instead of using sp->shadowed_translation[] (or sp->gfns[]
+in older kernels). It therefore fails to remove the recorded entry.
+
+When the memslot is dropped the shadow page is freed but the rmap
+entry survives, as in the scenario that was already fixed. Code that
+later walks that gfn (dirty logging, MMU notifier invalidation, and
+so on) dereferences an sptep that lies in the freed page, causing the
+use-after-free.
+
+Fixes: 2032a93d66fa ("KVM: MMU: Don't allocate gfns page for direct mmu pages")
+Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index f0144ae8d891d3..bb204d3c66b7e9 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -2453,13 +2453,15 @@ static struct kvm_mmu_page *kvm_mmu_get_child_sp(struct kvm_vcpu *vcpu,
+ u64 *sptep, gfn_t gfn,
+ bool direct, unsigned int access)
+ {
+- union kvm_mmu_page_role role;
++ union kvm_mmu_page_role role = kvm_mmu_child_role(sptep, direct, access);
+
+- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep) &&
+- spte_to_child_sp(*sptep) && spte_to_child_sp(*sptep)->gfn == gfn)
++ if (is_shadow_present_pte(*sptep) &&
++ !is_large_pte(*sptep) &&
++ spte_to_child_sp(*sptep) &&
++ spte_to_child_sp(*sptep)->gfn == gfn &&
++ spte_to_child_sp(*sptep)->role.word == role.word)
+ return ERR_PTR(-EEXIST);
+
+- role = kvm_mmu_child_role(sptep, direct, access);
+ return kvm_mmu_get_shadow_page(vcpu, gfn, role);
+ }
+
+--
+2.53.0
+
--- /dev/null
+kvm-x86-fix-shadow-paging-use-after-free-due-to-unex.patch