git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.12-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 17 Oct 2025 12:00:41 +0000 (14:00 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 17 Oct 2025 12:00:41 +0000 (14:00 +0200)
added patches:
mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
s390-bpf-centralize-frame-offset-calculations.patch
s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch

queue-6.12/mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch [new file with mode: 0644]
queue-6.12/s390-bpf-centralize-frame-offset-calculations.patch [new file with mode: 0644]
queue-6.12/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch [new file with mode: 0644]
queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch [new file with mode: 0644]
queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch [new file with mode: 0644]
queue-6.12/series

diff --git a/queue-6.12/mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch b/queue-6.12/mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
new file mode 100644 (file)
index 0000000..5e0efe4
--- /dev/null
@@ -0,0 +1,110 @@
+From 9658d698a8a83540bf6a6c80d13c9a61590ee985 Mon Sep 17 00:00:00 2001
+From: Lance Yang <lance.yang@linux.dev>
+Date: Tue, 30 Sep 2025 16:10:40 +0800
+Subject: mm/rmap: fix soft-dirty and uffd-wp bit loss when remapping zero-filled mTHP subpage to shared zeropage
+
+From: Lance Yang <lance.yang@linux.dev>
+
+commit 9658d698a8a83540bf6a6c80d13c9a61590ee985 upstream.
+
+When splitting an mTHP and replacing a zero-filled subpage with the shared
+zeropage, try_to_map_unused_to_zeropage() currently drops several
+important PTE bits.
+
+For userspace tools like CRIU, which rely on the soft-dirty mechanism for
+incremental snapshots, losing the soft-dirty bit means modified pages are
+missed, leading to inconsistent memory state after restore.
+
+As pointed out by David, the more critical uffd-wp bit is also dropped.
+This breaks the userfaultfd write-protection mechanism, causing writes to
+be silently missed by monitoring applications, which can lead to data
+corruption.
+
+Preserve both the soft-dirty and uffd-wp bits from the old PTE when
+creating the new zeropage mapping to ensure they are correctly tracked.
+
+Link: https://lkml.kernel.org/r/20250930081040.80926-1-lance.yang@linux.dev
+Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp")
+Signed-off-by: Lance Yang <lance.yang@linux.dev>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Suggested-by: Dev Jain <dev.jain@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Dev Jain <dev.jain@arm.com>
+Acked-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Byungchul Park <byungchul@sk.com>
+Cc: Gregory Price <gourry@gourry.net>
+Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Mariano Pache <npache@redhat.com>
+Cc: Mathew Brost <matthew.brost@intel.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rakie Kim <rakie.kim@sk.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Usama Arif <usamaarif642@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -198,8 +198,7 @@ bool isolate_folio_to_list(struct folio
+ }
+ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
+-                                        struct folio *folio,
+-                                        unsigned long idx)
++              struct folio *folio, pte_t old_pte, unsigned long idx)
+ {
+       struct page *page = folio_page(folio, idx);
+       pte_t newpte;
+@@ -208,7 +207,7 @@ static bool try_to_map_unused_to_zeropag
+               return false;
+       VM_BUG_ON_PAGE(!PageAnon(page), page);
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+-      VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
++      VM_BUG_ON_PAGE(pte_present(old_pte), page);
+       if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
+           mm_forbids_zeropage(pvmw->vma->vm_mm))
+@@ -224,6 +223,12 @@ static bool try_to_map_unused_to_zeropag
+       newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
+                                       pvmw->vma->vm_page_prot));
++
++      if (pte_swp_soft_dirty(old_pte))
++              newpte = pte_mksoft_dirty(newpte);
++      if (pte_swp_uffd_wp(old_pte))
++              newpte = pte_mkuffd_wp(newpte);
++
+       set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+       dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
+@@ -266,13 +271,13 @@ static bool remove_migration_pte(struct
+                       continue;
+               }
+ #endif
++              old_pte = ptep_get(pvmw.pte);
+               if (rmap_walk_arg->map_unused_to_zeropage &&
+-                  try_to_map_unused_to_zeropage(&pvmw, folio, idx))
++                  try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
+                       continue;
+               folio_get(folio);
+               pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
+-              old_pte = ptep_get(pvmw.pte);
+               entry = pte_to_swp_entry(old_pte);
+               if (!is_migration_entry_young(entry))
diff --git a/queue-6.12/s390-bpf-centralize-frame-offset-calculations.patch b/queue-6.12/s390-bpf-centralize-frame-offset-calculations.patch
new file mode 100644 (file)
index 0000000..fe5661b
--- /dev/null
@@ -0,0 +1,226 @@
+From stable+bounces-186297-greg=kroah.com@vger.kernel.org Fri Oct 17 11:36:47 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:04 +0200
+Subject: s390/bpf: Centralize frame offset calculations
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Alexei Starovoitov <ast@kernel.org>
+Message-ID: <20251017092550.88640-2-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit b2268d550d20ff860bddfe3a91b1aec00414689a upstream.
+
+The calculation of the distance from %r15 to the caller-allocated
+portion of the stack frame is copy-pasted into multiple places in the
+JIT code.
+
+Move it to bpf_jit_prog() and save the result into bpf_jit::frame_off,
+so that the other parts of the JIT can use it.
+
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Link: https://lore.kernel.org/r/20250624121501.50536-2-iii@linux.ibm.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c |   56 +++++++++++++++++++------------------------
+ 1 file changed, 26 insertions(+), 30 deletions(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -56,6 +56,7 @@ struct bpf_jit {
+       int prologue_plt;       /* Start of prologue hotpatch PLT */
+       int kern_arena;         /* Pool offset of kernel arena address */
+       u64 user_arena;         /* User arena address */
++      u32 frame_off;          /* Offset of frame from %r15 */
+ };
+ #define SEEN_MEM      BIT(0)          /* use mem[] for temporary storage */
+@@ -421,12 +422,9 @@ static void save_regs(struct bpf_jit *ji
+ /*
+  * Restore registers from "rs" (register start) to "re" (register end) on stack
+  */
+-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
++static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+-      u32 off = STK_OFF_R6 + (rs - 6) * 8;
+-
+-      if (jit->seen & SEEN_STACK)
+-              off += STK_OFF + stack_depth;
++      u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8;
+       if (rs == re)
+               /* lg %rs,off(%r15) */
+@@ -470,8 +468,7 @@ static int get_end(u16 seen_regs, int st
+  * Save and restore clobbered registers (6-15) on stack.
+  * We save/restore registers in chunks with gap >= 2 registers.
+  */
+-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
+-                            u16 extra_regs)
++static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
+ {
+       u16 seen_regs = jit->seen_regs | extra_regs;
+       const int last = 15, save_restore_size = 6;
+@@ -494,7 +491,7 @@ static void save_restore_regs(struct bpf
+               if (op == REGS_SAVE)
+                       save_regs(jit, rs, re);
+               else
+-                      restore_regs(jit, rs, re, stack_depth);
++                      restore_regs(jit, rs, re);
+               re++;
+       } while (re <= last);
+ }
+@@ -561,8 +558,7 @@ static void bpf_jit_plt(struct bpf_plt *
+  * Save registers and create stack frame if necessary.
+  * See stack frame layout description in "bpf_jit.h"!
+  */
+-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+-                           u32 stack_depth)
++static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
+ {
+       /* No-op for hotpatching */
+       /* brcl 0,prologue_plt */
+@@ -595,7 +591,7 @@ static void bpf_jit_prologue(struct bpf_
+               jit->seen_regs |= NVREGS;
+       } else {
+               /* Save registers */
+-              save_restore_regs(jit, REGS_SAVE, stack_depth,
++              save_restore_regs(jit, REGS_SAVE,
+                                 fp->aux->exception_boundary ? NVREGS : 0);
+       }
+       /* Setup literal pool */
+@@ -617,8 +613,8 @@ static void bpf_jit_prologue(struct bpf_
+               EMIT4(0xb9040000, REG_W1, REG_15);
+               /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+               EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
+-              /* aghi %r15,-STK_OFF */
+-              EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
++              /* aghi %r15,-frame_off */
++              EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+               /* stg %w1,152(%r15) (backchain) */
+               EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+                             REG_15, 152);
+@@ -665,13 +661,13 @@ static void call_r1(struct bpf_jit *jit)
+ /*
+  * Function epilogue
+  */
+-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
++static void bpf_jit_epilogue(struct bpf_jit *jit)
+ {
+       jit->exit_ip = jit->prg;
+       /* Load exit code: lgr %r2,%b0 */
+       EMIT4(0xb9040000, REG_2, BPF_REG_0);
+       /* Restore registers */
+-      save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
++      save_restore_regs(jit, REGS_RESTORE, 0);
+       if (nospec_uses_trampoline()) {
+               jit->r14_thunk_ip = jit->prg;
+               /* Generate __s390_indirect_jump_r14 thunk */
+@@ -862,7 +858,7 @@ static int sign_extend(struct bpf_jit *j
+  * stack space for the large switch statement.
+  */
+ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+-                               int i, bool extra_pass, u32 stack_depth)
++                               int i, bool extra_pass)
+ {
+       struct bpf_insn *insn = &fp->insnsi[i];
+       s32 branch_oc_off = insn->off;
+@@ -1783,9 +1779,9 @@ static noinline int bpf_jit_insn(struct
+                * Note 2: We assume that the verifier does not let us call the
+                * main program, which clears the tail call counter on entry.
+                */
+-              /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
++              /* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */
+               _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+-                     0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
++                     0xf000 | (jit->frame_off + STK_OFF_TCCNT));
+               /* Sign-extend the kfunc arguments. */
+               if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+@@ -1836,10 +1832,7 @@ static noinline int bpf_jit_insn(struct
+                *         goto out;
+                */
+-              if (jit->seen & SEEN_STACK)
+-                      off = STK_OFF_TCCNT + STK_OFF + stack_depth;
+-              else
+-                      off = STK_OFF_TCCNT;
++              off = jit->frame_off + STK_OFF_TCCNT;
+               /* lhi %w0,1 */
+               EMIT4_IMM(0xa7080000, REG_W0, 1);
+               /* laal %w1,%w0,off(%r15) */
+@@ -1869,7 +1862,7 @@ static noinline int bpf_jit_insn(struct
+               /*
+                * Restore registers before calling function
+                */
+-              save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
++              save_restore_regs(jit, REGS_RESTORE, 0);
+               /*
+                * goto *(prog->bpf_func + tail_call_start);
+@@ -2161,7 +2154,7 @@ static int bpf_set_addr(struct bpf_jit *
+  * Compile eBPF program into s390x code
+  */
+ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
+-                      bool extra_pass, u32 stack_depth)
++                      bool extra_pass)
+ {
+       int i, insn_count, lit32_size, lit64_size;
+       u64 kern_arena;
+@@ -2170,24 +2163,28 @@ static int bpf_jit_prog(struct bpf_jit *
+       jit->lit64 = jit->lit64_start;
+       jit->prg = 0;
+       jit->excnt = 0;
++      if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
++              jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8);
++      else
++              jit->frame_off = 0;
+       kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
+       if (kern_arena)
+               jit->kern_arena = _EMIT_CONST_U64(kern_arena);
+       jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
+-      bpf_jit_prologue(jit, fp, stack_depth);
++      bpf_jit_prologue(jit, fp);
+       if (bpf_set_addr(jit, 0) < 0)
+               return -1;
+       for (i = 0; i < fp->len; i += insn_count) {
+-              insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
++              insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+               if (insn_count < 0)
+                       return -1;
+               /* Next instruction address */
+               if (bpf_set_addr(jit, i + insn_count) < 0)
+                       return -1;
+       }
+-      bpf_jit_epilogue(jit, stack_depth);
++      bpf_jit_epilogue(jit);
+       lit32_size = jit->lit32 - jit->lit32_start;
+       lit64_size = jit->lit64 - jit->lit64_start;
+@@ -2263,7 +2260,6 @@ static struct bpf_binary_header *bpf_jit
+  */
+ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+ {
+-      u32 stack_depth = round_up(fp->aux->stack_depth, 8);
+       struct bpf_prog *tmp, *orig_fp = fp;
+       struct bpf_binary_header *header;
+       struct s390_jit_data *jit_data;
+@@ -2316,7 +2312,7 @@ struct bpf_prog *bpf_int_jit_compile(str
+        *   - 3:   Calculate program size and addrs array
+        */
+       for (pass = 1; pass <= 3; pass++) {
+-              if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
++              if (bpf_jit_prog(&jit, fp, extra_pass)) {
+                       fp = orig_fp;
+                       goto free_addrs;
+               }
+@@ -2330,7 +2326,7 @@ struct bpf_prog *bpf_int_jit_compile(str
+               goto free_addrs;
+       }
+ skip_init_ctx:
+-      if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
++      if (bpf_jit_prog(&jit, fp, extra_pass)) {
+               bpf_jit_binary_free(header);
+               fp = orig_fp;
+               goto free_addrs;
diff --git a/queue-6.12/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch b/queue-6.12/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
new file mode 100644 (file)
index 0000000..8b3f148
--- /dev/null
@@ -0,0 +1,260 @@
+From stable+bounces-186298-greg=kroah.com@vger.kernel.org Fri Oct 17 11:39:54 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:05 +0200
+Subject: s390/bpf: Describe the frame using a struct instead of constants
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Alexei Starovoitov <ast@kernel.org>
+Message-ID: <20251017092550.88640-3-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit e26d523edf2a62b142d2dd2dd9b87f61ed92f33a upstream.
+
+Currently the caller-allocated portion of the stack frame is described
+using constants, hardcoded values, and an ASCII drawing, making it
+harder than necessary to ensure that everything is in sync.
+
+Declare a struct and use offsetof() and offsetofend() macros to refer
+to various values stored within the frame.
+
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Link: https://lore.kernel.org/r/20250624121501.50536-3-iii@linux.ibm.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit.h      |   55 ----------------------------------
+ arch/s390/net/bpf_jit_comp.c |   69 +++++++++++++++++++++++++++++--------------
+ 2 files changed, 47 insertions(+), 77 deletions(-)
+ delete mode 100644 arch/s390/net/bpf_jit.h
+
+--- a/arch/s390/net/bpf_jit.h
++++ /dev/null
+@@ -1,55 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * BPF Jit compiler defines
+- *
+- * Copyright IBM Corp. 2012,2015
+- *
+- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+- *          Michael Holzheu <holzheu@linux.vnet.ibm.com>
+- */
+-
+-#ifndef __ARCH_S390_NET_BPF_JIT_H
+-#define __ARCH_S390_NET_BPF_JIT_H
+-
+-#ifndef __ASSEMBLY__
+-
+-#include <linux/filter.h>
+-#include <linux/types.h>
+-
+-#endif /* __ASSEMBLY__ */
+-
+-/*
+- * Stackframe layout (packed stack):
+- *
+- *                                ^ high
+- *          +---------------+     |
+- *          | old backchain |     |
+- *          +---------------+     |
+- *          |   r15 - r6    |     |
+- *          +---------------+     |
+- *          | 4 byte align  |     |
+- *          | tail_call_cnt |     |
+- * BFP           -> +===============+     |
+- *          |               |     |
+- *          |   BPF stack   |     |
+- *          |               |     |
+- * R15+160 -> +---------------+     |
+- *          | new backchain |     |
+- * R15+152 -> +---------------+     |
+- *          | + 152 byte SA |     |
+- * R15           -> +---------------+     + low
+- *
+- * We get 160 bytes stack space from calling function, but only use
+- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
+- *
+- * The stack size used by the BPF program ("BPF stack" above) is passed
+- * via "aux->stack_depth".
+- */
+-#define STK_SPACE_ADD (160)
+-#define STK_160_UNUSED        (160 - 12 * 8)
+-#define STK_OFF               (STK_SPACE_ADD - STK_160_UNUSED)
+-
+-#define STK_OFF_R6    (160 - 11 * 8)  /* Offset of r6 on stack */
+-#define STK_OFF_TCCNT (160 - 12 * 8)  /* Offset of tail_call_cnt on stack */
+-
+-#endif /* __ARCH_S390_NET_BPF_JIT_H */
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -32,7 +32,6 @@
+ #include <asm/set_memory.h>
+ #include <asm/text-patching.h>
+ #include <asm/unwind.h>
+-#include "bpf_jit.h"
+ struct bpf_jit {
+       u32 seen;               /* Flags to remember seen eBPF instructions */
+@@ -56,7 +55,7 @@ struct bpf_jit {
+       int prologue_plt;       /* Start of prologue hotpatch PLT */
+       int kern_arena;         /* Pool offset of kernel arena address */
+       u64 user_arena;         /* User arena address */
+-      u32 frame_off;          /* Offset of frame from %r15 */
+      u32 frame_off;          /* Offset of struct prog_frame from %r15 */
+ };
+ #define SEEN_MEM      BIT(0)          /* use mem[] for temporary storage */
+@@ -405,11 +404,25 @@ static void jit_fill_hole(void *area, un
+ }
+ /*
++ * Caller-allocated part of the frame.
++ * Thanks to packed stack, its otherwise unused initial part can be used for
++ * the BPF stack and for the next frame.
++ */
++struct prog_frame {
++      u64 unused[8];
++      /* BPF stack starts here and grows towards 0 */
++      u32 tail_call_cnt;
++      u32 pad;
++      u64 r6[10];  /* r6 - r15 */
++      u64 backchain;
++} __packed;
++
++/*
+  * Save registers from "rs" (register start) to "re" (register end) on stack
+  */
+ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+-      u32 off = STK_OFF_R6 + (rs - 6) * 8;
++      u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
+       if (rs == re)
+               /* stg %rs,off(%r15) */
+@@ -424,7 +437,7 @@ static void save_regs(struct bpf_jit *ji
+  */
+ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+-      u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8;
++      u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
+       if (rs == re)
+               /* lg %rs,off(%r15) */
+@@ -556,10 +569,12 @@ static void bpf_jit_plt(struct bpf_plt *
+  * Emit function prologue
+  *
+  * Save registers and create stack frame if necessary.
+- * See stack frame layout description in "bpf_jit.h"!
++ * Stack frame layout is described by struct prog_frame.
+  */
+ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
+ {
++      BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
++
+       /* No-op for hotpatching */
+       /* brcl 0,prologue_plt */
+       EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+@@ -567,8 +582,9 @@ static void bpf_jit_prologue(struct bpf_
+       if (!bpf_is_subprog(fp)) {
+               /* Initialize the tail call counter in the main program. */
+-              /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+-              _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
++              /* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
++              _EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
++                     0xf000 | offsetof(struct prog_frame, tail_call_cnt));
+       } else {
+               /*
+                * Skip the tail call counter initialization in subprograms.
+@@ -611,13 +627,15 @@ static void bpf_jit_prologue(struct bpf_
+       if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+               /* lgr %w1,%r15 (backchain) */
+               EMIT4(0xb9040000, REG_W1, REG_15);
+-              /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+-              EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
++              /* la %bfp,unused_end(%r15) (BPF frame pointer) */
++              EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
++                         offsetofend(struct prog_frame, unused));
+               /* aghi %r15,-frame_off */
+               EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+-              /* stg %w1,152(%r15) (backchain) */
++              /* stg %w1,backchain(%r15) */
+               EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+-                            REG_15, 152);
++                            REG_15,
++                            offsetof(struct prog_frame, backchain));
+       }
+ }
+@@ -1779,9 +1797,10 @@ static noinline int bpf_jit_insn(struct
+                * Note 2: We assume that the verifier does not let us call the
+                * main program, which clears the tail call counter on entry.
+                */
+-              /* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */
+-              _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+-                     0xf000 | (jit->frame_off + STK_OFF_TCCNT));
++              /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
++              _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
++                     0xf000 | (jit->frame_off +
++                               offsetof(struct prog_frame, tail_call_cnt)));
+               /* Sign-extend the kfunc arguments. */
+               if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+@@ -1832,7 +1851,8 @@ static noinline int bpf_jit_insn(struct
+                *         goto out;
+                */
+-              off = jit->frame_off + STK_OFF_TCCNT;
++              off = jit->frame_off +
++                    offsetof(struct prog_frame, tail_call_cnt);
+               /* lhi %w0,1 */
+               EMIT4_IMM(0xa7080000, REG_W0, 1);
+               /* laal %w1,%w0,off(%r15) */
+@@ -2164,7 +2184,9 @@ static int bpf_jit_prog(struct bpf_jit *
+       jit->prg = 0;
+       jit->excnt = 0;
+       if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
+-              jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8);
++              jit->frame_off = sizeof(struct prog_frame) -
++                               offsetofend(struct prog_frame, unused) +
++                               round_up(fp->aux->stack_depth, 8);
+       else
+               jit->frame_off = 0;
+@@ -2647,9 +2669,10 @@ static int __arch_prepare_bpf_trampoline
+       /* stg %r1,backchain_off(%r15) */
+       EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
+                     tjit->backchain_off);
+-      /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
++      /* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
+       _EMIT6(0xd203f000 | tjit->tccnt_off,
+-             0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
++             0xf000 | (tjit->stack_size +
++                       offsetof(struct prog_frame, tail_call_cnt)));
+       /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+       if (nr_reg_args)
+               EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+@@ -2786,8 +2809,9 @@ static int __arch_prepare_bpf_trampoline
+                                      (nr_stack_args * sizeof(u64) - 1) << 16 |
+                                      tjit->stack_args_off,
+                              0xf000 | tjit->orig_stack_args_off);
+-              /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+-              _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
++              /* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
++              _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
++                     0xf000 | tjit->tccnt_off);
+               /* lgr %r1,%r8 */
+               EMIT4(0xb9040000, REG_1, REG_8);
+               /* %r1() */
+@@ -2844,8 +2868,9 @@ static int __arch_prepare_bpf_trampoline
+       if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+               EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+                             tjit->retval_off);
+-      /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+-      _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
++      /* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
++      _EMIT6(0xd203f000 | (tjit->stack_size +
++                           offsetof(struct prog_frame, tail_call_cnt)),
+              0xf000 | tjit->tccnt_off);
+       /* aghi %r15,stack_size */
+       EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
diff --git a/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch b/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
new file mode 100644 (file)
index 0000000..9536f3a
--- /dev/null
@@ -0,0 +1,75 @@
+From stable+bounces-186299-greg=kroah.com@vger.kernel.org Fri Oct 17 11:46:12 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:06 +0200
+Subject: s390/bpf: Write back tail call counter for BPF_PSEUDO_CALL
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20251017092550.88640-4-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit c861a6b147137d10b5ff88a2c492ba376cd1b8b0 upstream.
+
+The tailcall_bpf2bpf_hierarchy_1 test hangs on s390. Its call graph is
+as follows:
+
+  entry()
+    subprog_tail()
+      bpf_tail_call_static(0) -> entry + tail_call_start
+    subprog_tail()
+      bpf_tail_call_static(0) -> entry + tail_call_start
+
+entry() copies its tail call counter to the subprog_tail()'s frame,
+which then increments it. However, the incremented result is discarded,
+leading to an astronomically large number of tail calls.
+
+Fix by writing the incremented counter back to the entry()'s frame.
+
+Fixes: dd691e847d28 ("s390/bpf: Implement bpf_jit_supports_subprog_tailcalls()")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20250813121016.163375-3-iii@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c |   23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -1789,13 +1789,6 @@ static noinline int bpf_jit_insn(struct
+               jit->seen |= SEEN_FUNC;
+               /*
+                * Copy the tail call counter to where the callee expects it.
+-               *
+-               * Note 1: The callee can increment the tail call counter, but
+-               * we do not load it back, since the x86 JIT does not do this
+-               * either.
+-               *
+-               * Note 2: We assume that the verifier does not let us call the
+-               * main program, which clears the tail call counter on entry.
+                */
+               /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
+               _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+@@ -1822,6 +1815,22 @@ static noinline int bpf_jit_insn(struct
+               call_r1(jit);
+               /* lgr %b0,%r2: load return value into %b0 */
+               EMIT4(0xb9040000, BPF_REG_0, REG_2);
++
++              /*
++               * Copy the potentially updated tail call counter back.
++               */
++
++              if (insn->src_reg == BPF_PSEUDO_CALL)
++                      /*
+                       * mvc frame_off+tail_call_cnt(4,%r15),
+                       *     tail_call_cnt(%r15)
++                       */
++                      _EMIT6(0xd203f000 | (jit->frame_off +
++                                           offsetof(struct prog_frame,
++                                                    tail_call_cnt)),
++                             0xf000 | offsetof(struct prog_frame,
++                                               tail_call_cnt));
++
+               break;
+       }
+       case BPF_JMP | BPF_TAIL_CALL: {
diff --git a/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch b/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch
new file mode 100644 (file)
index 0000000..b6b6d4f
--- /dev/null
@@ -0,0 +1,50 @@
+From stable+bounces-186300-greg=kroah.com@vger.kernel.org Fri Oct 17 11:45:09 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:07 +0200
+Subject: s390/bpf: Write back tail call counter for BPF_TRAMP_F_CALL_ORIG
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20251017092550.88640-5-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit bc3905a71f02511607d3ccf732360580209cac4c upstream.
+
+The tailcall_bpf2bpf_hierarchy_fentry test hangs on s390. Its call
+graph is as follows:
+
+  entry()
+    subprog_tail()
+      trampoline()
+        fentry()
+        the rest of subprog_tail()  # via BPF_TRAMP_F_CALL_ORIG
+        return to entry()
+
+The problem is that the rest of subprog_tail() increments the tail call
+counter, but the trampoline discards the incremented value. This
+results in an astronomically large number of tail calls.
+
+Fix by making the trampoline write the incremented tail call counter
+back.
+
+Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20250813121016.163375-4-iii@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -2828,6 +2828,9 @@ static int __arch_prepare_bpf_trampoline
+               /* stg %r2,retval_off(%r15) */
+               EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+                             tjit->retval_off);
+              /* mvc tccnt_off(4,%r15),tail_call_cnt(%r15) */
++              _EMIT6(0xd203f000 | tjit->tccnt_off,
++                     0xf000 | offsetof(struct prog_frame, tail_call_cnt));
+               im->ip_after_call = jit->prg_buf + jit->prg;
index 15bdf4ad91688b9ae2667aac84d9438d68eb092d..95bb15cb5e1a243a8dab69acc46da5ca9318164c 100644 (file)
@@ -255,3 +255,8 @@ acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch
 mptcp-pm-in-kernel-usable-client-side-with-c-flag.patch
 ipmi-rework-user-message-limit-handling.patch
 ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch
+mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
+s390-bpf-centralize-frame-offset-calculations.patch
+s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
+s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
+s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch