--- /dev/null
+From 9658d698a8a83540bf6a6c80d13c9a61590ee985 Mon Sep 17 00:00:00 2001
+From: Lance Yang <lance.yang@linux.dev>
+Date: Tue, 30 Sep 2025 16:10:40 +0800
+Subject: mm/rmap: fix soft-dirty and uffd-wp bit loss when remapping zero-filled mTHP subpage to shared zeropage
+
+From: Lance Yang <lance.yang@linux.dev>
+
+commit 9658d698a8a83540bf6a6c80d13c9a61590ee985 upstream.
+
+When splitting an mTHP and replacing a zero-filled subpage with the shared
+zeropage, try_to_map_unused_to_zeropage() currently drops several
+important PTE bits.
+
+For userspace tools like CRIU, which rely on the soft-dirty mechanism for
+incremental snapshots, losing the soft-dirty bit means modified pages are
+missed, leading to inconsistent memory state after restore.
+
+As pointed out by David, the more critical uffd-wp bit is also dropped.
+This breaks the userfaultfd write-protection mechanism, causing writes to
+be silently missed by monitoring applications, which can lead to data
+corruption.
+
+Preserve both the soft-dirty and uffd-wp bits from the old PTE when
+creating the new zeropage mapping to ensure they are correctly tracked.
+
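+For userspace, the soft-dirty side is directly observable through the
+pagemap ABI: bit 55 of each /proc/<pid>/pagemap entry, cleared by
+writing "4" to /proc/<pid>/clear_refs. A minimal sketch (illustrative
+only, not part of this patch) of how a CRIU-style tool samples it:
+
+  #include <stdint.h>
+  #include <unistd.h>
+
+  #define PM_SOFT_DIRTY (1ULL << 55)	/* documented pagemap bit */
+
+  /* Returns 1 if the page at vaddr was written since the last clear. */
+  static int page_is_soft_dirty(int pagemap_fd, unsigned long vaddr,
+				long page_size)
+  {
+	uint64_t ent;
+	off_t off = (off_t)(vaddr / page_size) * sizeof(ent);
+
+	if (pread(pagemap_fd, &ent, sizeof(ent), off) != sizeof(ent))
+		return -1;
+	return !!(ent & PM_SOFT_DIRTY);
+  }
+
+Before this fix, such a reader would see the bit clear for a remapped
+zero-filled subpage even though it had been written since the last
+snapshot, so the page would be skipped in the next incremental dump.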
+Link: https://lkml.kernel.org/r/20250930081040.80926-1-lance.yang@linux.dev
+Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp")
+Signed-off-by: Lance Yang <lance.yang@linux.dev>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Suggested-by: Dev Jain <dev.jain@arm.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Dev Jain <dev.jain@arm.com>
+Acked-by: Zi Yan <ziy@nvidia.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
+Cc: Alistair Popple <apopple@nvidia.com>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Byungchul Park <byungchul@sk.com>
+Cc: Gregory Price <gourry@gourry.net>
+Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
+Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Cc: Mariano Pache <npache@redhat.com>
+Cc: Matthew Brost <matthew.brost@intel.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: Rakie Kim <rakie.kim@sk.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Ryan Roberts <ryan.roberts@arm.com>
+Cc: Usama Arif <usamaarif642@gmail.com>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Yu Zhao <yuzhao@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -198,8 +198,7 @@ bool isolate_folio_to_list(struct folio
+ }
+
+ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
+- struct folio *folio,
+- unsigned long idx)
++ struct folio *folio, pte_t old_pte, unsigned long idx)
+ {
+ struct page *page = folio_page(folio, idx);
+ pte_t newpte;
+@@ -208,7 +207,7 @@ static bool try_to_map_unused_to_zeropag
+ return false;
+ VM_BUG_ON_PAGE(!PageAnon(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+- VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
++ VM_BUG_ON_PAGE(pte_present(old_pte), page);
+
+ if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
+ mm_forbids_zeropage(pvmw->vma->vm_mm))
+@@ -224,6 +223,12 @@ static bool try_to_map_unused_to_zeropag
+
+ newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
+ pvmw->vma->vm_page_prot));
++
++ if (pte_swp_soft_dirty(old_pte))
++ newpte = pte_mksoft_dirty(newpte);
++ if (pte_swp_uffd_wp(old_pte))
++ newpte = pte_mkuffd_wp(newpte);
++
+ set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+
+ dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
+@@ -266,13 +271,13 @@ static bool remove_migration_pte(struct
+ continue;
+ }
+ #endif
++ old_pte = ptep_get(pvmw.pte);
+ if (rmap_walk_arg->map_unused_to_zeropage &&
+- try_to_map_unused_to_zeropage(&pvmw, folio, idx))
++ try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
+ continue;
+
+ folio_get(folio);
+ pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
+- old_pte = ptep_get(pvmw.pte);
+
+ entry = pte_to_swp_entry(old_pte);
+ if (!is_migration_entry_young(entry))
--- /dev/null
+From stable+bounces-186297-greg=kroah.com@vger.kernel.org Fri Oct 17 11:36:47 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:04 +0200
+Subject: s390/bpf: Centralize frame offset calculations
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Alexei Starovoitov <ast@kernel.org>
+Message-ID: <20251017092550.88640-2-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit b2268d550d20ff860bddfe3a91b1aec00414689a upstream.
+
+The calculation of the distance from %r15 to the caller-allocated
+portion of the stack frame is copy-pasted into multiple places in the
+JIT code.
+
+Move it to bpf_jit_prog() and save the result into bpf_jit::frame_off,
+so that the other parts of the JIT can use it.
+
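+A host-side model (illustrative only) of the centralized calculation:
+the distance is zero when the program allocates no frame of its own,
+because %r15 then still points into the caller-allocated area.
+
+  #include <stdint.h>
+
+  #define STK_OFF 96	/* 160-byte caller frame minus its reusable head */
+
+  static uint32_t frame_off(int uses_stack, uint32_t stack_depth)
+  {
+	/* stack_depth is rounded up to 8 bytes, as in bpf_jit_prog() */
+	uint32_t depth8 = (stack_depth + 7) & ~7u;
+
+	return uses_stack ? STK_OFF + depth8 : 0;
+  }
+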
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Link: https://lore.kernel.org/r/20250624121501.50536-2-iii@linux.ibm.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c | 56 +++++++++++++++++++------------------------
+ 1 file changed, 26 insertions(+), 30 deletions(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -56,6 +56,7 @@ struct bpf_jit {
+ int prologue_plt; /* Start of prologue hotpatch PLT */
+ int kern_arena; /* Pool offset of kernel arena address */
+ u64 user_arena; /* User arena address */
++ u32 frame_off; /* Offset of frame from %r15 */
+ };
+
+ #define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
+@@ -421,12 +422,9 @@ static void save_regs(struct bpf_jit *ji
+ /*
+ * Restore registers from "rs" (register start) to "re" (register end) on stack
+ */
+-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
++static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+- u32 off = STK_OFF_R6 + (rs - 6) * 8;
+-
+- if (jit->seen & SEEN_STACK)
+- off += STK_OFF + stack_depth;
++ u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8;
+
+ if (rs == re)
+ /* lg %rs,off(%r15) */
+@@ -470,8 +468,7 @@ static int get_end(u16 seen_regs, int st
+ * Save and restore clobbered registers (6-15) on stack.
+ * We save/restore registers in chunks with gap >= 2 registers.
+ */
+-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
+- u16 extra_regs)
++static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
+ {
+ u16 seen_regs = jit->seen_regs | extra_regs;
+ const int last = 15, save_restore_size = 6;
+@@ -494,7 +491,7 @@ static void save_restore_regs(struct bpf
+ if (op == REGS_SAVE)
+ save_regs(jit, rs, re);
+ else
+- restore_regs(jit, rs, re, stack_depth);
++ restore_regs(jit, rs, re);
+ re++;
+ } while (re <= last);
+ }
+@@ -561,8 +558,7 @@ static void bpf_jit_plt(struct bpf_plt *
+ * Save registers and create stack frame if necessary.
+ * See stack frame layout description in "bpf_jit.h"!
+ */
+-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+- u32 stack_depth)
++static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
+ {
+ /* No-op for hotpatching */
+ /* brcl 0,prologue_plt */
+@@ -595,7 +591,7 @@ static void bpf_jit_prologue(struct bpf_
+ jit->seen_regs |= NVREGS;
+ } else {
+ /* Save registers */
+- save_restore_regs(jit, REGS_SAVE, stack_depth,
++ save_restore_regs(jit, REGS_SAVE,
+ fp->aux->exception_boundary ? NVREGS : 0);
+ }
+ /* Setup literal pool */
+@@ -617,8 +613,8 @@ static void bpf_jit_prologue(struct bpf_
+ EMIT4(0xb9040000, REG_W1, REG_15);
+ /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
+- /* aghi %r15,-STK_OFF */
+- EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
++ /* aghi %r15,-frame_off */
++ EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15, 152);
+@@ -665,13 +661,13 @@ static void call_r1(struct bpf_jit *jit)
+ /*
+ * Function epilogue
+ */
+-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
++static void bpf_jit_epilogue(struct bpf_jit *jit)
+ {
+ jit->exit_ip = jit->prg;
+ /* Load exit code: lgr %r2,%b0 */
+ EMIT4(0xb9040000, REG_2, BPF_REG_0);
+ /* Restore registers */
+- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
++ save_restore_regs(jit, REGS_RESTORE, 0);
+ if (nospec_uses_trampoline()) {
+ jit->r14_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r14 thunk */
+@@ -862,7 +858,7 @@ static int sign_extend(struct bpf_jit *j
+ * stack space for the large switch statement.
+ */
+ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+- int i, bool extra_pass, u32 stack_depth)
++ int i, bool extra_pass)
+ {
+ struct bpf_insn *insn = &fp->insnsi[i];
+ s32 branch_oc_off = insn->off;
+@@ -1783,9 +1779,9 @@ static noinline int bpf_jit_insn(struct
+ * Note 2: We assume that the verifier does not let us call the
+ * main program, which clears the tail call counter on entry.
+ */
+- /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
++ /* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */
+ _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+- 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
++ 0xf000 | (jit->frame_off + STK_OFF_TCCNT));
+
+ /* Sign-extend the kfunc arguments. */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+@@ -1836,10 +1832,7 @@ static noinline int bpf_jit_insn(struct
+ * goto out;
+ */
+
+- if (jit->seen & SEEN_STACK)
+- off = STK_OFF_TCCNT + STK_OFF + stack_depth;
+- else
+- off = STK_OFF_TCCNT;
++ off = jit->frame_off + STK_OFF_TCCNT;
+ /* lhi %w0,1 */
+ EMIT4_IMM(0xa7080000, REG_W0, 1);
+ /* laal %w1,%w0,off(%r15) */
+@@ -1869,7 +1862,7 @@ static noinline int bpf_jit_insn(struct
+ /*
+ * Restore registers before calling function
+ */
+- save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
++ save_restore_regs(jit, REGS_RESTORE, 0);
+
+ /*
+ * goto *(prog->bpf_func + tail_call_start);
+@@ -2161,7 +2154,7 @@ static int bpf_set_addr(struct bpf_jit *
+ * Compile eBPF program into s390x code
+ */
+ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
+- bool extra_pass, u32 stack_depth)
++ bool extra_pass)
+ {
+ int i, insn_count, lit32_size, lit64_size;
+ u64 kern_arena;
+@@ -2170,24 +2163,28 @@ static int bpf_jit_prog(struct bpf_jit *
+ jit->lit64 = jit->lit64_start;
+ jit->prg = 0;
+ jit->excnt = 0;
++ if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
++ jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8);
++ else
++ jit->frame_off = 0;
+
+ kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
+ if (kern_arena)
+ jit->kern_arena = _EMIT_CONST_U64(kern_arena);
+ jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
+
+- bpf_jit_prologue(jit, fp, stack_depth);
++ bpf_jit_prologue(jit, fp);
+ if (bpf_set_addr(jit, 0) < 0)
+ return -1;
+ for (i = 0; i < fp->len; i += insn_count) {
+- insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
++ insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+ if (insn_count < 0)
+ return -1;
+ /* Next instruction address */
+ if (bpf_set_addr(jit, i + insn_count) < 0)
+ return -1;
+ }
+- bpf_jit_epilogue(jit, stack_depth);
++ bpf_jit_epilogue(jit);
+
+ lit32_size = jit->lit32 - jit->lit32_start;
+ lit64_size = jit->lit64 - jit->lit64_start;
+@@ -2263,7 +2260,6 @@ static struct bpf_binary_header *bpf_jit
+ */
+ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+ {
+- u32 stack_depth = round_up(fp->aux->stack_depth, 8);
+ struct bpf_prog *tmp, *orig_fp = fp;
+ struct bpf_binary_header *header;
+ struct s390_jit_data *jit_data;
+@@ -2316,7 +2312,7 @@ struct bpf_prog *bpf_int_jit_compile(str
+ * - 3: Calculate program size and addrs array
+ */
+ for (pass = 1; pass <= 3; pass++) {
+- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
++ if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ fp = orig_fp;
+ goto free_addrs;
+ }
+@@ -2330,7 +2326,7 @@ struct bpf_prog *bpf_int_jit_compile(str
+ goto free_addrs;
+ }
+ skip_init_ctx:
+- if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
++ if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ bpf_jit_binary_free(header);
+ fp = orig_fp;
+ goto free_addrs;
--- /dev/null
+From stable+bounces-186298-greg=kroah.com@vger.kernel.org Fri Oct 17 11:39:54 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:05 +0200
+Subject: s390/bpf: Describe the frame using a struct instead of constants
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Alexei Starovoitov <ast@kernel.org>
+Message-ID: <20251017092550.88640-3-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit e26d523edf2a62b142d2dd2dd9b87f61ed92f33a upstream.
+
+Currently the caller-allocated portion of the stack frame is described
+using constants, hardcoded values, and an ASCII drawing, making it
+harder than necessary to ensure that everything is in sync.
+
+Declare a struct and use offsetof() and offsetofend() macros to refer
+to various values stored within the frame.
+
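+As an illustrative cross-check (not part of this patch), a plain-C
+mirror of the new layout reproduces the old constants exactly;
+offsetofend() is redefined here with the semantics of the kernel macro:
+
+  #include <stddef.h>
+  #include <stdint.h>
+
+  #define offsetofend(T, m) (offsetof(T, m) + sizeof(((T *)0)->m))
+
+  struct prog_frame {
+	uint64_t unused[8];
+	uint32_t tail_call_cnt;
+	uint32_t pad;
+	uint64_t r6[10];		/* r6 - r15 */
+	uint64_t backchain;
+  } __attribute__((packed));
+
+  _Static_assert(offsetof(struct prog_frame, tail_call_cnt) == 160 - 12 * 8,
+		 "old STK_OFF_TCCNT");
+  _Static_assert(offsetof(struct prog_frame, r6) == 160 - 11 * 8,
+		 "old STK_OFF_R6");
+  _Static_assert(offsetofend(struct prog_frame, unused) == 160 - 12 * 8,
+		 "old STK_160_UNUSED");
+  _Static_assert(sizeof(struct prog_frame) == 160,
+		 "STACK_FRAME_OVERHEAD on s390");
+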
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Link: https://lore.kernel.org/r/20250624121501.50536-3-iii@linux.ibm.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit.h | 55 ----------------------------------
+ arch/s390/net/bpf_jit_comp.c | 69 +++++++++++++++++++++++++++++--------------
+ 2 files changed, 47 insertions(+), 77 deletions(-)
+ delete mode 100644 arch/s390/net/bpf_jit.h
+
+--- a/arch/s390/net/bpf_jit.h
++++ /dev/null
+@@ -1,55 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * BPF Jit compiler defines
+- *
+- * Copyright IBM Corp. 2012,2015
+- *
+- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+- */
+-
+-#ifndef __ARCH_S390_NET_BPF_JIT_H
+-#define __ARCH_S390_NET_BPF_JIT_H
+-
+-#ifndef __ASSEMBLY__
+-
+-#include <linux/filter.h>
+-#include <linux/types.h>
+-
+-#endif /* __ASSEMBLY__ */
+-
+-/*
+- * Stackframe layout (packed stack):
+- *
+- * ^ high
+- * +---------------+ |
+- * | old backchain | |
+- * +---------------+ |
+- * | r15 - r6 | |
+- * +---------------+ |
+- * | 4 byte align | |
+- * | tail_call_cnt | |
+- * BFP -> +===============+ |
+- * | | |
+- * | BPF stack | |
+- * | | |
+- * R15+160 -> +---------------+ |
+- * | new backchain | |
+- * R15+152 -> +---------------+ |
+- * | + 152 byte SA | |
+- * R15 -> +---------------+ + low
+- *
+- * We get 160 bytes stack space from calling function, but only use
+- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
+- *
+- * The stack size used by the BPF program ("BPF stack" above) is passed
+- * via "aux->stack_depth".
+- */
+-#define STK_SPACE_ADD (160)
+-#define STK_160_UNUSED (160 - 12 * 8)
+-#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED)
+-
+-#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
+-#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
+-
+-#endif /* __ARCH_S390_NET_BPF_JIT_H */
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -32,7 +32,6 @@
+ #include <asm/set_memory.h>
+ #include <asm/text-patching.h>
+ #include <asm/unwind.h>
+-#include "bpf_jit.h"
+
+ struct bpf_jit {
+ u32 seen; /* Flags to remember seen eBPF instructions */
+@@ -56,7 +55,7 @@ struct bpf_jit {
+ int prologue_plt; /* Start of prologue hotpatch PLT */
+ int kern_arena; /* Pool offset of kernel arena address */
+ u64 user_arena; /* User arena address */
+- u32 frame_off; /* Offset of frame from %r15 */
++	u32 frame_off;		/* Offset of struct prog_frame from %r15 */
+ };
+
+ #define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
+@@ -405,11 +404,25 @@ static void jit_fill_hole(void *area, un
+ }
+
+ /*
++ * Caller-allocated part of the frame.
++ * Thanks to packed stack, its otherwise unused initial part can be used for
++ * the BPF stack and for the next frame.
++ */
++struct prog_frame {
++ u64 unused[8];
++ /* BPF stack starts here and grows towards 0 */
++ u32 tail_call_cnt;
++ u32 pad;
++ u64 r6[10]; /* r6 - r15 */
++ u64 backchain;
++} __packed;
++
++/*
+ * Save registers from "rs" (register start) to "re" (register end) on stack
+ */
+ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+- u32 off = STK_OFF_R6 + (rs - 6) * 8;
++ u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
+
+ if (rs == re)
+ /* stg %rs,off(%r15) */
+@@ -424,7 +437,7 @@ static void save_regs(struct bpf_jit *ji
+ */
+ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+- u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8;
++ u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
+
+ if (rs == re)
+ /* lg %rs,off(%r15) */
+@@ -556,10 +569,12 @@ static void bpf_jit_plt(struct bpf_plt *
+ * Emit function prologue
+ *
+ * Save registers and create stack frame if necessary.
+- * See stack frame layout description in "bpf_jit.h"!
++ * Stack frame layout is described by struct prog_frame.
+ */
+ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
+ {
++ BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
++
+ /* No-op for hotpatching */
+ /* brcl 0,prologue_plt */
+ EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+@@ -567,8 +582,9 @@ static void bpf_jit_prologue(struct bpf_
+
+ if (!bpf_is_subprog(fp)) {
+ /* Initialize the tail call counter in the main program. */
+- /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+- _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
++ /* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
++ _EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
++ 0xf000 | offsetof(struct prog_frame, tail_call_cnt));
+ } else {
+ /*
+ * Skip the tail call counter initialization in subprograms.
+@@ -611,13 +627,15 @@ static void bpf_jit_prologue(struct bpf_
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
+- /* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+- EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
++ /* la %bfp,unused_end(%r15) (BPF frame pointer) */
++ EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
++ offsetofend(struct prog_frame, unused));
+ /* aghi %r15,-frame_off */
+ EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+- /* stg %w1,152(%r15) (backchain) */
++ /* stg %w1,backchain(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+- REG_15, 152);
++ REG_15,
++ offsetof(struct prog_frame, backchain));
+ }
+ }
+
+@@ -1779,9 +1797,10 @@ static noinline int bpf_jit_insn(struct
+ * Note 2: We assume that the verifier does not let us call the
+ * main program, which clears the tail call counter on entry.
+ */
+- /* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */
+- _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+- 0xf000 | (jit->frame_off + STK_OFF_TCCNT));
++ /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
++ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
++ 0xf000 | (jit->frame_off +
++ offsetof(struct prog_frame, tail_call_cnt)));
+
+ /* Sign-extend the kfunc arguments. */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+@@ -1832,7 +1851,8 @@ static noinline int bpf_jit_insn(struct
+ * goto out;
+ */
+
+- off = jit->frame_off + STK_OFF_TCCNT;
++ off = jit->frame_off +
++ offsetof(struct prog_frame, tail_call_cnt);
+ /* lhi %w0,1 */
+ EMIT4_IMM(0xa7080000, REG_W0, 1);
+ /* laal %w1,%w0,off(%r15) */
+@@ -2164,7 +2184,9 @@ static int bpf_jit_prog(struct bpf_jit *
+ jit->prg = 0;
+ jit->excnt = 0;
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
+- jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8);
++ jit->frame_off = sizeof(struct prog_frame) -
++ offsetofend(struct prog_frame, unused) +
++ round_up(fp->aux->stack_depth, 8);
+ else
+ jit->frame_off = 0;
+
+@@ -2647,9 +2669,10 @@ static int __arch_prepare_bpf_trampoline
+ /* stg %r1,backchain_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
+ tjit->backchain_off);
+- /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
++ /* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
+ _EMIT6(0xd203f000 | tjit->tccnt_off,
+- 0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
++ 0xf000 | (tjit->stack_size +
++ offsetof(struct prog_frame, tail_call_cnt)));
+ /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+@@ -2786,8 +2809,9 @@ static int __arch_prepare_bpf_trampoline
+ (nr_stack_args * sizeof(u64) - 1) << 16 |
+ tjit->stack_args_off,
+ 0xf000 | tjit->orig_stack_args_off);
+- /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+- _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
++ /* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
++ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
++ 0xf000 | tjit->tccnt_off);
+ /* lgr %r1,%r8 */
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* %r1() */
+@@ -2844,8 +2868,9 @@ static int __arch_prepare_bpf_trampoline
+ if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+- /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+- _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
++ /* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
++ _EMIT6(0xd203f000 | (tjit->stack_size +
++ offsetof(struct prog_frame, tail_call_cnt)),
+ 0xf000 | tjit->tccnt_off);
+ /* aghi %r15,stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
--- /dev/null
+From stable+bounces-186299-greg=kroah.com@vger.kernel.org Fri Oct 17 11:46:12 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:06 +0200
+Subject: s390/bpf: Write back tail call counter for BPF_PSEUDO_CALL
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20251017092550.88640-4-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit c861a6b147137d10b5ff88a2c492ba376cd1b8b0 upstream.
+
+The tailcall_bpf2bpf_hierarchy_1 test hangs on s390. Its call graph is
+as follows:
+
+ entry()
+ subprog_tail()
+ bpf_tail_call_static(0) -> entry + tail_call_start
+ subprog_tail()
+ bpf_tail_call_static(0) -> entry + tail_call_start
+
+entry() copies its tail call counter to the subprog_tail()'s frame,
+which then increments it. However, the incremented result is discarded,
+leading to an astronomically large number of tail calls.
+
+Fix by writing the incremented counter back to the entry()'s frame.
+
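+A sketch of the pattern (modeled on the selftest, not its exact
+source); slot 0 of jmp_table points back at entry():
+
+  #include <linux/bpf.h>
+  #include <bpf/bpf_helpers.h>
+
+  struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+  } jmp_table SEC(".maps");
+
+  static __noinline int subprog_tail(struct __sk_buff *skb)
+  {
+	/* Increments the counter copy in this subprog's frame. */
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	return 0;
+  }
+
+  SEC("tc")
+  int entry(struct __sk_buff *skb)
+  {
+	subprog_tail(skb);
+	subprog_tail(skb);
+	return 0;
+  }
+
+  char __license[] SEC("license") = "GPL";
+
+With the write-back in place, the counter in entry()'s frame reaches
+MAX_TAIL_CALL_CNT and the chain terminates instead of looping.
+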
+Fixes: dd691e847d28 ("s390/bpf: Implement bpf_jit_supports_subprog_tailcalls()")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20250813121016.163375-3-iii@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c | 23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -1789,13 +1789,6 @@ static noinline int bpf_jit_insn(struct
+ jit->seen |= SEEN_FUNC;
+ /*
+ * Copy the tail call counter to where the callee expects it.
+- *
+- * Note 1: The callee can increment the tail call counter, but
+- * we do not load it back, since the x86 JIT does not do this
+- * either.
+- *
+- * Note 2: We assume that the verifier does not let us call the
+- * main program, which clears the tail call counter on entry.
+ */
+ /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
+ _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+@@ -1822,6 +1815,22 @@ static noinline int bpf_jit_insn(struct
+ call_r1(jit);
+ /* lgr %b0,%r2: load return value into %b0 */
+ EMIT4(0xb9040000, BPF_REG_0, REG_2);
++
++ /*
++ * Copy the potentially updated tail call counter back.
++ */
++
++ if (insn->src_reg == BPF_PSEUDO_CALL)
++ /*
++			 * mvc frame_off+tail_call_cnt(4,%r15),
++			 *	tail_call_cnt(%r15)
++ */
++ _EMIT6(0xd203f000 | (jit->frame_off +
++ offsetof(struct prog_frame,
++ tail_call_cnt)),
++ 0xf000 | offsetof(struct prog_frame,
++ tail_call_cnt));
++
+ break;
+ }
+ case BPF_JMP | BPF_TAIL_CALL: {
--- /dev/null
+From stable+bounces-186300-greg=kroah.com@vger.kernel.org Fri Oct 17 11:45:09 2025
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+Date: Fri, 17 Oct 2025 11:19:07 +0200
+Subject: s390/bpf: Write back tail call counter for BPF_TRAMP_F_CALL_ORIG
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich <iii@linux.ibm.com>, Daniel Borkmann <daniel@iogearbox.net>
+Message-ID: <20251017092550.88640-5-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich <iii@linux.ibm.com>
+
+commit bc3905a71f02511607d3ccf732360580209cac4c upstream.
+
+The tailcall_bpf2bpf_hierarchy_fentry test hangs on s390. Its call
+graph is as follows:
+
+ entry()
+ subprog_tail()
+ trampoline()
+ fentry()
+ the rest of subprog_tail() # via BPF_TRAMP_F_CALL_ORIG
+ return to entry()
+
+The problem is that the rest of subprog_tail() increments the tail call
+counter, but the trampoline discards the incremented value. This
+results in an astronomically large number of tail calls.
+
+Fix by making the trampoline write the incremented tail call counter
+back.
+
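+For reference, a host-side helper (illustrative only, not part of the
+JIT) showing what the 6-byte SS-a MVC image built by the _EMIT6() call
+added below encodes; both displacements must fit in 12 bits:
+
+  #include <stdint.h>
+
+  /* mvc d1(len,%r15),d2(%r15): copy len bytes from d2(%r15) to d1(%r15) */
+  static uint64_t mvc_r15_r15(uint32_t d1, uint32_t d2, uint32_t len)
+  {
+	uint32_t w1 = 0xd200f000u | ((len - 1) << 16) | (d1 & 0xfff);
+	uint16_t w2 = (uint16_t)(0xf000u | (d2 & 0xfff));
+
+	return ((uint64_t)w1 << 16) | w2;
+  }
+
+mvc_r15_r15(tjit->tccnt_off, offsetof(struct prog_frame,
+tail_call_cnt), 4) would reproduce the instruction image emitted here.
+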
+Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()")
+Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://lore.kernel.org/bpf/20250813121016.163375-4-iii@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/net/bpf_jit_comp.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -2828,6 +2828,9 @@ static int __arch_prepare_bpf_trampoline
+ /* stg %r2,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
++	/* mvc tccnt_off(4,%r15),tail_call_cnt(%r15) */
++ _EMIT6(0xd203f000 | tjit->tccnt_off,
++ 0xf000 | offsetof(struct prog_frame, tail_call_cnt));
+
+ im->ip_after_call = jit->prg_buf + jit->prg;
+
mptcp-pm-in-kernel-usable-client-side-with-c-flag.patch
ipmi-rework-user-message-limit-handling.patch
ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch
+mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
+s390-bpf-centralize-frame-offset-calculations.patch
+s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
+s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
+s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch