From: Greg Kroah-Hartman
Date: Fri, 17 Oct 2025 12:00:41 +0000 (+0200)
Subject: 6.12-stable patches
X-Git-Tag: v5.15.195~25
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=23b9c49dc6e841d1a922b7ce4b8f7819ab13fe26;p=thirdparty%2Fkernel%2Fstable-queue.git

6.12-stable patches

added patches:
	mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
	s390-bpf-centralize-frame-offset-calculations.patch
	s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
	s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
	s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch
---

diff --git a/queue-6.12/mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch b/queue-6.12/mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
new file mode 100644
index 0000000000..5e0efe4a16
--- /dev/null
+++ b/queue-6.12/mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
@@ -0,0 +1,110 @@
+From 9658d698a8a83540bf6a6c80d13c9a61590ee985 Mon Sep 17 00:00:00 2001
+From: Lance Yang
+Date: Tue, 30 Sep 2025 16:10:40 +0800
+Subject: mm/rmap: fix soft-dirty and uffd-wp bit loss when remapping zero-filled mTHP subpage to shared zeropage
+
+From: Lance Yang
+
+commit 9658d698a8a83540bf6a6c80d13c9a61590ee985 upstream.
+
+When splitting an mTHP and replacing a zero-filled subpage with the shared
+zeropage, try_to_map_unused_to_zeropage() currently drops several
+important PTE bits.
+
+For userspace tools like CRIU, which rely on the soft-dirty mechanism for
+incremental snapshots, losing the soft-dirty bit means modified pages are
+missed, leading to inconsistent memory state after restore.
+
+As pointed out by David, the more critical uffd-wp bit is also dropped.
+This breaks the userfaultfd write-protection mechanism, causing writes to
+be silently missed by monitoring applications, which can lead to data
+corruption.
+
+Preserve both the soft-dirty and uffd-wp bits from the old PTE when
+creating the new zeropage mapping to ensure they are correctly tracked.
+
+Link: https://lkml.kernel.org/r/20250930081040.80926-1-lance.yang@linux.dev
+Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp")
+Signed-off-by: Lance Yang
+Suggested-by: David Hildenbrand
+Suggested-by: Dev Jain
+Acked-by: David Hildenbrand
+Reviewed-by: Dev Jain
+Acked-by: Zi Yan
+Reviewed-by: Liam R. Howlett
+Reviewed-by: Harry Yoo
+Cc: Alistair Popple
+Cc: Baolin Wang
+Cc: Barry Song
+Cc: Byungchul Park
+Cc: Gregory Price
+Cc: "Huang, Ying"
+Cc: Jann Horn
+Cc: Joshua Hahn
+Cc: Lorenzo Stoakes
+Cc: Mariano Pache
+Cc: Mathew Brost
+Cc: Peter Xu
+Cc: Rakie Kim
+Cc: Rik van Riel
+Cc: Ryan Roberts
+Cc: Usama Arif
+Cc: Vlastimil Babka
+Cc: Yu Zhao
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/migrate.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -198,8 +198,7 @@ bool isolate_folio_to_list(struct folio
+ }
+ 
+ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw,
+-					  struct folio *folio,
+-					  unsigned long idx)
++					  struct folio *folio, pte_t old_pte, unsigned long idx)
+ {
+ 	struct page *page = folio_page(folio, idx);
+ 	pte_t newpte;
+@@ -208,7 +207,7 @@ static bool try_to_map_unused_to_zeropag
+ 		return false;
+ 	VM_BUG_ON_PAGE(!PageAnon(page), page);
+ 	VM_BUG_ON_PAGE(!PageLocked(page), page);
+-	VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
++	VM_BUG_ON_PAGE(pte_present(old_pte), page);
+ 
+ 	if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags & VM_LOCKED) ||
+ 	    mm_forbids_zeropage(pvmw->vma->vm_mm))
+@@ -224,6 +223,12 @@ static bool try_to_map_unused_to_zeropag
+ 
+ 	newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
+ 					pvmw->vma->vm_page_prot));
++
++	if (pte_swp_soft_dirty(old_pte))
++		newpte = pte_mksoft_dirty(newpte);
++	if (pte_swp_uffd_wp(old_pte))
++		newpte = pte_mkuffd_wp(newpte);
++
+ 	set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte, newpte);
+ 
+ 	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
+@@ -266,13 +271,13 @@ static bool remove_migration_pte(struct
+ 			continue;
+ 		}
+ #endif
++		old_pte = ptep_get(pvmw.pte);
+ 		if (rmap_walk_arg->map_unused_to_zeropage &&
+-		    try_to_map_unused_to_zeropage(&pvmw, folio, idx))
++		    try_to_map_unused_to_zeropage(&pvmw, folio, old_pte, idx))
+ 			continue;
+ 
+ 		folio_get(folio);
+ 		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
+-		old_pte = ptep_get(pvmw.pte);
+ 
+ 		entry = pte_to_swp_entry(old_pte);
+ 		if (!is_migration_entry_young(entry))
diff --git a/queue-6.12/s390-bpf-centralize-frame-offset-calculations.patch b/queue-6.12/s390-bpf-centralize-frame-offset-calculations.patch
new file mode 100644
index 0000000000..fe5661befe
--- /dev/null
+++ b/queue-6.12/s390-bpf-centralize-frame-offset-calculations.patch
@@ -0,0 +1,226 @@
+From stable+bounces-186297-greg=kroah.com@vger.kernel.org Fri Oct 17 11:36:47 2025
+From: Ilya Leoshkevich
+Date: Fri, 17 Oct 2025 11:19:04 +0200
+Subject: s390/bpf: Centralize frame offset calculations
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich , Alexei Starovoitov
+Message-ID: <20251017092550.88640-2-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich
+
+commit b2268d550d20ff860bddfe3a91b1aec00414689a upstream.
+
+The calculation of the distance from %r15 to the caller-allocated
+portion of the stack frame is copy-pasted into multiple places in the
+JIT code.
+
+Move it to bpf_jit_prog() and save the result into bpf_jit::frame_off,
+so that the other parts of the JIT can use it.
+
+Signed-off-by: Ilya Leoshkevich
+Link: https://lore.kernel.org/r/20250624121501.50536-2-iii@linux.ibm.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/net/bpf_jit_comp.c |   56 +++++++++++++++++++------------------------
+ 1 file changed, 26 insertions(+), 30 deletions(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -56,6 +56,7 @@ struct bpf_jit {
+ 	int prologue_plt;	/* Start of prologue hotpatch PLT */
+ 	int kern_arena;		/* Pool offset of kernel arena address */
+ 	u64 user_arena;		/* User arena address */
++	u32 frame_off;		/* Offset of frame from %r15 */
+ };
+ 
+ #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
+@@ -421,12 +422,9 @@ static void save_regs(struct bpf_jit *ji
+ /*
+  * Restore registers from "rs" (register start) to "re" (register end) on stack
+  */
+-static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
++static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+-	u32 off = STK_OFF_R6 + (rs - 6) * 8;
+-
+-	if (jit->seen & SEEN_STACK)
+-		off += STK_OFF + stack_depth;
++	u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8;
+ 
+ 	if (rs == re)
+ 		/* lg %rs,off(%r15) */
+@@ -470,8 +468,7 @@ static int get_end(u16 seen_regs, int st
+  * Save and restore clobbered registers (6-15) on stack.
+  * We save/restore registers in chunks with gap >= 2 registers.
+  */
+-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
+-			      u16 extra_regs)
++static void save_restore_regs(struct bpf_jit *jit, int op, u16 extra_regs)
+ {
+ 	u16 seen_regs = jit->seen_regs | extra_regs;
+ 	const int last = 15, save_restore_size = 6;
+@@ -494,7 +491,7 @@ static void save_restore_regs(struct bpf
+ 		if (op == REGS_SAVE)
+ 			save_regs(jit, rs, re);
+ 		else
+-			restore_regs(jit, rs, re, stack_depth);
++			restore_regs(jit, rs, re);
+ 		re++;
+ 	} while (re <= last);
+ }
+@@ -561,8 +558,7 @@ static void bpf_jit_plt(struct bpf_plt *
+  * Save registers and create stack frame if necessary.
+  * See stack frame layout description in "bpf_jit.h"!
+  */
+-static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+-			     u32 stack_depth)
++static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
+ {
+ 	/* No-op for hotpatching */
+ 	/* brcl 0,prologue_plt */
+@@ -595,7 +591,7 @@ static void bpf_jit_prologue(struct bpf_
+ 		jit->seen_regs |= NVREGS;
+ 	} else {
+ 		/* Save registers */
+-		save_restore_regs(jit, REGS_SAVE, stack_depth,
++		save_restore_regs(jit, REGS_SAVE,
+ 				  fp->aux->exception_boundary ? NVREGS : 0);
+ 	}
+ 	/* Setup literal pool */
+@@ -617,8 +613,8 @@ static void bpf_jit_prologue(struct bpf_
+ 		EMIT4(0xb9040000, REG_W1, REG_15);
+ 		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+ 		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
+-		/* aghi %r15,-STK_OFF */
+-		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
++		/* aghi %r15,-frame_off */
++		EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+ 		/* stg %w1,152(%r15) (backchain) */
+ 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ 			      REG_15, 152);
+@@ -665,13 +661,13 @@ static void call_r1(struct bpf_jit *jit)
+ /*
+  * Function epilogue
+  */
+-static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
++static void bpf_jit_epilogue(struct bpf_jit *jit)
+ {
+ 	jit->exit_ip = jit->prg;
+ 	/* Load exit code: lgr %r2,%b0 */
+ 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
+ 	/* Restore registers */
+-	save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
++	save_restore_regs(jit, REGS_RESTORE, 0);
+ 	if (nospec_uses_trampoline()) {
+ 		jit->r14_thunk_ip = jit->prg;
+ 		/* Generate __s390_indirect_jump_r14 thunk */
+@@ -862,7 +858,7 @@ static int sign_extend(struct bpf_jit *j
+  * stack space for the large switch statement.
+  */
+ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
+-				 int i, bool extra_pass, u32 stack_depth)
++				 int i, bool extra_pass)
+ {
+ 	struct bpf_insn *insn = &fp->insnsi[i];
+ 	s32 branch_oc_off = insn->off;
+@@ -1783,9 +1779,9 @@ static noinline int bpf_jit_insn(struct
+ 		 * Note 2: We assume that the verifier does not let us call the
+ 		 * main program, which clears the tail call counter on entry.
+ 		 */
+-		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
++		/* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */
+ 		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
+-		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
++		       0xf000 | (jit->frame_off + STK_OFF_TCCNT));
+ 
+ 		/* Sign-extend the kfunc arguments. */
+ 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+@@ -1836,10 +1832,7 @@ static noinline int bpf_jit_insn(struct
+ 		 *         goto out;
+ 		 */
+ 
+-		if (jit->seen & SEEN_STACK)
+-			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
+-		else
+-			off = STK_OFF_TCCNT;
++		off = jit->frame_off + STK_OFF_TCCNT;
+ 		/* lhi %w0,1 */
+ 		EMIT4_IMM(0xa7080000, REG_W0, 1);
+ 		/* laal %w1,%w0,off(%r15) */
+@@ -1869,7 +1862,7 @@ static noinline int bpf_jit_insn(struct
+ 		/*
+ 		 * Restore registers before calling function
+ 		 */
+-		save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
++		save_restore_regs(jit, REGS_RESTORE, 0);
+ 
+ 		/*
+ 		 * goto *(prog->bpf_func + tail_call_start);
+@@ -2161,7 +2154,7 @@ static int bpf_set_addr(struct bpf_jit *
+  * Compile eBPF program into s390x code
+  */
+ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
+-			bool extra_pass, u32 stack_depth)
++			bool extra_pass)
+ {
+ 	int i, insn_count, lit32_size, lit64_size;
+ 	u64 kern_arena;
+@@ -2170,24 +2163,28 @@ static int bpf_jit_prog(struct bpf_jit *
+ 	jit->lit64 = jit->lit64_start;
+ 	jit->prg = 0;
+ 	jit->excnt = 0;
++	if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
++		jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8);
++	else
++		jit->frame_off = 0;
+ 
+ 	kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
+ 	if (kern_arena)
+ 		jit->kern_arena = _EMIT_CONST_U64(kern_arena);
+ 	jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
+ 
+-	bpf_jit_prologue(jit, fp, stack_depth);
++	bpf_jit_prologue(jit, fp);
+ 	if (bpf_set_addr(jit, 0) < 0)
+ 		return -1;
+ 	for (i = 0; i < fp->len; i += insn_count) {
+-		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
++		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+ 		if (insn_count < 0)
+ 			return -1;
+ 		/* Next instruction address */
+ 		if (bpf_set_addr(jit, i + insn_count) < 0)
+ 			return -1;
+ 	}
+-	bpf_jit_epilogue(jit, stack_depth);
++	bpf_jit_epilogue(jit);
+ 
+ 	lit32_size = jit->lit32 - jit->lit32_start;
+ 	lit64_size = jit->lit64 - jit->lit64_start;
+@@ -2263,7 +2260,6 @@ static struct bpf_binary_header *bpf_jit
+  */
+ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+ {
+-	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
+ 	struct bpf_prog *tmp, *orig_fp = fp;
+ 	struct bpf_binary_header *header;
+ 	struct s390_jit_data *jit_data;
+@@ -2316,7 +2312,7 @@ struct bpf_prog *bpf_int_jit_compile(str
+ 	 * - 3: Calculate program size and addrs array
+ 	 */
+ 	for (pass = 1; pass <= 3; pass++) {
+-		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
++		if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ 			fp = orig_fp;
+ 			goto free_addrs;
+ 		}
+@@ -2330,7 +2326,7 @@ struct bpf_prog *bpf_int_jit_compile(str
+ 		goto free_addrs;
+ 	}
+ skip_init_ctx:
+-	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
++	if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ 		bpf_jit_binary_free(header);
+ 		fp = orig_fp;
+ 		goto free_addrs;
diff --git a/queue-6.12/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch b/queue-6.12/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
new file mode 100644
index 0000000000..8b3f14834a
--- /dev/null
+++ b/queue-6.12/s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
@@ -0,0 +1,260 @@
+From stable+bounces-186298-greg=kroah.com@vger.kernel.org Fri Oct 17 11:39:54 2025
+From: Ilya Leoshkevich
+Date: Fri, 17 Oct 2025 11:19:05 +0200
+Subject: s390/bpf: Describe the frame using a struct instead of constants
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich , Alexei Starovoitov
+Message-ID: <20251017092550.88640-3-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich
+
+commit e26d523edf2a62b142d2dd2dd9b87f61ed92f33a upstream.
+
+Currently the caller-allocated portion of the stack frame is described
+using constants, hardcoded values, and an ASCII drawing, making it
+harder than necessary to ensure that everything is in sync.
+
+Declare a struct and use offsetof() and offsetofend() macros to refer
+to various values stored within the frame.
+
+Signed-off-by: Ilya Leoshkevich
+Link: https://lore.kernel.org/r/20250624121501.50536-3-iii@linux.ibm.com
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/net/bpf_jit.h      |   55 ----------------------------------
+ arch/s390/net/bpf_jit_comp.c |   69 +++++++++++++++++++++++++++++--------------
+ 2 files changed, 47 insertions(+), 77 deletions(-)
+ delete mode 100644 arch/s390/net/bpf_jit.h
+
+--- a/arch/s390/net/bpf_jit.h
++++ /dev/null
+@@ -1,55 +0,0 @@
+-/* SPDX-License-Identifier: GPL-2.0 */
+-/*
+- * BPF Jit compiler defines
+- *
+- * Copyright IBM Corp. 2012,2015
+- *
+- * Author(s): Martin Schwidefsky
+- *            Michael Holzheu
+- */
+-
+-#ifndef __ARCH_S390_NET_BPF_JIT_H
+-#define __ARCH_S390_NET_BPF_JIT_H
+-
+-#ifndef __ASSEMBLY__
+-
+-#include
+-#include
+-
+-#endif /* __ASSEMBLY__ */
+-
+-/*
+- * Stackframe layout (packed stack):
+- *
+- *                     ^ high
+- *     +---------------+ |
+- *     | old backchain | |
+- *     +---------------+ |
+- *     |   r15 - r6    | |
+- *     +---------------+ |
+- *     | 4 byte align  | |
+- *     | tail_call_cnt | |
+- * BFP -> +===============+ |
+- *     |               | |
+- *     |   BPF stack   | |
+- *     |               | |
+- * R15+160 -> +---------------+ |
+- *     | new backchain | |
+- * R15+152 -> +---------------+ |
+- *     | + 152 byte SA | |
+- * R15 -> +---------------+ + low
+- *
+- * We get 160 bytes stack space from calling function, but only use
+- * 12 * 8 byte for old backchain, r15..r6, and tail_call_cnt.
+- *
+- * The stack size used by the BPF program ("BPF stack" above) is passed
+- * via "aux->stack_depth".
+- */
+-#define STK_SPACE_ADD	(160)
+-#define STK_160_UNUSED	(160 - 12 * 8)
+-#define STK_OFF		(STK_SPACE_ADD - STK_160_UNUSED)
+-
+-#define STK_OFF_R6	(160 - 11 * 8)	/* Offset of r6 on stack */
+-#define STK_OFF_TCCNT	(160 - 12 * 8)	/* Offset of tail_call_cnt on stack */
+-
+-#endif /* __ARCH_S390_NET_BPF_JIT_H */
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -32,7 +32,6 @@
+ #include
+ #include
+ #include
+-#include "bpf_jit.h"
+ 
+ struct bpf_jit {
+ 	u32 seen;		/* Flags to remember seen eBPF instructions */
+@@ -56,7 +55,7 @@ struct bpf_jit {
+ 	int prologue_plt;	/* Start of prologue hotpatch PLT */
+ 	int kern_arena;		/* Pool offset of kernel arena address */
+ 	u64 user_arena;		/* User arena address */
+-	u32 frame_off;		/* Offset of frame from %r15 */
++	u32 frame_off;		/* Offset of struct prog_frame from %r15 */
+ };
+ 
+ #define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
+@@ -405,11 +404,25 @@ static void jit_fill_hole(void *area, un
+ }
+ 
+ /*
++ * Caller-allocated part of the frame.
++ * Thanks to packed stack, its otherwise unused initial part can be used for
++ * the BPF stack and for the next frame.
++ */
++struct prog_frame {
++	u64 unused[8];
++	/* BPF stack starts here and grows towards 0 */
++	u32 tail_call_cnt;
++	u32 pad;
++	u64 r6[10];	/* r6 - r15 */
++	u64 backchain;
++} __packed;
++
++/*
+  * Save registers from "rs" (register start) to "re" (register end) on stack
+  */
+ static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+-	u32 off = STK_OFF_R6 + (rs - 6) * 8;
++	u32 off = offsetof(struct prog_frame, r6) + (rs - 6) * 8;
+ 
+ 	if (rs == re)
+ 		/* stg %rs,off(%r15) */
+@@ -424,7 +437,7 @@ static void save_regs(struct bpf_jit *ji
+  */
+ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+ {
+-	u32 off = jit->frame_off + STK_OFF_R6 + (rs - 6) * 8;
++	u32 off = jit->frame_off + offsetof(struct prog_frame, r6) + (rs - 6) * 8;
+ 
+ 	if (rs == re)
+ 		/* lg %rs,off(%r15) */
+@@ -556,10 +569,12 @@ static void bpf_jit_plt(struct bpf_plt *
+  * Emit function prologue
+  *
+  * Save registers and create stack frame if necessary.
+- * See stack frame layout description in "bpf_jit.h"!
++ * Stack frame layout is described by struct prog_frame.
+  */
+ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
+ {
++	BUILD_BUG_ON(sizeof(struct prog_frame) != STACK_FRAME_OVERHEAD);
++
+ 	/* No-op for hotpatching */
+ 	/* brcl 0,prologue_plt */
+ 	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+@@ -567,8 +582,9 @@ static void bpf_jit_prologue(struct bpf_
+ 
+ 	if (!bpf_is_subprog(fp)) {
+ 		/* Initialize the tail call counter in the main program. */
+-		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
+-		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
++		/* xc tail_call_cnt(4,%r15),tail_call_cnt(%r15) */
++		_EMIT6(0xd703f000 | offsetof(struct prog_frame, tail_call_cnt),
++		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));
+ 	} else {
+ 		/*
+ 		 * Skip the tail call counter initialization in subprograms.
+@@ -611,13 +627,15 @@ static void bpf_jit_prologue(struct bpf_
+ 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+ 		/* lgr %w1,%r15 (backchain) */
+ 		EMIT4(0xb9040000, REG_W1, REG_15);
+-		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+-		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
++		/* la %bfp,unused_end(%r15) (BPF frame pointer) */
++		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15,
++			   offsetofend(struct prog_frame, unused));
+ 		/* aghi %r15,-frame_off */
+ 		EMIT4_IMM(0xa70b0000, REG_15, -jit->frame_off);
+-		/* stg %w1,152(%r15) (backchain) */
++		/* stg %w1,backchain(%r15) */
+ 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+-			      REG_15, 152);
++			      REG_15,
++			      offsetof(struct prog_frame, backchain));
+ 	}
+ }
+ 
+@@ -1779,9 +1797,10 @@ static noinline int bpf_jit_insn(struct
+ 		 * Note 2: We assume that the verifier does not let us call the
+ 		 * main program, which clears the tail call counter on entry.
+ 		 */
+-		/* mvc STK_OFF_TCCNT(4,%r15),frame_off+STK_OFF_TCCNT(%r15) */
+-		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
+-		       0xf000 | (jit->frame_off + STK_OFF_TCCNT));
++		/* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
++		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
++		       0xf000 | (jit->frame_off +
++				 offsetof(struct prog_frame, tail_call_cnt)));
+ 
+ 		/* Sign-extend the kfunc arguments. */
+ 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+@@ -1832,7 +1851,8 @@ static noinline int bpf_jit_insn(struct
+ 		 * goto out;
+ 		 */
+ 
+-		off = jit->frame_off + STK_OFF_TCCNT;
++		off = jit->frame_off +
++		      offsetof(struct prog_frame, tail_call_cnt);
+ 		/* lhi %w0,1 */
+ 		EMIT4_IMM(0xa7080000, REG_W0, 1);
+ 		/* laal %w1,%w0,off(%r15) */
+@@ -2164,7 +2184,9 @@ static int bpf_jit_prog(struct bpf_jit *
+ 	jit->prg = 0;
+ 	jit->excnt = 0;
+ 	if (is_first_pass(jit) || (jit->seen & SEEN_STACK))
+-		jit->frame_off = STK_OFF + round_up(fp->aux->stack_depth, 8);
++		jit->frame_off = sizeof(struct prog_frame) -
++				 offsetofend(struct prog_frame, unused) +
++				 round_up(fp->aux->stack_depth, 8);
+ 	else
+ 		jit->frame_off = 0;
+ 
+@@ -2647,9 +2669,10 @@ static int __arch_prepare_bpf_trampoline
+ 	/* stg %r1,backchain_off(%r15) */
+ 	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
+ 		      tjit->backchain_off);
+-	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
++	/* mvc tccnt_off(4,%r15),stack_size+tail_call_cnt(%r15) */
+ 	_EMIT6(0xd203f000 | tjit->tccnt_off,
+-	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
++	       0xf000 | (tjit->stack_size +
++			 offsetof(struct prog_frame, tail_call_cnt)));
+ 	/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+ 	if (nr_reg_args)
+ 		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+@@ -2786,8 +2809,9 @@ static int __arch_prepare_bpf_trampoline
+ 			     (nr_stack_args * sizeof(u64) - 1) << 16 |
+ 			     tjit->stack_args_off,
+ 		       0xf000 | tjit->orig_stack_args_off);
+-	/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+-	_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
++	/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
++	_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
++	       0xf000 | tjit->tccnt_off);
+ 	/* lgr %r1,%r8 */
+ 	EMIT4(0xb9040000, REG_1, REG_8);
+ 	/* %r1() */
+@@ -2844,8 +2868,9 @@ static int __arch_prepare_bpf_trampoline
+ 	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+ 		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+ 			      tjit->retval_off);
+-	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+-	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
++	/* mvc stack_size+tail_call_cnt(4,%r15),tccnt_off(%r15) */
++	_EMIT6(0xd203f000 | (tjit->stack_size +
++			     offsetof(struct prog_frame, tail_call_cnt)),
+ 	       0xf000 | tjit->tccnt_off);
+ 	/* aghi %r15,stack_size */
+ 	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
diff --git a/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch b/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
new file mode 100644
index 0000000000..9536f3a1e7
--- /dev/null
+++ b/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
@@ -0,0 +1,75 @@
+From stable+bounces-186299-greg=kroah.com@vger.kernel.org Fri Oct 17 11:46:12 2025
+From: Ilya Leoshkevich
+Date: Fri, 17 Oct 2025 11:19:06 +0200
+Subject: s390/bpf: Write back tail call counter for BPF_PSEUDO_CALL
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich , Daniel Borkmann
+Message-ID: <20251017092550.88640-4-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich
+
+commit c861a6b147137d10b5ff88a2c492ba376cd1b8b0 upstream.
+
+The tailcall_bpf2bpf_hierarchy_1 test hangs on s390. Its call graph is
+as follows:
+
+    entry()
+      subprog_tail()
+        bpf_tail_call_static(0) -> entry
+          tail_call_start
+            subprog_tail()
+              bpf_tail_call_static(0) -> entry
+                tail_call_start
+
+entry() copies its tail call counter to the subprog_tail()'s frame,
+which then increments it. However, the incremented result is discarded,
+leading to an astronomically large number of tail calls.
+
+Fix by writing the incremented counter back to the entry()'s frame.
+
+Fixes: dd691e847d28 ("s390/bpf: Implement bpf_jit_supports_subprog_tailcalls()")
+Signed-off-by: Ilya Leoshkevich
+Signed-off-by: Daniel Borkmann
+Link: https://lore.kernel.org/bpf/20250813121016.163375-3-iii@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/net/bpf_jit_comp.c |   23 ++++++++++++++++-------
+ 1 file changed, 16 insertions(+), 7 deletions(-)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -1789,13 +1789,6 @@ static noinline int bpf_jit_insn(struct
+ 		jit->seen |= SEEN_FUNC;
+ 		/*
+ 		 * Copy the tail call counter to where the callee expects it.
+-		 *
+-		 * Note 1: The callee can increment the tail call counter, but
+-		 * we do not load it back, since the x86 JIT does not do this
+-		 * either.
+-		 *
+-		 * Note 2: We assume that the verifier does not let us call the
+-		 * main program, which clears the tail call counter on entry.
+ 		 */
+ 		/* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
+ 		_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
+@@ -1822,6 +1815,22 @@ static noinline int bpf_jit_insn(struct
+ 		call_r1(jit);
+ 		/* lgr %b0,%r2: load return value into %b0 */
+ 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
++
++		/*
++		 * Copy the potentially updated tail call counter back.
++		 */
++
++		if (insn->src_reg == BPF_PSEUDO_CALL)
++			/*
++			 * mvc frame_off+tail_call_cnt(4,%r15),
++			 *     tail_call_cnt(%r15)
++			 */
++			_EMIT6(0xd203f000 | (jit->frame_off +
++					     offsetof(struct prog_frame,
++						      tail_call_cnt)),
++			       0xf000 | offsetof(struct prog_frame,
++						 tail_call_cnt));
++
+ 		break;
+ 	}
+ 	case BPF_JMP | BPF_TAIL_CALL: {
diff --git a/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch b/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch
new file mode 100644
index 0000000000..b6b6d4f7e5
--- /dev/null
+++ b/queue-6.12/s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch
@@ -0,0 +1,50 @@
+From stable+bounces-186300-greg=kroah.com@vger.kernel.org Fri Oct 17 11:45:09 2025
+From: Ilya Leoshkevich
+Date: Fri, 17 Oct 2025 11:19:07 +0200
+Subject: s390/bpf: Write back tail call counter for BPF_TRAMP_F_CALL_ORIG
+To: stable@vger.kernel.org
+Cc: Ilya Leoshkevich , Daniel Borkmann
+Message-ID: <20251017092550.88640-5-iii@linux.ibm.com>
+
+From: Ilya Leoshkevich
+
+commit bc3905a71f02511607d3ccf732360580209cac4c upstream.
+
+The tailcall_bpf2bpf_hierarchy_fentry test hangs on s390. Its call
+graph is as follows:
+
+    entry()
+      subprog_tail()
+        trampoline()
+          fentry()
+          the rest of subprog_tail() # via BPF_TRAMP_F_CALL_ORIG
+      return to entry()
+
+The problem is that the rest of subprog_tail() increments the tail call
+counter, but the trampoline discards the incremented value. This
+results in an astronomically large number of tail calls.
+
+Fix by making the trampoline write the incremented tail call counter
+back.
+
+Fixes: 528eb2cb87bc ("s390/bpf: Implement arch_prepare_bpf_trampoline()")
+Signed-off-by: Ilya Leoshkevich
+Signed-off-by: Daniel Borkmann
+Link: https://lore.kernel.org/bpf/20250813121016.163375-4-iii@linux.ibm.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/s390/net/bpf_jit_comp.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/s390/net/bpf_jit_comp.c
++++ b/arch/s390/net/bpf_jit_comp.c
+@@ -2828,6 +2828,9 @@ static int __arch_prepare_bpf_trampoline
+ 		/* stg %r2,retval_off(%r15) */
+ 		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ 			      tjit->retval_off);
++		/* mvc tccnt_off(4,%r15),tail_call_cnt(%r15) */
++		_EMIT6(0xd203f000 | tjit->tccnt_off,
++		       0xf000 | offsetof(struct prog_frame, tail_call_cnt));
+ 
+ 	im->ip_after_call = jit->prg_buf + jit->prg;
+ 
diff --git a/queue-6.12/series b/queue-6.12/series
index 15bdf4ad91..95bb15cb5e 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -255,3 +255,8 @@ acpi-property-do-not-pass-null-handles-to-acpi_attach_data.patch
 mptcp-pm-in-kernel-usable-client-side-with-c-flag.patch
 ipmi-rework-user-message-limit-handling.patch
 ipmi-fix-handling-of-messages-with-provided-receive-message-pointer.patch
+mm-rmap-fix-soft-dirty-and-uffd-wp-bit-loss-when-remapping-zero-filled-mthp-subpage-to-shared-zeropage.patch
+s390-bpf-centralize-frame-offset-calculations.patch
+s390-bpf-describe-the-frame-using-a-struct-instead-of-constants.patch
+s390-bpf-write-back-tail-call-counter-for-bpf_pseudo_call.patch
+s390-bpf-write-back-tail-call-counter-for-bpf_tramp_f_call_orig.patch