git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 29 Mar 2024 12:43:26 +0000 (13:43 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 29 Mar 2024 12:43:26 +0000 (13:43 +0100)
added patches:
arch-introduce-config_function_alignment.patch
x86-alternatives-introduce-int3_emulate_jcc.patch
x86-alternatives-teach-text_poke_bp-to-patch-jcc.d32-instructions.patch
x86-asm-differentiate-between-code-and-function-alignment.patch
x86-static_call-add-support-for-jcc-tail-calls.patch

queue-5.15/arch-introduce-config_function_alignment.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/x86-alternatives-introduce-int3_emulate_jcc.patch [new file with mode: 0644]
queue-5.15/x86-alternatives-teach-text_poke_bp-to-patch-jcc.d32-instructions.patch [new file with mode: 0644]
queue-5.15/x86-asm-differentiate-between-code-and-function-alignment.patch [new file with mode: 0644]
queue-5.15/x86-static_call-add-support-for-jcc-tail-calls.patch [new file with mode: 0644]

diff --git a/queue-5.15/arch-introduce-config_function_alignment.patch b/queue-5.15/arch-introduce-config_function_alignment.patch
new file mode 100644 (file)
index 0000000..a89fa6e
--- /dev/null
@@ -0,0 +1,193 @@
+From stable+bounces-27576-greg=kroah.com@vger.kernel.org Wed Mar 13 11:43:35 2024
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Date: Wed, 13 Mar 2024 07:42:51 -0300
+Subject: arch: Introduce CONFIG_FUNCTION_ALIGNMENT
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, kernel-dev@igalia.com
+Message-ID: <20240313104255.1083365-2-cascardo@igalia.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit d49a0626216b95cd4bf696f6acf55f39a16ab0bb upstream.
+
+Generic function-alignment infrastructure.
+
+Architectures can select FUNCTION_ALIGNMENT_xxB symbols; the
+FUNCTION_ALIGNMENT symbol is then set to the largest such selected
+size, 0 otherwise.
+
+From this the -falign-functions compiler argument and __ALIGN macro
+are set.
+
+This incorporates the DEBUG_FORCE_FUNCTION_ALIGN_64B knob and future
+alignment requirements for x86_64 (later in this series) into a single
+place.
+
+NOTE: also removes the 0x90 filler byte from the generic __ALIGN
+      primitive, that value makes no sense outside of x86.
+
+NOTE: .balign 0 reverts to a no-op.
+
+Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
+Change-Id: I053b3c408d56988381feb8c8bdb5e27ea221755f
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220915111143.719248727@infradead.org
+[cascardo: adjust context at arch/x86/Kconfig]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Makefile                           |    4 ++--
+ arch/Kconfig                       |   24 ++++++++++++++++++++++++
+ arch/ia64/Kconfig                  |    1 +
+ arch/ia64/Makefile                 |    2 +-
+ arch/x86/Kconfig                   |    2 ++
+ arch/x86/boot/compressed/head_64.S |    8 ++++++++
+ arch/x86/include/asm/linkage.h     |    4 +---
+ include/asm-generic/vmlinux.lds.h  |    4 ++--
+ include/linux/linkage.h            |    4 ++--
+ lib/Kconfig.debug                  |    1 +
+ 10 files changed, 44 insertions(+), 10 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -1000,8 +1000,8 @@ KBUILD_CFLAGS    += $(CC_FLAGS_CFI)
+ export CC_FLAGS_CFI
+ endif
+-ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B
+-KBUILD_CFLAGS += -falign-functions=64
++ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
++KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
+ endif
+ # arch Makefile may override CC so keep this after arch Makefile is included
+--- a/arch/Kconfig
++++ b/arch/Kconfig
+@@ -1303,4 +1303,28 @@ source "kernel/gcov/Kconfig"
+ source "scripts/gcc-plugins/Kconfig"
++config FUNCTION_ALIGNMENT_4B
++      bool
++
++config FUNCTION_ALIGNMENT_8B
++      bool
++
++config FUNCTION_ALIGNMENT_16B
++      bool
++
++config FUNCTION_ALIGNMENT_32B
++      bool
++
++config FUNCTION_ALIGNMENT_64B
++      bool
++
++config FUNCTION_ALIGNMENT
++      int
++      default 64 if FUNCTION_ALIGNMENT_64B
++      default 32 if FUNCTION_ALIGNMENT_32B
++      default 16 if FUNCTION_ALIGNMENT_16B
++      default 8 if FUNCTION_ALIGNMENT_8B
++      default 4 if FUNCTION_ALIGNMENT_4B
++      default 0
++
+ endmenu
+--- a/arch/ia64/Kconfig
++++ b/arch/ia64/Kconfig
+@@ -63,6 +63,7 @@ config IA64
+       select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
+       select SET_FS
+       select ZONE_DMA32
++      select FUNCTION_ALIGNMENT_32B
+       default y
+       help
+         The Itanium Processor Family is Intel's 64-bit successor to
+--- a/arch/ia64/Makefile
++++ b/arch/ia64/Makefile
+@@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
+ EXTRA         :=
+ cflags-y      := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
+-                 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
++                 -frename-registers -fno-optimize-sibling-calls
+ KBUILD_CFLAGS_KERNEL := -mconstant-gp
+ GAS_STATUS    = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -267,6 +267,8 @@ config X86
+       select HAVE_ARCH_KCSAN                  if X86_64
+       select X86_FEATURE_NAMES                if PROC_FS
+       select PROC_PID_ARCH_STATUS             if PROC_FS
++      select FUNCTION_ALIGNMENT_16B           if X86_64 || X86_ALIGNMENT_16
++      select FUNCTION_ALIGNMENT_4B
+       imply IMA_SECURE_AND_OR_TRUSTED_BOOT    if EFI
+ config INSTRUCTION_DECODER
+--- a/arch/x86/boot/compressed/head_64.S
++++ b/arch/x86/boot/compressed/head_64.S
+@@ -38,6 +38,14 @@
+ #include "pgtable.h"
+ /*
++ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
++ * in assembly errors due to trying to move .org backward due to the excessive
++ * alignment.
++ */
++#undef __ALIGN
++#define __ALIGN               .balign 16, 0x90
++
++/*
+  * Locally defined symbols should be marked hidden:
+  */
+       .hidden _bss
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -13,10 +13,8 @@
+ #ifdef __ASSEMBLY__
+-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
+-#define __ALIGN               .p2align 4, 0x90
++#define __ALIGN               .balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
+ #define __ALIGN_STR   __stringify(__ALIGN)
+-#endif
+ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define RET   jmp __x86_return_thunk
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -81,8 +81,8 @@
+ #define RO_EXCEPTION_TABLE
+ #endif
+-/* Align . to a 8 byte boundary equals to maximum function alignment. */
+-#define ALIGN_FUNCTION()  . = ALIGN(8)
++/* Align . function alignment. */
++#define ALIGN_FUNCTION()  . = ALIGN(CONFIG_FUNCTION_ALIGNMENT)
+ /*
+  * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which
+--- a/include/linux/linkage.h
++++ b/include/linux/linkage.h
+@@ -69,8 +69,8 @@
+ #endif
+ #ifndef __ALIGN
+-#define __ALIGN               .align 4,0x90
+-#define __ALIGN_STR   ".align 4,0x90"
++#define __ALIGN                       .balign CONFIG_FUNCTION_ALIGNMENT
++#define __ALIGN_STR           __stringify(__ALIGN)
+ #endif
+ #ifdef __ASSEMBLY__
+--- a/lib/Kconfig.debug
++++ b/lib/Kconfig.debug
+@@ -424,6 +424,7 @@ config SECTION_MISMATCH_WARN_ONLY
+ config DEBUG_FORCE_FUNCTION_ALIGN_64B
+       bool "Force all function address 64B aligned"
+       depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC)
++      select FUNCTION_ALIGNMENT_64B
+       help
+         There are cases that a commit from one domain changes the function
+         address alignment of other domains, and cause magic performance
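For context on the patch above: once an architecture selects one of the FUNCTION_ALIGNMENT_xxB symbols, the net effect is simply -falign-functions=N on the compiler command line plus a matching .balign for assembly entry points. A minimal user-space sketch of what that compiler flag does -- the file name and build command are only an illustration, nothing here is taken from the patch:

/*
 * Illustration only, not part of the patch.  Build with e.g.
 * "gcc -O2 -falign-functions=16 align_demo.c".  The entry points below
 * are then expected to land on 16-byte boundaries; the option is a
 * strong hint rather than a hard guarantee, so the compiler may still
 * pack functions it considers cold.
 */
#include <stdint.h>
#include <stdio.h>

static void f1(void) { __asm__ volatile(""); }
static void f2(void) { __asm__ volatile(""); }

int main(void)
{
        printf("f1 mod 16 = %lu\n", (unsigned long)(uintptr_t)f1 % 16);
        printf("f2 mod 16 = %lu\n", (unsigned long)(uintptr_t)f2 % 16);
        return 0;
}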
diff --git a/queue-5.15/series b/queue-5.15/series
index a73bf3f4d9b4076b47676dceab42c256da03e7a8..ed87d19bd0598c05d9e3dcb3ffc1671152bed96e 100644 (file)
@@ -160,3 +160,8 @@ x86-mmio-disable-kvm-mitigation-when-x86_feature_clear_cpu_buf-is-set.patch
 documentation-hw-vuln-add-documentation-for-rfds.patch
 x86-rfds-mitigate-register-file-data-sampling-rfds.patch
 kvm-x86-export-rfds_no-and-rfds_clear-to-guests.patch
+arch-introduce-config_function_alignment.patch
+x86-asm-differentiate-between-code-and-function-alignment.patch
+x86-alternatives-introduce-int3_emulate_jcc.patch
+x86-alternatives-teach-text_poke_bp-to-patch-jcc.d32-instructions.patch
+x86-static_call-add-support-for-jcc-tail-calls.patch
diff --git a/queue-5.15/x86-alternatives-introduce-int3_emulate_jcc.patch b/queue-5.15/x86-alternatives-introduce-int3_emulate_jcc.patch
new file mode 100644 (file)
index 0000000..4491ba2
--- /dev/null
@@ -0,0 +1,135 @@
+From stable+bounces-27578-greg=kroah.com@vger.kernel.org Wed Mar 13 11:43:36 2024
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Date: Wed, 13 Mar 2024 07:42:53 -0300
+Subject: x86/alternatives: Introduce int3_emulate_jcc()
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, kernel-dev@igalia.com
+Message-ID: <20240313104255.1083365-4-cascardo@igalia.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit db7adcfd1cec4e95155e37bc066fddab302c6340 upstream.
+
+Move the kprobe Jcc emulation into int3_emulate_jcc() so it can be
+used by more code -- specifically static_call() will need this.
+
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20230123210607.057678245@infradead.org
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/text-patching.h |   31 ++++++++++++++++++++++++++++
+ arch/x86/kernel/kprobes/core.c       |   38 +++++++----------------------------
+ 2 files changed, 39 insertions(+), 30 deletions(-)
+
+--- a/arch/x86/include/asm/text-patching.h
++++ b/arch/x86/include/asm/text-patching.h
+@@ -181,6 +181,37 @@ void int3_emulate_ret(struct pt_regs *re
+       unsigned long ip = int3_emulate_pop(regs);
+       int3_emulate_jmp(regs, ip);
+ }
++
++static __always_inline
++void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
++{
++      static const unsigned long jcc_mask[6] = {
++              [0] = X86_EFLAGS_OF,
++              [1] = X86_EFLAGS_CF,
++              [2] = X86_EFLAGS_ZF,
++              [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
++              [4] = X86_EFLAGS_SF,
++              [5] = X86_EFLAGS_PF,
++      };
++
++      bool invert = cc & 1;
++      bool match;
++
++      if (cc < 0xc) {
++              match = regs->flags & jcc_mask[cc >> 1];
++      } else {
++              match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
++                      ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
++              if (cc >= 0xe)
++                      match = match || (regs->flags & X86_EFLAGS_ZF);
++      }
++
++      if ((match && !invert) || (!match && invert))
++              ip += disp;
++
++      int3_emulate_jmp(regs, ip);
++}
++
+ #endif /* !CONFIG_UML_X86 */
+ #endif /* _ASM_X86_TEXT_PATCHING_H */
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -463,50 +463,26 @@ static void kprobe_emulate_call(struct k
+ }
+ NOKPROBE_SYMBOL(kprobe_emulate_call);
+-static nokprobe_inline
+-void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
++static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
+ {
+       unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+-      if (cond)
+-              ip += p->ainsn.rel32;
++      ip += p->ainsn.rel32;
+       int3_emulate_jmp(regs, ip);
+ }
+-
+-static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
+-{
+-      __kprobe_emulate_jmp(p, regs, true);
+-}
+ NOKPROBE_SYMBOL(kprobe_emulate_jmp);
+-static const unsigned long jcc_mask[6] = {
+-      [0] = X86_EFLAGS_OF,
+-      [1] = X86_EFLAGS_CF,
+-      [2] = X86_EFLAGS_ZF,
+-      [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
+-      [4] = X86_EFLAGS_SF,
+-      [5] = X86_EFLAGS_PF,
+-};
+-
+ static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
+ {
+-      bool invert = p->ainsn.jcc.type & 1;
+-      bool match;
++      unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+-      if (p->ainsn.jcc.type < 0xc) {
+-              match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
+-      } else {
+-              match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+-                      ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+-              if (p->ainsn.jcc.type >= 0xe)
+-                      match = match || (regs->flags & X86_EFLAGS_ZF);
+-      }
+-      __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
++      int3_emulate_jcc(regs, p->ainsn.jcc.type, ip, p->ainsn.rel32);
+ }
+ NOKPROBE_SYMBOL(kprobe_emulate_jcc);
+ static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
+ {
++      unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
+       bool match;
+       if (p->ainsn.loop.type != 3) {  /* LOOP* */
+@@ -534,7 +510,9 @@ static void kprobe_emulate_loop(struct k
+       else if (p->ainsn.loop.type == 1)       /* LOOPE */
+               match = match && (regs->flags & X86_EFLAGS_ZF);
+-      __kprobe_emulate_jmp(p, regs, match);
++      if (match)
++              ip += p->ainsn.rel32;
++      int3_emulate_jmp(regs, ip);
+ }
+ NOKPROBE_SYMBOL(kprobe_emulate_loop);
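The flags test that int3_emulate_jcc() centralises in the patch above can be exercised outside the kernel. A minimal sketch, assuming the standard x86 EFLAGS bit layout; the EFLAGS_* constants below are written out by hand for illustration rather than taken from kernel headers:

/*
 * Illustration only, not part of the patch: given a condition-code
 * nibble (0x0..0xf, as encoded in a Jcc opcode) and an EFLAGS value,
 * decide whether the branch would be taken -- the same test
 * int3_emulate_jcc() performs before adjusting regs->ip.
 */
#include <stdbool.h>
#include <stdio.h>

#define EFLAGS_CF 0x0001
#define EFLAGS_PF 0x0004
#define EFLAGS_ZF 0x0040
#define EFLAGS_SF 0x0080
#define EFLAGS_OF 0x0800
#define SF_BIT    7
#define OF_BIT    11

static bool jcc_taken(unsigned long flags, unsigned char cc)
{
        static const unsigned long jcc_mask[6] = {
                EFLAGS_OF, EFLAGS_CF, EFLAGS_ZF,
                EFLAGS_CF | EFLAGS_ZF, EFLAGS_SF, EFLAGS_PF,
        };
        bool invert = cc & 1;
        bool match;

        if (cc < 0xc) {
                match = flags & jcc_mask[cc >> 1];
        } else {
                /* JL/JGE and JLE/JG compare SF against OF, optionally with ZF. */
                match = ((flags & EFLAGS_SF) >> SF_BIT) ^
                        ((flags & EFLAGS_OF) >> OF_BIT);
                if (cc >= 0xe)
                        match = match || (flags & EFLAGS_ZF);
        }
        return (match && !invert) || (!match && invert);
}

int main(void)
{
        /* With ZF set, JE (cc = 0x4) is taken and JNE (cc = 0x5) is not. */
        printf("je: %d, jne: %d\n",
               jcc_taken(EFLAGS_ZF, 0x4), jcc_taken(EFLAGS_ZF, 0x5));
        return 0;
}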
diff --git a/queue-5.15/x86-alternatives-teach-text_poke_bp-to-patch-jcc.d32-instructions.patch b/queue-5.15/x86-alternatives-teach-text_poke_bp-to-patch-jcc.d32-instructions.patch
new file mode 100644 (file)
index 0000000..9840bdb
--- /dev/null
@@ -0,0 +1,178 @@
+From stable+bounces-27579-greg=kroah.com@vger.kernel.org Wed Mar 13 11:43:41 2024
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Date: Wed, 13 Mar 2024 07:42:54 -0300
+Subject: x86/alternatives: Teach text_poke_bp() to patch Jcc.d32 instructions
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, kernel-dev@igalia.com
+Message-ID: <20240313104255.1083365-5-cascardo@igalia.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit ac0ee0a9560c97fa5fe1409e450c2425d4ebd17a upstream.
+
+In order to re-write Jcc.d32 instructions text_poke_bp() needs to be
+taught about them.
+
+The biggest hurdle is that the whole machinery is currently made for 5
+byte instructions and extending this would grow struct text_poke_loc
+which is currently a nice 16 bytes and used in an array.
+
+However, since text_poke_loc contains a full copy of the (s32)
+displacement, it is possible to map the Jcc.d32 2 byte opcodes to
+Jcc.d8 1 byte opcode for the int3 emulation.
+
+This then leaves the replacement bytes; fudge that by only storing the
+last 5 bytes and adding the rule that 'length == 6' instruction will
+be prefixed with a 0x0f byte.
+
+Change-Id: Ie3f72c6b92f865d287c8940e5a87e59d41cfaa27
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20230123210607.115718513@infradead.org
+[cascardo: there is no emit_call_track_retpoline]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/alternative.c |   56 +++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 47 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -351,6 +351,12 @@ next:
+       kasan_enable_current();
+ }
++static inline bool is_jcc32(struct insn *insn)
++{
++      /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
++      return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
++}
++
+ #if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
+ /*
+@@ -1201,6 +1207,11 @@ void text_poke_sync(void)
+       on_each_cpu(do_sync_core, NULL, 1);
+ }
++/*
++ * NOTE: crazy scheme to allow patching Jcc.d32 but not increase the size of
++ * this thing. When len == 6 everything is prefixed with 0x0f and we map
++ * opcode to Jcc.d8, using len to distinguish.
++ */
+ struct text_poke_loc {
+       /* addr := _stext + rel_addr */
+       s32 rel_addr;
+@@ -1322,6 +1333,10 @@ noinstr int poke_int3_handler(struct pt_
+               int3_emulate_jmp(regs, (long)ip + tp->disp);
+               break;
++      case 0x70 ... 0x7f: /* Jcc */
++              int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp);
++              break;
++
+       default:
+               BUG();
+       }
+@@ -1395,16 +1410,26 @@ static void text_poke_bp_batch(struct te
+        * Second step: update all but the first byte of the patched range.
+        */
+       for (do_sync = 0, i = 0; i < nr_entries; i++) {
+-              u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
++              u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, };
++              u8 _new[POKE_MAX_OPCODE_SIZE+1];
++              const u8 *new = tp[i].text;
+               int len = tp[i].len;
+               if (len - INT3_INSN_SIZE > 0) {
+                       memcpy(old + INT3_INSN_SIZE,
+                              text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+                              len - INT3_INSN_SIZE);
++
++                      if (len == 6) {
++                              _new[0] = 0x0f;
++                              memcpy(_new + 1, new, 5);
++                              new = _new;
++                      }
++
+                       text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+-                                (const char *)tp[i].text + INT3_INSN_SIZE,
++                                new + INT3_INSN_SIZE,
+                                 len - INT3_INSN_SIZE);
++
+                       do_sync++;
+               }
+@@ -1432,8 +1457,7 @@ static void text_poke_bp_batch(struct te
+                * The old instruction is recorded so that the event can be
+                * processed forwards or backwards.
+                */
+-              perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
+-                                   tp[i].text, len);
++              perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len);
+       }
+       if (do_sync) {
+@@ -1450,10 +1474,15 @@ static void text_poke_bp_batch(struct te
+        * replacing opcode.
+        */
+       for (do_sync = 0, i = 0; i < nr_entries; i++) {
+-              if (tp[i].text[0] == INT3_INSN_OPCODE)
++              u8 byte = tp[i].text[0];
++
++              if (tp[i].len == 6)
++                      byte = 0x0f;
++
++              if (byte == INT3_INSN_OPCODE)
+                       continue;
+-              text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
++              text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE);
+               do_sync++;
+       }
+@@ -1471,9 +1500,11 @@ static void text_poke_loc_init(struct te
+                              const void *opcode, size_t len, const void *emulate)
+ {
+       struct insn insn;
+-      int ret, i;
++      int ret, i = 0;
+-      memcpy((void *)tp->text, opcode, len);
++      if (len == 6)
++              i = 1;
++      memcpy((void *)tp->text, opcode+i, len-i);
+       if (!emulate)
+               emulate = opcode;
+@@ -1484,6 +1515,13 @@ static void text_poke_loc_init(struct te
+       tp->len = len;
+       tp->opcode = insn.opcode.bytes[0];
++      if (is_jcc32(&insn)) {
++              /*
++               * Map Jcc.d32 onto Jcc.d8 and use len to distinguish.
++               */
++              tp->opcode = insn.opcode.bytes[1] - 0x10;
++      }
++
+       switch (tp->opcode) {
+       case RET_INSN_OPCODE:
+       case JMP32_INSN_OPCODE:
+@@ -1500,7 +1538,6 @@ static void text_poke_loc_init(struct te
+               BUG_ON(len != insn.length);
+       };
+-
+       switch (tp->opcode) {
+       case INT3_INSN_OPCODE:
+       case RET_INSN_OPCODE:
+@@ -1509,6 +1546,7 @@ static void text_poke_loc_init(struct te
+       case CALL_INSN_OPCODE:
+       case JMP32_INSN_OPCODE:
+       case JMP8_INSN_OPCODE:
++      case 0x70 ... 0x7f: /* Jcc */
+               tp->disp = insn.immediate.value;
+               break;
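The 'len == 6' convention in the patch above squeezes a 6-byte Jcc.d32 into the existing 5-byte text_poke_loc layout: the second opcode byte (0x80..0x8f) is recorded as its short-form Jcc.d8 equivalent (0x70..0x7f), and the length alone says that a 0x0f prefix has to be re-emitted when the replacement text is written back. A minimal sketch of that mapping, assuming little-endian byte order as on x86:

/*
 * Illustration only, not part of the patch.  "0f 85 10 00 00 00" is a
 * jne with a 32-bit displacement of 0x10; the sketch applies the same
 * opcode test as is_jcc32() and the same -0x10 mapping used by
 * text_poke_loc_init().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        const uint8_t jcc_d32[6] = { 0x0f, 0x85, 0x10, 0x00, 0x00, 0x00 };
        uint8_t opcode;
        int32_t disp;

        /* Same test as is_jcc32(): 0x0f followed by 0x80..0x8f. */
        if (jcc_d32[0] != 0x0f || (jcc_d32[1] & 0xf0) != 0x80)
                return 1;

        opcode = jcc_d32[1] - 0x10;        /* 0x85 -> 0x75, i.e. jne.d8 */
        memcpy(&disp, &jcc_d32[2], 4);     /* keep the full s32 displacement */

        /* len == 6 later tells the patching code to write the 0x0f back. */
        printf("stored opcode 0x%02x, disp %d, len 6\n",
               (unsigned)opcode, (int)disp);
        return 0;
}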
diff --git a/queue-5.15/x86-asm-differentiate-between-code-and-function-alignment.patch b/queue-5.15/x86-asm-differentiate-between-code-and-function-alignment.patch
new file mode 100644 (file)
index 0000000..11b54c8
--- /dev/null
@@ -0,0 +1,52 @@
+From stable+bounces-27577-greg=kroah.com@vger.kernel.org Wed Mar 13 11:43:38 2024
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Date: Wed, 13 Mar 2024 07:42:52 -0300
+Subject: x86/asm: Differentiate between code and function alignment
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, kernel-dev@igalia.com
+Message-ID: <20240313104255.1083365-3-cascardo@igalia.com>
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 8eb5d34e77c63fde8af21c691bcf6e3cd87f7829 upstream.
+
+Create SYM_F_ALIGN to differentiate alignment requirements between
+SYM_CODE and SYM_FUNC.
+
+This distinction is useful later when adding padding in front of
+functions; IOW this allows following the compiler's
+patchable-function-entry option.
+
+[peterz: Changelog]
+
+Change-Id: I4f9bc0507e5c3fdb3e0839806989efc305e0a758
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220915111143.824822743@infradead.org
+[cascardo: adjust for missing commit c4691712b546 ("x86/linkage: Add ENDBR to SYM_FUNC_START*()")]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/linkage.h |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/linkage.h
++++ b/arch/x86/include/asm/linkage.h
+@@ -11,11 +11,15 @@
+ #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
+ #endif /* CONFIG_X86_32 */
+-#ifdef __ASSEMBLY__
+-
+ #define __ALIGN               .balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
+ #define __ALIGN_STR   __stringify(__ALIGN)
++#define ASM_FUNC_ALIGN                __ALIGN_STR
++#define __FUNC_ALIGN          __ALIGN
++#define SYM_F_ALIGN           __FUNC_ALIGN
++
++#ifdef __ASSEMBLY__
++
+ #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+ #define RET   jmp __x86_return_thunk
+ #else /* CONFIG_RETPOLINE */
diff --git a/queue-5.15/x86-static_call-add-support-for-jcc-tail-calls.patch b/queue-5.15/x86-static_call-add-support-for-jcc-tail-calls.patch
new file mode 100644 (file)
index 0000000..f3b51e0
--- /dev/null
@@ -0,0 +1,132 @@
+From stable+bounces-27580-greg=kroah.com@vger.kernel.org Wed Mar 13 11:43:43 2024
+From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Date: Wed, 13 Mar 2024 07:42:55 -0300
+Subject: x86/static_call: Add support for Jcc tail-calls
+To: stable@vger.kernel.org
+Cc: Peter Zijlstra <peterz@infradead.org>, Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@kernel.org>, kernel-dev@igalia.com
+Message-ID: <20240313104255.1083365-6-cascardo@igalia.com>
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+commit 923510c88d2b7d947c4217835fd9ca6bd65cc56c upstream.
+
+Clang likes to create conditional tail calls like:
+
+  0000000000000350 <amd_pmu_add_event>:
+  350:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1) 351: R_X86_64_NONE      __fentry__-0x4
+  355:       48 83 bf 20 01 00 00 00         cmpq   $0x0,0x120(%rdi)
+  35d:       0f 85 00 00 00 00       jne    363 <amd_pmu_add_event+0x13>     35f: R_X86_64_PLT32     __SCT__amd_pmu_branch_add-0x4
+  363:       e9 00 00 00 00          jmp    368 <amd_pmu_add_event+0x18>     364: R_X86_64_PLT32     __x86_return_thunk-0x4
+
+Where 0x35d is a static call site that's turned into a conditional
+tail-call using the Jcc class of instructions.
+
+Teach the in-line static call text patching about this.
+
+Notably, since there is no conditional-ret, in that case patch the Jcc
+to point at an empty stub function that does the ret -- or the return
+thunk when needed.
+
+Reported-by: "Erhard F." <erhard_f@mailbox.org>
+Change-Id: I99c8fc3f721e5d1c74f06710b38d4bac5230303a
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/Y9Kdg9QjHkr9G5b5@hirez.programming.kicks-ass.net
+[cascardo: __static_call_validate didn't have the bool tramp argument]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/static_call.c |   50 +++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 47 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kernel/static_call.c
++++ b/arch/x86/kernel/static_call.c
+@@ -9,6 +9,7 @@ enum insn_type {
+       NOP = 1,  /* site cond-call */
+       JMP = 2,  /* tramp / site tail-call */
+       RET = 3,  /* tramp / site cond-tail-call */
++      JCC = 4,
+ };
+ /*
+@@ -25,12 +26,40 @@ static const u8 xor5rax[] = { 0x2e, 0x2e
+ static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
++static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
++{
++      u8 ret = 0;
++
++      if (insn[0] == 0x0f) {
++              u8 tmp = insn[1];
++              if ((tmp & 0xf0) == 0x80)
++                      ret = tmp;
++      }
++
++      return ret;
++}
++
++extern void __static_call_return(void);
++
++asm (".global __static_call_return\n\t"
++     ".type __static_call_return, @function\n\t"
++     ASM_FUNC_ALIGN "\n\t"
++     "__static_call_return:\n\t"
++     ANNOTATE_NOENDBR
++     ANNOTATE_RETPOLINE_SAFE
++     "ret; int3\n\t"
++     ".size __static_call_return, . - __static_call_return \n\t");
++
+ static void __ref __static_call_transform(void *insn, enum insn_type type,
+                                         void *func, bool modinit)
+ {
+       const void *emulate = NULL;
+       int size = CALL_INSN_SIZE;
+       const void *code;
++      u8 op, buf[6];
++
++      if ((type == JMP || type == RET) && (op = __is_Jcc(insn)))
++              type = JCC;
+       switch (type) {
+       case CALL:
+@@ -56,6 +85,20 @@ static void __ref __static_call_transfor
+               else
+                       code = &retinsn;
+               break;
++
++      case JCC:
++              if (!func) {
++                      func = __static_call_return;
++                      if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
++                              func = x86_return_thunk;
++              }
++
++              buf[0] = 0x0f;
++              __text_gen_insn(buf+1, op, insn+1, func, 5);
++              code = buf;
++              size = 6;
++
++              break;
+       }
+       if (memcmp(insn, code, size) == 0)
+@@ -67,13 +110,14 @@ static void __ref __static_call_transfor
+       text_poke_bp(insn, code, size, emulate);
+ }
+-static void __static_call_validate(void *insn, bool tail)
++static void __static_call_validate(u8 *insn, bool tail)
+ {
+-      u8 opcode = *(u8 *)insn;
++      u8 opcode = insn[0];
+       if (tail) {
+               if (opcode == JMP32_INSN_OPCODE ||
+-                  opcode == RET_INSN_OPCODE)
++                  opcode == RET_INSN_OPCODE ||
++                  __is_Jcc(insn))
+                       return;
+       } else {
+               if (opcode == CALL_INSN_OPCODE ||