From 5db8076a94647250909bf494ad456c547430f837 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Aug 2023 11:56:33 +0200 Subject: [PATCH] 5.10-stable patches added patches: kprobes-x86-fix-fall-through-warnings-for-clang.patch x86-kprobes-do-not-decode-opcode-in-resume_execution.patch x86-kprobes-fix-jng-jnle-emulation.patch x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch x86-kprobes-identify-far-indirect-jmp-correctly.patch x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch x86-kprobes-update-kcb-status-flag-after-singlestepping.patch x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch --- ...-fix-fall-through-warnings-for-clang.patch | 35 + queue-5.10/series | 9 + ...ot-decode-opcode-in-resume_execution.patch | 291 +++++++ .../x86-kprobes-fix-jng-jnle-emulation.patch | 44 + ...rect-jmp-and-others-using-range-case.patch | 88 ++ ...-identify-far-indirect-jmp-correctly.patch | 41 + ...ning-of-the-kprobe_is_ss-declaration.patch | 44 + ...correct-opcode-for-group-instruction.patch | 44 + ...kcb-status-flag-after-singlestepping.patch | 63 ++ ...nstead-of-debug-trap-for-single-step.patch | 749 ++++++++++++++++++ 10 files changed, 1408 insertions(+) create mode 100644 queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch create mode 100644 queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch create mode 100644 queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch create mode 100644 queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch create mode 100644 queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch create mode 100644 queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch create mode 100644 queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch create mode 100644 queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch create mode 100644 queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch diff --git a/queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch b/queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch new file mode 100644 index 00000000000..571f9ab7296 --- /dev/null +++ b/queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch @@ -0,0 +1,35 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:47:50 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:45 +0800 +Subject: kprobes/x86: Fix fall-through warnings for Clang +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-2-lihuafei1@huawei.com> + +From: "Gustavo A. R. Silva" + +[ Upstream commit e689b300c99ca2dd80d3f662e19499bba27cda09 ] + +In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning +by explicitly adding a break statement instead of just letting the code +fall through to the next case. + +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Peter Zijlstra (Intel) +Link: https://github.com/KSPP/linux/issues/115 +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -880,6 +880,7 @@ static void resume_execution(struct kpro + p->ainsn.boostable = true; + goto no_change; + } ++ break; + default: + break; + } diff --git a/queue-5.10/series b/queue-5.10/series index 288d22c8062..a96fe7acb97 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -110,3 +110,12 @@ cpufreq-intel_pstate-drop-acpi-_pss-states-table-patching.patch selftests-mptcp-depend-on-syn_cookies.patch io_uring-treat-eagain-for-req_f_nowait-as-final-for-io-wq.patch asoc-cs42l51-fix-driver-to-properly-autoload-with-automatic-module-loading.patch +kprobes-x86-fix-fall-through-warnings-for-clang.patch +x86-kprobes-do-not-decode-opcode-in-resume_execution.patch +x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch +x86-kprobes-identify-far-indirect-jmp-correctly.patch +x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch +x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch +x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch +x86-kprobes-update-kcb-status-flag-after-singlestepping.patch +x86-kprobes-fix-jng-jnle-emulation.patch diff --git a/queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch b/queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch new file mode 100644 index 00000000000..4b532b78d71 --- /dev/null +++ b/queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch @@ -0,0 +1,291 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:47:51 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:46 +0800 +Subject: x86/kprobes: Do not decode opcode in resume_execution() +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-3-lihuafei1@huawei.com> + +From: Masami Hiramatsu + +[ Upstream commit abd82e533d88df1521e3da6799b83ce88852ab88 ] + +Currently, kprobes decodes the opcode right after single-stepping in +resume_execution(). But the opcode was already decoded while preparing +arch_specific_insn in arch_copy_kprobe(). + +Decode the opcode in arch_copy_kprobe() instead of in resume_execution() +and set some flags which classify the opcode for the resuming process. + + [ bp: Massage commit message. ] + +Signed-off-by: Masami Hiramatsu +Signed-off-by: Borislav Petkov +Acked-by: Steven Rostedt (VMware) +Link: https://lkml.kernel.org/r/160830072561.349576.3014979564448023213.stgit@devnote2 +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kprobes.h | 11 +- + arch/x86/kernel/kprobes/core.c | 168 ++++++++++++++++++----------------------- + 2 files changed, 81 insertions(+), 98 deletions(-) + +--- a/arch/x86/include/asm/kprobes.h ++++ b/arch/x86/include/asm/kprobes.h +@@ -58,14 +58,17 @@ struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; + /* +- * boostable = false: This instruction type is not boostable. +- * boostable = true: This instruction has been boosted: we have ++ * boostable = 0: This instruction type is not boostable. ++ * boostable = 1: This instruction has been boosted: we have + * added a relative jump after the instruction copy in insn, + * so no single-step and fixup are needed (unless there's + * a post_handler). + */ +- bool boostable; +- bool if_modifier; ++ unsigned boostable:1; ++ unsigned if_modifier:1; ++ unsigned is_call:1; ++ unsigned is_pushf:1; ++ unsigned is_abs_ip:1; + /* Number of bytes of text poked */ + int tp_len; + }; +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -134,26 +134,6 @@ void synthesize_relcall(void *dest, void + NOKPROBE_SYMBOL(synthesize_relcall); + + /* +- * Skip the prefixes of the instruction. +- */ +-static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) +-{ +- insn_attr_t attr; +- +- attr = inat_get_opcode_attribute((insn_byte_t)*insn); +- while (inat_is_legacy_prefix(attr)) { +- insn++; +- attr = inat_get_opcode_attribute((insn_byte_t)*insn); +- } +-#ifdef CONFIG_X86_64 +- if (inat_is_rex_prefix(attr)) +- insn++; +-#endif +- return insn; +-} +-NOKPROBE_SYMBOL(skip_prefixes); +- +-/* + * Returns non-zero if INSN is boostable. + * RIP relative instructions are adjusted at copying time in 64 bits mode + */ +@@ -327,25 +307,6 @@ static int can_probe(unsigned long paddr + } + + /* +- * Returns non-zero if opcode modifies the interrupt flag. +- */ +-static int is_IF_modifier(kprobe_opcode_t *insn) +-{ +- /* Skip prefixes */ +- insn = skip_prefixes(insn); +- +- switch (*insn) { +- case 0xfa: /* cli */ +- case 0xfb: /* sti */ +- case 0xcf: /* iret/iretd */ +- case 0x9d: /* popf/popfd */ +- return 1; +- } +- +- return 0; +-} +- +-/* + * Copy an instruction with recovering modified instruction by kprobes + * and adjust the displacement if the instruction uses the %rip-relative + * addressing mode. Note that since @real will be the final place of copied +@@ -427,9 +388,9 @@ static int prepare_boost(kprobe_opcode_t + synthesize_reljump(buf + len, p->ainsn.insn + len, + p->addr + insn->length); + len += JMP32_INSN_SIZE; +- p->ainsn.boostable = true; ++ p->ainsn.boostable = 1; + } else { +- p->ainsn.boostable = false; ++ p->ainsn.boostable = 0; + } + + return len; +@@ -466,6 +427,67 @@ void free_insn_page(void *page) + module_memfree(page); + } + ++static void set_resume_flags(struct kprobe *p, struct insn *insn) ++{ ++ insn_byte_t opcode = insn->opcode.bytes[0]; ++ ++ switch (opcode) { ++ case 0xfa: /* cli */ ++ case 0xfb: /* sti */ ++ case 0x9d: /* popf/popfd */ ++ /* Check whether the instruction modifies Interrupt Flag or not */ ++ p->ainsn.if_modifier = 1; ++ break; ++ case 0x9c: /* pushfl */ ++ p->ainsn.is_pushf = 1; ++ break; ++ case 0xcf: /* iret */ ++ p->ainsn.if_modifier = 1; ++ fallthrough; ++ case 0xc2: /* ret/lret */ ++ case 0xc3: ++ case 0xca: ++ case 0xcb: ++ case 0xea: /* jmp absolute -- ip is correct */ ++ /* ip is already adjusted, no more changes required */ ++ p->ainsn.is_abs_ip = 1; ++ /* Without resume jump, this is boostable */ ++ p->ainsn.boostable = 1; ++ break; ++ case 0xe8: /* call relative - Fix return addr */ ++ p->ainsn.is_call = 1; ++ break; ++#ifdef CONFIG_X86_32 ++ case 0x9a: /* call absolute -- same as call absolute, indirect */ ++ p->ainsn.is_call = 1; ++ p->ainsn.is_abs_ip = 1; ++ break; ++#endif ++ case 0xff: ++ opcode = insn->opcode.bytes[1]; ++ if ((opcode & 0x30) == 0x10) { ++ /* ++ * call absolute, indirect ++ * Fix return addr; ip is correct. ++ * But this is not boostable ++ */ ++ p->ainsn.is_call = 1; ++ p->ainsn.is_abs_ip = 1; ++ break; ++ } else if (((opcode & 0x31) == 0x20) || ++ ((opcode & 0x31) == 0x21)) { ++ /* ++ * jmp near and far, absolute indirect ++ * ip is correct. ++ */ ++ p->ainsn.is_abs_ip = 1; ++ /* Without resume jump, this is boostable */ ++ p->ainsn.boostable = 1; ++ } ++ break; ++ } ++} ++ + static int arch_copy_kprobe(struct kprobe *p) + { + struct insn insn; +@@ -483,8 +505,8 @@ static int arch_copy_kprobe(struct kprob + */ + len = prepare_boost(buf, p, &insn); + +- /* Check whether the instruction modifies Interrupt Flag or not */ +- p->ainsn.if_modifier = is_IF_modifier(buf); ++ /* Analyze the opcode and set resume flags */ ++ set_resume_flags(p, &insn); + + /* Also, displacement change doesn't affect the first byte */ + p->opcode = buf[0]; +@@ -507,6 +529,9 @@ int arch_prepare_kprobe(struct kprobe *p + + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; ++ ++ memset(&p->ainsn, 0, sizeof(p->ainsn)); ++ + /* insn: must be on special executable page on x86. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) +@@ -822,11 +847,6 @@ NOKPROBE_SYMBOL(trampoline_handler); + * 2) If the single-stepped instruction was a call, the return address + * that is atop the stack is the address following the copied instruction. + * We need to make it the address following the original instruction. +- * +- * If this is the first time we've single-stepped the instruction at +- * this probepoint, and the instruction is boostable, boost it: add a +- * jump instruction after the copied instruction, that jumps to the next +- * instruction after the probepoint. + */ + static void resume_execution(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +@@ -834,60 +854,20 @@ static void resume_execution(struct kpro + unsigned long *tos = stack_addr(regs); + unsigned long copy_ip = (unsigned long)p->ainsn.insn; + unsigned long orig_ip = (unsigned long)p->addr; +- kprobe_opcode_t *insn = p->ainsn.insn; +- +- /* Skip prefixes */ +- insn = skip_prefixes(insn); + + regs->flags &= ~X86_EFLAGS_TF; +- switch (*insn) { +- case 0x9c: /* pushfl */ ++ ++ /* Fixup the contents of top of stack */ ++ if (p->ainsn.is_pushf) { + *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); + *tos |= kcb->kprobe_old_flags; +- break; +- case 0xc2: /* iret/ret/lret */ +- case 0xc3: +- case 0xca: +- case 0xcb: +- case 0xcf: +- case 0xea: /* jmp absolute -- ip is correct */ +- /* ip is already adjusted, no more changes required */ +- p->ainsn.boostable = true; +- goto no_change; +- case 0xe8: /* call relative - Fix return addr */ ++ } else if (p->ainsn.is_call) { + *tos = orig_ip + (*tos - copy_ip); +- break; +-#ifdef CONFIG_X86_32 +- case 0x9a: /* call absolute -- same as call absolute, indirect */ +- *tos = orig_ip + (*tos - copy_ip); +- goto no_change; +-#endif +- case 0xff: +- if ((insn[1] & 0x30) == 0x10) { +- /* +- * call absolute, indirect +- * Fix return addr; ip is correct. +- * But this is not boostable +- */ +- *tos = orig_ip + (*tos - copy_ip); +- goto no_change; +- } else if (((insn[1] & 0x31) == 0x20) || +- ((insn[1] & 0x31) == 0x21)) { +- /* +- * jmp near and far, absolute indirect +- * ip is correct. And this is boostable +- */ +- p->ainsn.boostable = true; +- goto no_change; +- } +- break; +- default: +- break; + } + +- regs->ip += orig_ip - copy_ip; ++ if (!p->ainsn.is_abs_ip) ++ regs->ip += orig_ip - copy_ip; + +-no_change: + restore_btf(); + } + NOKPROBE_SYMBOL(resume_execution); diff --git a/queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch b/queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch new file mode 100644 index 00000000000..5ee4c393b56 --- /dev/null +++ b/queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch @@ -0,0 +1,44 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:48:02 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:53 +0800 +Subject: x86/kprobes: Fix JNG/JNLE emulation +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-10-lihuafei1@huawei.com> + +From: Nadav Amit + +[ Upstream commit 8924779df820c53875abaeb10c648e9cb75b46d4 ] + +When kprobes emulates JNG/JNLE instructions on x86 it uses the wrong +condition. For JNG (opcode: 0F 8E), according to Intel SDM, the jump is +performed if (ZF == 1 or SF != OF). However the kernel emulation +currently uses 'and' instead of 'or'. + +As a result, setting a kprobe on JNG/JNLE might cause the kernel to +behave incorrectly whenever the kprobe is hit. + +Fix by changing the 'and' to 'or'. + +Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step") +Signed-off-by: Nadav Amit +Signed-off-by: Ingo Molnar +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220813225943.143767-1-namit@vmware.com +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -505,7 +505,7 @@ static void kprobe_emulate_jcc(struct kp + match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + if (p->ainsn.jcc.type >= 0xe) +- match = match && (regs->flags & X86_EFLAGS_ZF); ++ match = match || (regs->flags & X86_EFLAGS_ZF); + } + __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); + } diff --git a/queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch b/queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch new file mode 100644 index 00000000000..dcccb74d40f --- /dev/null +++ b/queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch @@ -0,0 +1,88 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:48:03 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:50 +0800 +Subject: x86/kprobes: Fix to identify indirect jmp and others using range case +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-7-lihuafei1@huawei.com> + +From: Masami Hiramatsu + +[ Upstream commit 2f706e0e5e263c0d204e37ea496cbb0e98aac2d2 ] + +Fix can_boost() to identify indirect jmp and others using range case +correctly. + +Since the condition in switch statement is opcode & 0xf0, it can not +evaluate to 0xff case. This should be under the 0xf0 case. However, +there is no reason to use the conbinations of the bit-masked condition +and lower bit checking. + +Use range case to clean up the switch statement too. + +Fixes: 6256e668b7 ("x86/kprobes: Use int3 instead of debug trap for single-step") +Reported-by: Colin Ian King +Signed-off-by: Masami Hiramatsu +Signed-off-by: Ingo Molnar +Link: https://lore.kernel.org/r/161666692308.1120877.4675552834049546493.stgit@devnote2 +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 44 ++++++++++++++++++----------------------- + 1 file changed, 20 insertions(+), 24 deletions(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -165,32 +165,28 @@ int can_boost(struct insn *insn, void *a + + opcode = insn->opcode.bytes[0]; + +- switch (opcode & 0xf0) { +- case 0x60: +- /* can't boost "bound" */ +- return (opcode != 0x62); +- case 0x70: +- return 0; /* can't boost conditional jump */ +- case 0x90: +- return opcode != 0x9a; /* can't boost call far */ +- case 0xc0: +- /* can't boost software-interruptions */ +- return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; +- case 0xd0: +- /* can boost AA* and XLAT */ +- return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); +- case 0xe0: +- /* can boost in/out and absolute jmps */ +- return ((opcode & 0x04) || opcode == 0xea); +- case 0xf0: +- /* clear and set flags are boostable */ +- return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); +- case 0xff: +- /* indirect jmp is boostable */ ++ switch (opcode) { ++ case 0x62: /* bound */ ++ case 0x70 ... 0x7f: /* Conditional jumps */ ++ case 0x9a: /* Call far */ ++ case 0xc0 ... 0xc1: /* Grp2 */ ++ case 0xcc ... 0xce: /* software exceptions */ ++ case 0xd0 ... 0xd3: /* Grp2 */ ++ case 0xd6: /* (UD) */ ++ case 0xd8 ... 0xdf: /* ESC */ ++ case 0xe0 ... 0xe3: /* LOOP*, JCXZ */ ++ case 0xe8 ... 0xe9: /* near Call, JMP */ ++ case 0xeb: /* Short JMP */ ++ case 0xf0 ... 0xf4: /* LOCK/REP, HLT */ ++ case 0xf6 ... 0xf7: /* Grp3 */ ++ case 0xfe: /* Grp4 */ ++ /* ... are not boostable */ ++ return 0; ++ case 0xff: /* Grp5 */ ++ /* Only indirect jmp is boostable */ + return X86_MODRM_REG(insn->modrm.bytes[0]) == 4; + default: +- /* call is not boostable */ +- return opcode != 0x9a; ++ return 1; + } + } + diff --git a/queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch b/queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch new file mode 100644 index 00000000000..6a7e484535d --- /dev/null +++ b/queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch @@ -0,0 +1,41 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:47:51 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:48 +0800 +Subject: x86/kprobes: Identify far indirect JMP correctly +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-5-lihuafei1@huawei.com> + +From: Masami Hiramatsu + +[ Upstream commit a194acd316f93f3435a64de3b37dca2b5a77b338 ] + +Since Grp5 far indirect JMP is FF "mod 101 r/m", it should be +(modrm & 0x38) == 0x28, and near indirect JMP is also 0x38 == 0x20. +So we can mask modrm with 0x30 and check 0x20. +This is actually what the original code does, it also doesn't care +the last bit. So the result code is same. + +Thus, I think this is just a cosmetic cleanup. + +Signed-off-by: Masami Hiramatsu +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/161469873475.49483.13257083019966335137.stgit@devnote2 +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -478,8 +478,7 @@ static void set_resume_flags(struct kpro + p->ainsn.is_call = 1; + p->ainsn.is_abs_ip = 1; + break; +- } else if (((opcode & 0x31) == 0x20) || +- ((opcode & 0x31) == 0x21)) { ++ } else if ((opcode & 0x30) == 0x20) { + /* + * jmp near and far, absolute indirect + * ip is correct. diff --git a/queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch b/queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch new file mode 100644 index 00000000000..c49b43c2b96 --- /dev/null +++ b/queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch @@ -0,0 +1,44 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:47:51 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:51 +0800 +Subject: x86/kprobes: Move 'inline' to the beginning of the kprobe_is_ss() declaration +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-8-lihuafei1@huawei.com> + +From: Wei Yongjun + +[ Upstream commit 2304d14db6595bea5292bece06c4c625b12d8f89 ] + +Address this GCC warning: + + arch/x86/kernel/kprobes/core.c:940:1: + warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] + 940 | static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb) + | ^~~~~~ + +[ mingo: Tidied up the changelog. ] + +Fixes: 6256e668b7af: ("x86/kprobes: Use int3 instead of debug trap for single-step") +Reported-by: Hulk Robot +Signed-off-by: Wei Yongjun +Signed-off-by: Ingo Molnar +Acked-by: Masami Hiramatsu +Link: https://lore.kernel.org/r/20210324144502.1154883-1-weiyongjun1@huawei.com +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -944,7 +944,7 @@ static int reenter_kprobe(struct kprobe + } + NOKPROBE_SYMBOL(reenter_kprobe); + +-static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb) ++static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb) + { + return (kcb->kprobe_status == KPROBE_HIT_SS || + kcb->kprobe_status == KPROBE_REENTER); diff --git a/queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch b/queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch new file mode 100644 index 00000000000..fca01273847 --- /dev/null +++ b/queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch @@ -0,0 +1,44 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:48:03 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:47 +0800 +Subject: x86/kprobes: Retrieve correct opcode for group instruction +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-4-lihuafei1@huawei.com> + +From: Masami Hiramatsu + +[ Upstream commit d60ad3d46f1d04a282c56159f1deb675c12733fd ] + +Since the opcodes start from 0xff are group5 instruction group which is +not 2 bytes opcode but the extended opcode determined by the MOD/RM byte. + +The commit abd82e533d88 ("x86/kprobes: Do not decode opcode in resume_execution()") +used insn->opcode.bytes[1], but that is not correct. We have to refer +the insn->modrm.bytes[1] instead. + +Fixes: abd82e533d88 ("x86/kprobes: Do not decode opcode in resume_execution()") +Signed-off-by: Masami Hiramatsu +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/161469872400.49483.18214724458034233166.stgit@devnote2 +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -464,7 +464,11 @@ static void set_resume_flags(struct kpro + break; + #endif + case 0xff: +- opcode = insn->opcode.bytes[1]; ++ /* ++ * Since the 0xff is an extended group opcode, the instruction ++ * is determined by the MOD/RM byte. ++ */ ++ opcode = insn->modrm.bytes[0]; + if ((opcode & 0x30) == 0x10) { + /* + * call absolute, indirect diff --git a/queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch b/queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch new file mode 100644 index 00000000000..de7d90cddab --- /dev/null +++ b/queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch @@ -0,0 +1,63 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:48:04 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:52 +0800 +Subject: x86/kprobes: Update kcb status flag after singlestepping +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-9-lihuafei1@huawei.com> + +From: "Masami Hiramatsu (Google)" + +[ Upstream commit dec8784c9088b131a1523f582c2194cfc8107dc0 ] + +Fix kprobes to update kcb (kprobes control block) status flag to +KPROBE_HIT_SSDONE even if the kp->post_handler is not set. + +This bug may cause a kernel panic if another INT3 user runs right +after kprobes because kprobe_int3_handler() misunderstands the +INT3 is kprobe's single stepping INT3. + +Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step") +Reported-by: Daniel Müller +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Ingo Molnar +Tested-by: Daniel Müller +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/all/20220727210136.jjgc3lpqeq42yr3m@muellerd-fedora-PC2BDTX9 +Link: https://lore.kernel.org/r/165942025658.342061.12452378391879093249.stgit@devnote2 +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -826,16 +826,20 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); + static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) + { +- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { +- kcb->kprobe_status = KPROBE_HIT_SSDONE; +- cur->post_handler(cur, regs, 0); +- } +- + /* Restore back the original saved kprobes variables and continue. */ +- if (kcb->kprobe_status == KPROBE_REENTER) ++ if (kcb->kprobe_status == KPROBE_REENTER) { ++ /* This will restore both kcb and current_kprobe */ + restore_previous_kprobe(kcb); +- else ++ } else { ++ /* ++ * Always update the kcb status because ++ * reset_curent_kprobe() doesn't update kcb. ++ */ ++ kcb->kprobe_status = KPROBE_HIT_SSDONE; ++ if (cur->post_handler) ++ cur->post_handler(cur, regs, 0); + reset_current_kprobe(); ++ } + } + NOKPROBE_SYMBOL(kprobe_post_process); + diff --git a/queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch b/queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch new file mode 100644 index 00000000000..2064eae6558 --- /dev/null +++ b/queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch @@ -0,0 +1,749 @@ +From stable-owner@vger.kernel.org Wed Jul 5 08:48:03 2023 +From: Li Huafei +Date: Wed, 5 Jul 2023 14:46:49 +0800 +Subject: x86/kprobes: Use int3 instead of debug trap for single-step +To: +Cc: , , , , , , , , , , , , , , , , , , , +Message-ID: <20230705064653.226811-6-lihuafei1@huawei.com> + +From: Masami Hiramatsu + +[ Upstream commit 6256e668b7af9d81472e03c6a171630c08f8858a ] + +Use int3 instead of debug trap exception for single-stepping the +probed instructions. Some instructions which change the ip +registers or modify IF flags are emulated because those are not +able to be single-stepped by int3 or may allow the interrupt +while single-stepping. + +This actually changes the kprobes behavior. + +- kprobes can not probe following instructions; int3, iret, + far jmp/call which get absolute address as immediate, + indirect far jmp/call, indirect near jmp/call with addressing + by memory (register-based indirect jmp/call are OK), and + vmcall/vmlaunch/vmresume/vmxoff. + +- If the kprobe post_handler doesn't set before registering, + it may not be called in some case even if you set it afterwards. + (IOW, kprobe booster is enabled at registration, user can not + change it) + +But both are rare issue, unsupported instructions will not be +used in the kernel (or rarely used), and post_handlers are +rarely used (I don't see it except for the test code). + +Suggested-by: Andy Lutomirski +Signed-off-by: Masami Hiramatsu +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/161469874601.49483.11985325887166921076.stgit@devnote2 +[Huafei: Fix trivial conflict in arch/x86/kernel/kprobes/core.c due to +the previously backported commit 6dd3b8c9f5881] +Signed-off-by: Li Huafei +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kprobes.h | 21 + + arch/x86/kernel/kprobes/core.c | 517 ++++++++++++++++++++++++++--------------- + arch/x86/kernel/traps.c | 3 + 3 files changed, 353 insertions(+), 188 deletions(-) + +--- a/arch/x86/include/asm/kprobes.h ++++ b/arch/x86/include/asm/kprobes.h +@@ -65,10 +65,22 @@ struct arch_specific_insn { + * a post_handler). + */ + unsigned boostable:1; +- unsigned if_modifier:1; +- unsigned is_call:1; +- unsigned is_pushf:1; +- unsigned is_abs_ip:1; ++ unsigned char size; /* The size of insn */ ++ union { ++ unsigned char opcode; ++ struct { ++ unsigned char type; ++ } jcc; ++ struct { ++ unsigned char type; ++ unsigned char asize; ++ } loop; ++ struct { ++ unsigned char reg; ++ } indirect; ++ }; ++ s32 rel32; /* relative offset must be s32, s16, or s8 */ ++ void (*emulate_op)(struct kprobe *p, struct pt_regs *regs); + /* Number of bytes of text poked */ + int tp_len; + }; +@@ -107,7 +119,6 @@ extern int kprobe_fault_handler(struct p + extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + extern int kprobe_int3_handler(struct pt_regs *regs); +-extern int kprobe_debug_handler(struct pt_regs *regs); + + #else + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -185,6 +185,9 @@ int can_boost(struct insn *insn, void *a + case 0xf0: + /* clear and set flags are boostable */ + return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); ++ case 0xff: ++ /* indirect jmp is boostable */ ++ return X86_MODRM_REG(insn->modrm.bytes[0]) == 4; + default: + /* call is not boostable */ + return opcode != 0x9a; +@@ -373,13 +376,14 @@ int __copy_instruction(u8 *dest, u8 *src + return insn->length; + } + +-/* Prepare reljump right after instruction to boost */ +-static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, +- struct insn *insn) ++/* Prepare reljump or int3 right after instruction */ ++static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p, ++ struct insn *insn) + { + int len = insn->length; + +- if (can_boost(insn, p->addr) && ++ if (!IS_ENABLED(CONFIG_PREEMPTION) && ++ !p->post_handler && can_boost(insn, p->addr) && + MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) { + /* + * These instructions can be executed directly if it +@@ -390,7 +394,12 @@ static int prepare_boost(kprobe_opcode_t + len += JMP32_INSN_SIZE; + p->ainsn.boostable = 1; + } else { +- p->ainsn.boostable = 0; ++ /* Otherwise, put an int3 for trapping singlestep */ ++ if (MAX_INSN_SIZE - len < INT3_INSN_SIZE) ++ return -ENOSPC; ++ ++ buf[len] = INT3_INSN_OPCODE; ++ len += INT3_INSN_SIZE; + } + + return len; +@@ -427,42 +436,232 @@ void free_insn_page(void *page) + module_memfree(page); + } + +-static void set_resume_flags(struct kprobe *p, struct insn *insn) ++/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */ ++ ++static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs) ++{ ++ switch (p->ainsn.opcode) { ++ case 0xfa: /* cli */ ++ regs->flags &= ~(X86_EFLAGS_IF); ++ break; ++ case 0xfb: /* sti */ ++ regs->flags |= X86_EFLAGS_IF; ++ break; ++ case 0x9c: /* pushf */ ++ int3_emulate_push(regs, regs->flags); ++ break; ++ case 0x9d: /* popf */ ++ regs->flags = int3_emulate_pop(regs); ++ break; ++ } ++ regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; ++} ++NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers); ++ ++static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs) ++{ ++ int3_emulate_ret(regs); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_ret); ++ ++static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs) ++{ ++ unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size; ++ ++ func += p->ainsn.rel32; ++ int3_emulate_call(regs, func); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_call); ++ ++static nokprobe_inline ++void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond) ++{ ++ unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size; ++ ++ if (cond) ++ ip += p->ainsn.rel32; ++ int3_emulate_jmp(regs, ip); ++} ++ ++static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs) ++{ ++ __kprobe_emulate_jmp(p, regs, true); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_jmp); ++ ++static const unsigned long jcc_mask[6] = { ++ [0] = X86_EFLAGS_OF, ++ [1] = X86_EFLAGS_CF, ++ [2] = X86_EFLAGS_ZF, ++ [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, ++ [4] = X86_EFLAGS_SF, ++ [5] = X86_EFLAGS_PF, ++}; ++ ++static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs) ++{ ++ bool invert = p->ainsn.jcc.type & 1; ++ bool match; ++ ++ if (p->ainsn.jcc.type < 0xc) { ++ match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1]; ++ } else { ++ match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ ++ ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); ++ if (p->ainsn.jcc.type >= 0xe) ++ match = match && (regs->flags & X86_EFLAGS_ZF); ++ } ++ __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert)); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_jcc); ++ ++static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs) ++{ ++ bool match; ++ ++ if (p->ainsn.loop.type != 3) { /* LOOP* */ ++ if (p->ainsn.loop.asize == 32) ++ match = ((*(u32 *)®s->cx)--) != 0; ++#ifdef CONFIG_X86_64 ++ else if (p->ainsn.loop.asize == 64) ++ match = ((*(u64 *)®s->cx)--) != 0; ++#endif ++ else ++ match = ((*(u16 *)®s->cx)--) != 0; ++ } else { /* JCXZ */ ++ if (p->ainsn.loop.asize == 32) ++ match = *(u32 *)(®s->cx) == 0; ++#ifdef CONFIG_X86_64 ++ else if (p->ainsn.loop.asize == 64) ++ match = *(u64 *)(®s->cx) == 0; ++#endif ++ else ++ match = *(u16 *)(®s->cx) == 0; ++ } ++ ++ if (p->ainsn.loop.type == 0) /* LOOPNE */ ++ match = match && !(regs->flags & X86_EFLAGS_ZF); ++ else if (p->ainsn.loop.type == 1) /* LOOPE */ ++ match = match && (regs->flags & X86_EFLAGS_ZF); ++ ++ __kprobe_emulate_jmp(p, regs, match); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_loop); ++ ++static const int addrmode_regoffs[] = { ++ offsetof(struct pt_regs, ax), ++ offsetof(struct pt_regs, cx), ++ offsetof(struct pt_regs, dx), ++ offsetof(struct pt_regs, bx), ++ offsetof(struct pt_regs, sp), ++ offsetof(struct pt_regs, bp), ++ offsetof(struct pt_regs, si), ++ offsetof(struct pt_regs, di), ++#ifdef CONFIG_X86_64 ++ offsetof(struct pt_regs, r8), ++ offsetof(struct pt_regs, r9), ++ offsetof(struct pt_regs, r10), ++ offsetof(struct pt_regs, r11), ++ offsetof(struct pt_regs, r12), ++ offsetof(struct pt_regs, r13), ++ offsetof(struct pt_regs, r14), ++ offsetof(struct pt_regs, r15), ++#endif ++}; ++ ++static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs) ++{ ++ unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; ++ ++ int3_emulate_call(regs, regs_get_register(regs, offs)); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_call_indirect); ++ ++static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs) ++{ ++ unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg]; ++ ++ int3_emulate_jmp(regs, regs_get_register(regs, offs)); ++} ++NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect); ++ ++static int prepare_emulation(struct kprobe *p, struct insn *insn) + { + insn_byte_t opcode = insn->opcode.bytes[0]; + + switch (opcode) { + case 0xfa: /* cli */ + case 0xfb: /* sti */ ++ case 0x9c: /* pushfl */ + case 0x9d: /* popf/popfd */ +- /* Check whether the instruction modifies Interrupt Flag or not */ +- p->ainsn.if_modifier = 1; +- break; +- case 0x9c: /* pushfl */ +- p->ainsn.is_pushf = 1; ++ /* ++ * IF modifiers must be emulated since it will enable interrupt while ++ * int3 single stepping. ++ */ ++ p->ainsn.emulate_op = kprobe_emulate_ifmodifiers; ++ p->ainsn.opcode = opcode; + break; +- case 0xcf: /* iret */ +- p->ainsn.if_modifier = 1; +- fallthrough; + case 0xc2: /* ret/lret */ + case 0xc3: + case 0xca: + case 0xcb: +- case 0xea: /* jmp absolute -- ip is correct */ +- /* ip is already adjusted, no more changes required */ +- p->ainsn.is_abs_ip = 1; +- /* Without resume jump, this is boostable */ +- p->ainsn.boostable = 1; ++ p->ainsn.emulate_op = kprobe_emulate_ret; + break; +- case 0xe8: /* call relative - Fix return addr */ +- p->ainsn.is_call = 1; ++ case 0x9a: /* far call absolute -- segment is not supported */ ++ case 0xea: /* far jmp absolute -- segment is not supported */ ++ case 0xcc: /* int3 */ ++ case 0xcf: /* iret -- in-kernel IRET is not supported */ ++ return -EOPNOTSUPP; + break; +-#ifdef CONFIG_X86_32 +- case 0x9a: /* call absolute -- same as call absolute, indirect */ +- p->ainsn.is_call = 1; +- p->ainsn.is_abs_ip = 1; ++ case 0xe8: /* near call relative */ ++ p->ainsn.emulate_op = kprobe_emulate_call; ++ if (insn->immediate.nbytes == 2) ++ p->ainsn.rel32 = *(s16 *)&insn->immediate.value; ++ else ++ p->ainsn.rel32 = *(s32 *)&insn->immediate.value; ++ break; ++ case 0xeb: /* short jump relative */ ++ case 0xe9: /* near jump relative */ ++ p->ainsn.emulate_op = kprobe_emulate_jmp; ++ if (insn->immediate.nbytes == 1) ++ p->ainsn.rel32 = *(s8 *)&insn->immediate.value; ++ else if (insn->immediate.nbytes == 2) ++ p->ainsn.rel32 = *(s16 *)&insn->immediate.value; ++ else ++ p->ainsn.rel32 = *(s32 *)&insn->immediate.value; ++ break; ++ case 0x70 ... 0x7f: ++ /* 1 byte conditional jump */ ++ p->ainsn.emulate_op = kprobe_emulate_jcc; ++ p->ainsn.jcc.type = opcode & 0xf; ++ p->ainsn.rel32 = *(char *)insn->immediate.bytes; ++ break; ++ case 0x0f: ++ opcode = insn->opcode.bytes[1]; ++ if ((opcode & 0xf0) == 0x80) { ++ /* 2 bytes Conditional Jump */ ++ p->ainsn.emulate_op = kprobe_emulate_jcc; ++ p->ainsn.jcc.type = opcode & 0xf; ++ if (insn->immediate.nbytes == 2) ++ p->ainsn.rel32 = *(s16 *)&insn->immediate.value; ++ else ++ p->ainsn.rel32 = *(s32 *)&insn->immediate.value; ++ } else if (opcode == 0x01 && ++ X86_MODRM_REG(insn->modrm.bytes[0]) == 0 && ++ X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) { ++ /* VM extensions - not supported */ ++ return -EOPNOTSUPP; ++ } ++ break; ++ case 0xe0: /* Loop NZ */ ++ case 0xe1: /* Loop */ ++ case 0xe2: /* Loop */ ++ case 0xe3: /* J*CXZ */ ++ p->ainsn.emulate_op = kprobe_emulate_loop; ++ p->ainsn.loop.type = opcode & 0x3; ++ p->ainsn.loop.asize = insn->addr_bytes * 8; ++ p->ainsn.rel32 = *(s8 *)&insn->immediate.value; + break; +-#endif + case 0xff: + /* + * Since the 0xff is an extended group opcode, the instruction +@@ -470,46 +669,57 @@ static void set_resume_flags(struct kpro + */ + opcode = insn->modrm.bytes[0]; + if ((opcode & 0x30) == 0x10) { +- /* +- * call absolute, indirect +- * Fix return addr; ip is correct. +- * But this is not boostable +- */ +- p->ainsn.is_call = 1; +- p->ainsn.is_abs_ip = 1; +- break; ++ if ((opcode & 0x8) == 0x8) ++ return -EOPNOTSUPP; /* far call */ ++ /* call absolute, indirect */ ++ p->ainsn.emulate_op = kprobe_emulate_call_indirect; + } else if ((opcode & 0x30) == 0x20) { +- /* +- * jmp near and far, absolute indirect +- * ip is correct. +- */ +- p->ainsn.is_abs_ip = 1; +- /* Without resume jump, this is boostable */ +- p->ainsn.boostable = 1; +- } ++ if ((opcode & 0x8) == 0x8) ++ return -EOPNOTSUPP; /* far jmp */ ++ /* jmp near absolute indirect */ ++ p->ainsn.emulate_op = kprobe_emulate_jmp_indirect; ++ } else ++ break; ++ ++ if (insn->addr_bytes != sizeof(unsigned long)) ++ return -EOPNOTSUPP; /* Don't support differnt size */ ++ if (X86_MODRM_MOD(opcode) != 3) ++ return -EOPNOTSUPP; /* TODO: support memory addressing */ ++ ++ p->ainsn.indirect.reg = X86_MODRM_RM(opcode); ++#ifdef CONFIG_X86_64 ++ if (X86_REX_B(insn->rex_prefix.value)) ++ p->ainsn.indirect.reg += 8; ++#endif ++ break; ++ default: + break; + } ++ p->ainsn.size = insn->length; ++ ++ return 0; + } + + static int arch_copy_kprobe(struct kprobe *p) + { + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; +- int len; ++ int ret, len; + + /* Copy an instruction with recovering if other optprobe modifies it.*/ + len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn); + if (!len) + return -EINVAL; + +- /* +- * __copy_instruction can modify the displacement of the instruction, +- * but it doesn't affect boostable check. +- */ +- len = prepare_boost(buf, p, &insn); ++ /* Analyze the opcode and setup emulate functions */ ++ ret = prepare_emulation(p, &insn); ++ if (ret < 0) ++ return ret; + +- /* Analyze the opcode and set resume flags */ +- set_resume_flags(p, &insn); ++ /* Add int3 for single-step or booster jmp */ ++ len = prepare_singlestep(buf, p, &insn); ++ if (len < 0) ++ return len; + + /* Also, displacement change doesn't affect the first byte */ + p->opcode = buf[0]; +@@ -602,29 +812,7 @@ set_current_kprobe(struct kprobe *p, str + { + __this_cpu_write(current_kprobe, p); + kcb->kprobe_saved_flags = kcb->kprobe_old_flags +- = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); +- if (p->ainsn.if_modifier) +- kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; +-} +- +-static nokprobe_inline void clear_btf(void) +-{ +- if (test_thread_flag(TIF_BLOCKSTEP)) { +- unsigned long debugctl = get_debugctlmsr(); +- +- debugctl &= ~DEBUGCTLMSR_BTF; +- update_debugctlmsr(debugctl); +- } +-} +- +-static nokprobe_inline void restore_btf(void) +-{ +- if (test_thread_flag(TIF_BLOCKSTEP)) { +- unsigned long debugctl = get_debugctlmsr(); +- +- debugctl |= DEBUGCTLMSR_BTF; +- update_debugctlmsr(debugctl); +- } ++ = (regs->flags & X86_EFLAGS_IF); + } + + void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) +@@ -639,6 +827,22 @@ void arch_prepare_kretprobe(struct kretp + } + NOKPROBE_SYMBOL(arch_prepare_kretprobe); + ++static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, ++ struct kprobe_ctlblk *kcb) ++{ ++ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { ++ kcb->kprobe_status = KPROBE_HIT_SSDONE; ++ cur->post_handler(cur, regs, 0); ++ } ++ ++ /* Restore back the original saved kprobes variables and continue. */ ++ if (kcb->kprobe_status == KPROBE_REENTER) ++ restore_previous_kprobe(kcb); ++ else ++ reset_current_kprobe(); ++} ++NOKPROBE_SYMBOL(kprobe_post_process); ++ + static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) + { +@@ -646,7 +850,7 @@ static void setup_singlestep(struct kpro + return; + + #if !defined(CONFIG_PREEMPTION) +- if (p->ainsn.boostable && !p->post_handler) { ++ if (p->ainsn.boostable) { + /* Boost up -- we can execute copied instructions directly */ + if (!reenter) + reset_current_kprobe(); +@@ -665,19 +869,51 @@ static void setup_singlestep(struct kpro + kcb->kprobe_status = KPROBE_REENTER; + } else + kcb->kprobe_status = KPROBE_HIT_SS; +- /* Prepare real single stepping */ +- clear_btf(); +- regs->flags |= X86_EFLAGS_TF; ++ ++ if (p->ainsn.emulate_op) { ++ p->ainsn.emulate_op(p, regs); ++ kprobe_post_process(p, regs, kcb); ++ return; ++ } ++ ++ /* Disable interrupt, and set ip register on trampoline */ + regs->flags &= ~X86_EFLAGS_IF; +- /* single step inline if the instruction is an int3 */ +- if (p->opcode == INT3_INSN_OPCODE) +- regs->ip = (unsigned long)p->addr; +- else +- regs->ip = (unsigned long)p->ainsn.insn; ++ regs->ip = (unsigned long)p->ainsn.insn; + } + NOKPROBE_SYMBOL(setup_singlestep); + + /* ++ * Called after single-stepping. p->addr is the address of the ++ * instruction whose first byte has been replaced by the "int3" ++ * instruction. To avoid the SMP problems that can occur when we ++ * temporarily put back the original opcode to single-step, we ++ * single-stepped a copy of the instruction. The address of this ++ * copy is p->ainsn.insn. We also doesn't use trap, but "int3" again ++ * right after the copied instruction. ++ * Different from the trap single-step, "int3" single-step can not ++ * handle the instruction which changes the ip register, e.g. jmp, ++ * call, conditional jmp, and the instructions which changes the IF ++ * flags because interrupt must be disabled around the single-stepping. ++ * Such instructions are software emulated, but others are single-stepped ++ * using "int3". ++ * ++ * When the 2nd "int3" handled, the regs->ip and regs->flags needs to ++ * be adjusted, so that we can resume execution on correct code. ++ */ ++static void resume_singlestep(struct kprobe *p, struct pt_regs *regs, ++ struct kprobe_ctlblk *kcb) ++{ ++ unsigned long copy_ip = (unsigned long)p->ainsn.insn; ++ unsigned long orig_ip = (unsigned long)p->addr; ++ ++ /* Restore saved interrupt flag and ip register */ ++ regs->flags |= kcb->kprobe_saved_flags; ++ /* Note that regs->ip is executed int3 so must be a step back */ ++ regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE; ++} ++NOKPROBE_SYMBOL(resume_singlestep); ++ ++/* + * We have reentered the kprobe_handler(), since another probe was hit while + * within the handler. We save the original kprobes variables and just single + * step on the instruction of the new probe without calling any user handlers. +@@ -712,6 +948,12 @@ static int reenter_kprobe(struct kprobe + } + NOKPROBE_SYMBOL(reenter_kprobe); + ++static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb) ++{ ++ return (kcb->kprobe_status == KPROBE_HIT_SS || ++ kcb->kprobe_status == KPROBE_REENTER); ++} ++ + /* + * Interrupts are disabled on entry as trap3 is an interrupt gate and they + * remain disabled throughout this function. +@@ -756,7 +998,18 @@ int kprobe_int3_handler(struct pt_regs * + reset_current_kprobe(); + return 1; + } +- } else if (*addr != INT3_INSN_OPCODE) { ++ } else if (kprobe_is_ss(kcb)) { ++ p = kprobe_running(); ++ if ((unsigned long)p->ainsn.insn < regs->ip && ++ (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) { ++ /* Most provably this is the second int3 for singlestep */ ++ resume_singlestep(p, regs, kcb); ++ kprobe_post_process(p, regs, kcb); ++ return 1; ++ } ++ } ++ ++ if (*addr != INT3_INSN_OPCODE) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed +@@ -829,91 +1082,6 @@ __used __visible void *trampoline_handle + } + NOKPROBE_SYMBOL(trampoline_handler); + +-/* +- * Called after single-stepping. p->addr is the address of the +- * instruction whose first byte has been replaced by the "int 3" +- * instruction. To avoid the SMP problems that can occur when we +- * temporarily put back the original opcode to single-step, we +- * single-stepped a copy of the instruction. The address of this +- * copy is p->ainsn.insn. +- * +- * This function prepares to return from the post-single-step +- * interrupt. We have to fix up the stack as follows: +- * +- * 0) Except in the case of absolute or indirect jump or call instructions, +- * the new ip is relative to the copied instruction. We need to make +- * it relative to the original instruction. +- * +- * 1) If the single-stepped instruction was pushfl, then the TF and IF +- * flags are set in the just-pushed flags, and may need to be cleared. +- * +- * 2) If the single-stepped instruction was a call, the return address +- * that is atop the stack is the address following the copied instruction. +- * We need to make it the address following the original instruction. +- */ +-static void resume_execution(struct kprobe *p, struct pt_regs *regs, +- struct kprobe_ctlblk *kcb) +-{ +- unsigned long *tos = stack_addr(regs); +- unsigned long copy_ip = (unsigned long)p->ainsn.insn; +- unsigned long orig_ip = (unsigned long)p->addr; +- +- regs->flags &= ~X86_EFLAGS_TF; +- +- /* Fixup the contents of top of stack */ +- if (p->ainsn.is_pushf) { +- *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); +- *tos |= kcb->kprobe_old_flags; +- } else if (p->ainsn.is_call) { +- *tos = orig_ip + (*tos - copy_ip); +- } +- +- if (!p->ainsn.is_abs_ip) +- regs->ip += orig_ip - copy_ip; +- +- restore_btf(); +-} +-NOKPROBE_SYMBOL(resume_execution); +- +-/* +- * Interrupts are disabled on entry as trap1 is an interrupt gate and they +- * remain disabled throughout this function. +- */ +-int kprobe_debug_handler(struct pt_regs *regs) +-{ +- struct kprobe *cur = kprobe_running(); +- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); +- +- if (!cur) +- return 0; +- +- resume_execution(cur, regs, kcb); +- regs->flags |= kcb->kprobe_saved_flags; +- +- if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { +- kcb->kprobe_status = KPROBE_HIT_SSDONE; +- cur->post_handler(cur, regs, 0); +- } +- +- /* Restore back the original saved kprobes variables and continue. */ +- if (kcb->kprobe_status == KPROBE_REENTER) { +- restore_previous_kprobe(kcb); +- goto out; +- } +- reset_current_kprobe(); +-out: +- /* +- * if somebody else is singlestepping across a probe point, flags +- * will have TF set, in which case, continue the remaining processing +- * of do_debug, as if this is not a probe hit. +- */ +- if (regs->flags & X86_EFLAGS_TF) +- return 0; +- +- return 1; +-} +-NOKPROBE_SYMBOL(kprobe_debug_handler); +- + int kprobe_fault_handler(struct pt_regs *regs, int trapnr) + { + struct kprobe *cur = kprobe_running(); +@@ -931,20 +1099,9 @@ int kprobe_fault_handler(struct pt_regs + * normal page fault. + */ + regs->ip = (unsigned long)cur->addr; +- /* +- * Trap flag (TF) has been set here because this fault +- * happened where the single stepping will be done. +- * So clear it by resetting the current kprobe: +- */ +- regs->flags &= ~X86_EFLAGS_TF; +- /* +- * Since the single step (trap) has been cancelled, +- * we need to restore BTF here. +- */ +- restore_btf(); + + /* +- * If the TF flag was set before the kprobe hit, ++ * If the IF flag was set before the kprobe hit, + * don't touch it: + */ + regs->flags |= kcb->kprobe_old_flags; +--- a/arch/x86/kernel/traps.c ++++ b/arch/x86/kernel/traps.c +@@ -917,9 +917,6 @@ static __always_inline void exc_debug_ke + if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs)) + dr6 &= ~DR_STEP; + +- if (kprobe_debug_handler(regs)) +- goto out; +- + /* + * The kernel doesn't use INT1 + */ -- 2.47.3