]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 4 Aug 2023 09:56:33 +0000 (11:56 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 4 Aug 2023 09:56:33 +0000 (11:56 +0200)
added patches:
kprobes-x86-fix-fall-through-warnings-for-clang.patch
x86-kprobes-do-not-decode-opcode-in-resume_execution.patch
x86-kprobes-fix-jng-jnle-emulation.patch
x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch
x86-kprobes-identify-far-indirect-jmp-correctly.patch
x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch
x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch
x86-kprobes-update-kcb-status-flag-after-singlestepping.patch
x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch

queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch [new file with mode: 0644]

diff --git a/queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch b/queue-5.10/kprobes-x86-fix-fall-through-warnings-for-clang.patch
new file mode 100644 (file)
index 0000000..571f9ab
--- /dev/null
@@ -0,0 +1,35 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:47:50 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:45 +0800
+Subject: kprobes/x86: Fix fall-through warnings for Clang
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-2-lihuafei1@huawei.com>
+
+From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+
+[ Upstream commit e689b300c99ca2dd80d3f662e19499bba27cda09 ]
+
+In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
+by explicitly adding a break statement instead of just letting the code
+fall through to the next case.
+
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://github.com/KSPP/linux/issues/115
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -880,6 +880,7 @@ static void resume_execution(struct kpro
+                       p->ainsn.boostable = true;
+                       goto no_change;
+               }
++              break;
+       default:
+               break;
+       }
index 288d22c8062f1c194c8f33863e36ecfef95cf0c6..a96fe7acb97a4b7282fcaa4c3183faa7077b3fcd 100644 (file)
@@ -110,3 +110,12 @@ cpufreq-intel_pstate-drop-acpi-_pss-states-table-patching.patch
 selftests-mptcp-depend-on-syn_cookies.patch
 io_uring-treat-eagain-for-req_f_nowait-as-final-for-io-wq.patch
 asoc-cs42l51-fix-driver-to-properly-autoload-with-automatic-module-loading.patch
+kprobes-x86-fix-fall-through-warnings-for-clang.patch
+x86-kprobes-do-not-decode-opcode-in-resume_execution.patch
+x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch
+x86-kprobes-identify-far-indirect-jmp-correctly.patch
+x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch
+x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch
+x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch
+x86-kprobes-update-kcb-status-flag-after-singlestepping.patch
+x86-kprobes-fix-jng-jnle-emulation.patch
diff --git a/queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch b/queue-5.10/x86-kprobes-do-not-decode-opcode-in-resume_execution.patch
new file mode 100644 (file)
index 0000000..4b532b7
--- /dev/null
@@ -0,0 +1,291 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:47:51 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:46 +0800
+Subject: x86/kprobes: Do not decode opcode in resume_execution()
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-3-lihuafei1@huawei.com>
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit abd82e533d88df1521e3da6799b83ce88852ab88 ]
+
+Currently, kprobes decodes the opcode right after single-stepping in
+resume_execution(). But the opcode was already decoded while preparing
+arch_specific_insn in arch_copy_kprobe().
+
+Decode the opcode in arch_copy_kprobe() instead of in resume_execution()
+and set some flags which classify the opcode for the resuming process.
+
+ [ bp: Massage commit message. ]
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lkml.kernel.org/r/160830072561.349576.3014979564448023213.stgit@devnote2
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kprobes.h |   11 +-
+ arch/x86/kernel/kprobes/core.c |  168 ++++++++++++++++++-----------------------
+ 2 files changed, 81 insertions(+), 98 deletions(-)
+
+--- a/arch/x86/include/asm/kprobes.h
++++ b/arch/x86/include/asm/kprobes.h
+@@ -58,14 +58,17 @@ struct arch_specific_insn {
+       /* copy of the original instruction */
+       kprobe_opcode_t *insn;
+       /*
+-       * boostable = false: This instruction type is not boostable.
+-       * boostable = true: This instruction has been boosted: we have
++       * boostable = 0: This instruction type is not boostable.
++       * boostable = 1: This instruction has been boosted: we have
+        * added a relative jump after the instruction copy in insn,
+        * so no single-step and fixup are needed (unless there's
+        * a post_handler).
+        */
+-      bool boostable;
+-      bool if_modifier;
++      unsigned boostable:1;
++      unsigned if_modifier:1;
++      unsigned is_call:1;
++      unsigned is_pushf:1;
++      unsigned is_abs_ip:1;
+       /* Number of bytes of text poked */
+       int tp_len;
+ };
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -134,26 +134,6 @@ void synthesize_relcall(void *dest, void
+ NOKPROBE_SYMBOL(synthesize_relcall);
+ /*
+- * Skip the prefixes of the instruction.
+- */
+-static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn)
+-{
+-      insn_attr_t attr;
+-
+-      attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+-      while (inat_is_legacy_prefix(attr)) {
+-              insn++;
+-              attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+-      }
+-#ifdef CONFIG_X86_64
+-      if (inat_is_rex_prefix(attr))
+-              insn++;
+-#endif
+-      return insn;
+-}
+-NOKPROBE_SYMBOL(skip_prefixes);
+-
+-/*
+  * Returns non-zero if INSN is boostable.
+  * RIP relative instructions are adjusted at copying time in 64 bits mode
+  */
+@@ -327,25 +307,6 @@ static int can_probe(unsigned long paddr
+ }
+ /*
+- * Returns non-zero if opcode modifies the interrupt flag.
+- */
+-static int is_IF_modifier(kprobe_opcode_t *insn)
+-{
+-      /* Skip prefixes */
+-      insn = skip_prefixes(insn);
+-
+-      switch (*insn) {
+-      case 0xfa:              /* cli */
+-      case 0xfb:              /* sti */
+-      case 0xcf:              /* iret/iretd */
+-      case 0x9d:              /* popf/popfd */
+-              return 1;
+-      }
+-
+-      return 0;
+-}
+-
+-/*
+  * Copy an instruction with recovering modified instruction by kprobes
+  * and adjust the displacement if the instruction uses the %rip-relative
+  * addressing mode. Note that since @real will be the final place of copied
+@@ -427,9 +388,9 @@ static int prepare_boost(kprobe_opcode_t
+               synthesize_reljump(buf + len, p->ainsn.insn + len,
+                                  p->addr + insn->length);
+               len += JMP32_INSN_SIZE;
+-              p->ainsn.boostable = true;
++              p->ainsn.boostable = 1;
+       } else {
+-              p->ainsn.boostable = false;
++              p->ainsn.boostable = 0;
+       }
+       return len;
+@@ -466,6 +427,67 @@ void free_insn_page(void *page)
+       module_memfree(page);
+ }
++static void set_resume_flags(struct kprobe *p, struct insn *insn)
++{
++      insn_byte_t opcode = insn->opcode.bytes[0];
++
++      switch (opcode) {
++      case 0xfa:              /* cli */
++      case 0xfb:              /* sti */
++      case 0x9d:              /* popf/popfd */
++              /* Check whether the instruction modifies Interrupt Flag or not */
++              p->ainsn.if_modifier = 1;
++              break;
++      case 0x9c:      /* pushfl */
++              p->ainsn.is_pushf = 1;
++              break;
++      case 0xcf:      /* iret */
++              p->ainsn.if_modifier = 1;
++              fallthrough;
++      case 0xc2:      /* ret/lret */
++      case 0xc3:
++      case 0xca:
++      case 0xcb:
++      case 0xea:      /* jmp absolute -- ip is correct */
++              /* ip is already adjusted, no more changes required */
++              p->ainsn.is_abs_ip = 1;
++              /* Without resume jump, this is boostable */
++              p->ainsn.boostable = 1;
++              break;
++      case 0xe8:      /* call relative - Fix return addr */
++              p->ainsn.is_call = 1;
++              break;
++#ifdef CONFIG_X86_32
++      case 0x9a:      /* call absolute -- same as call absolute, indirect */
++              p->ainsn.is_call = 1;
++              p->ainsn.is_abs_ip = 1;
++              break;
++#endif
++      case 0xff:
++              opcode = insn->opcode.bytes[1];
++              if ((opcode & 0x30) == 0x10) {
++                      /*
++                       * call absolute, indirect
++                       * Fix return addr; ip is correct.
++                       * But this is not boostable
++                       */
++                      p->ainsn.is_call = 1;
++                      p->ainsn.is_abs_ip = 1;
++                      break;
++              } else if (((opcode & 0x31) == 0x20) ||
++                         ((opcode & 0x31) == 0x21)) {
++                      /*
++                       * jmp near and far, absolute indirect
++                       * ip is correct.
++                       */
++                      p->ainsn.is_abs_ip = 1;
++                      /* Without resume jump, this is boostable */
++                      p->ainsn.boostable = 1;
++              }
++              break;
++      }
++}
++
+ static int arch_copy_kprobe(struct kprobe *p)
+ {
+       struct insn insn;
+@@ -483,8 +505,8 @@ static int arch_copy_kprobe(struct kprob
+        */
+       len = prepare_boost(buf, p, &insn);
+-      /* Check whether the instruction modifies Interrupt Flag or not */
+-      p->ainsn.if_modifier = is_IF_modifier(buf);
++      /* Analyze the opcode and set resume flags */
++      set_resume_flags(p, &insn);
+       /* Also, displacement change doesn't affect the first byte */
+       p->opcode = buf[0];
+@@ -507,6 +529,9 @@ int arch_prepare_kprobe(struct kprobe *p
+       if (!can_probe((unsigned long)p->addr))
+               return -EILSEQ;
++
++      memset(&p->ainsn, 0, sizeof(p->ainsn));
++
+       /* insn: must be on special executable page on x86. */
+       p->ainsn.insn = get_insn_slot();
+       if (!p->ainsn.insn)
+@@ -822,11 +847,6 @@ NOKPROBE_SYMBOL(trampoline_handler);
+  * 2) If the single-stepped instruction was a call, the return address
+  * that is atop the stack is the address following the copied instruction.
+  * We need to make it the address following the original instruction.
+- *
+- * If this is the first time we've single-stepped the instruction at
+- * this probepoint, and the instruction is boostable, boost it: add a
+- * jump instruction after the copied instruction, that jumps to the next
+- * instruction after the probepoint.
+  */
+ static void resume_execution(struct kprobe *p, struct pt_regs *regs,
+                            struct kprobe_ctlblk *kcb)
+@@ -834,60 +854,20 @@ static void resume_execution(struct kpro
+       unsigned long *tos = stack_addr(regs);
+       unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+       unsigned long orig_ip = (unsigned long)p->addr;
+-      kprobe_opcode_t *insn = p->ainsn.insn;
+-
+-      /* Skip prefixes */
+-      insn = skip_prefixes(insn);
+       regs->flags &= ~X86_EFLAGS_TF;
+-      switch (*insn) {
+-      case 0x9c:      /* pushfl */
++
++      /* Fixup the contents of top of stack */
++      if (p->ainsn.is_pushf) {
+               *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
+               *tos |= kcb->kprobe_old_flags;
+-              break;
+-      case 0xc2:      /* iret/ret/lret */
+-      case 0xc3:
+-      case 0xca:
+-      case 0xcb:
+-      case 0xcf:
+-      case 0xea:      /* jmp absolute -- ip is correct */
+-              /* ip is already adjusted, no more changes required */
+-              p->ainsn.boostable = true;
+-              goto no_change;
+-      case 0xe8:      /* call relative - Fix return addr */
++      } else if (p->ainsn.is_call) {
+               *tos = orig_ip + (*tos - copy_ip);
+-              break;
+-#ifdef CONFIG_X86_32
+-      case 0x9a:      /* call absolute -- same as call absolute, indirect */
+-              *tos = orig_ip + (*tos - copy_ip);
+-              goto no_change;
+-#endif
+-      case 0xff:
+-              if ((insn[1] & 0x30) == 0x10) {
+-                      /*
+-                       * call absolute, indirect
+-                       * Fix return addr; ip is correct.
+-                       * But this is not boostable
+-                       */
+-                      *tos = orig_ip + (*tos - copy_ip);
+-                      goto no_change;
+-              } else if (((insn[1] & 0x31) == 0x20) ||
+-                         ((insn[1] & 0x31) == 0x21)) {
+-                      /*
+-                       * jmp near and far, absolute indirect
+-                       * ip is correct. And this is boostable
+-                       */
+-                      p->ainsn.boostable = true;
+-                      goto no_change;
+-              }
+-              break;
+-      default:
+-              break;
+       }
+-      regs->ip += orig_ip - copy_ip;
++      if (!p->ainsn.is_abs_ip)
++              regs->ip += orig_ip - copy_ip;
+-no_change:
+       restore_btf();
+ }
+ NOKPROBE_SYMBOL(resume_execution);
diff --git a/queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch b/queue-5.10/x86-kprobes-fix-jng-jnle-emulation.patch
new file mode 100644 (file)
index 0000000..5ee4c39
--- /dev/null
@@ -0,0 +1,44 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:48:02 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:53 +0800
+Subject: x86/kprobes: Fix JNG/JNLE emulation
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-10-lihuafei1@huawei.com>
+
+From: Nadav Amit <namit@vmware.com>
+
+[ Upstream commit 8924779df820c53875abaeb10c648e9cb75b46d4 ]
+
+When kprobes emulates JNG/JNLE instructions on x86 it uses the wrong
+condition. For JNG (opcode: 0F 8E), according to Intel SDM, the jump is
+performed if (ZF == 1 or SF != OF). However the kernel emulation
+currently uses 'and' instead of 'or'.
+
+As a result, setting a kprobe on JNG/JNLE might cause the kernel to
+behave incorrectly whenever the kprobe is hit.
+
+Fix by changing the 'and' to 'or'.
+
+Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step")
+Signed-off-by: Nadav Amit <namit@vmware.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220813225943.143767-1-namit@vmware.com
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -505,7 +505,7 @@ static void kprobe_emulate_jcc(struct kp
+               match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+                       ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+               if (p->ainsn.jcc.type >= 0xe)
+-                      match = match && (regs->flags & X86_EFLAGS_ZF);
++                      match = match || (regs->flags & X86_EFLAGS_ZF);
+       }
+       __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
+ }
diff --git a/queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch b/queue-5.10/x86-kprobes-fix-to-identify-indirect-jmp-and-others-using-range-case.patch
new file mode 100644 (file)
index 0000000..dcccb74
--- /dev/null
@@ -0,0 +1,88 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:48:03 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:50 +0800
+Subject: x86/kprobes: Fix to identify indirect jmp and others using range case
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-7-lihuafei1@huawei.com>
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit 2f706e0e5e263c0d204e37ea496cbb0e98aac2d2 ]
+
+Fix can_boost() to identify indirect jmp and others using range case
+correctly.
+
+Since the condition in switch statement is opcode & 0xf0, it can not
+evaluate to 0xff case. This should be under the 0xf0 case. However,
+there is no reason to use the conbinations of the bit-masked condition
+and lower bit checking.
+
+Use range case to clean up the switch statement too.
+
+Fixes: 6256e668b7 ("x86/kprobes: Use int3 instead of debug trap for single-step")
+Reported-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/161666692308.1120877.4675552834049546493.stgit@devnote2
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |   44 ++++++++++++++++++-----------------------
+ 1 file changed, 20 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -165,32 +165,28 @@ int can_boost(struct insn *insn, void *a
+       opcode = insn->opcode.bytes[0];
+-      switch (opcode & 0xf0) {
+-      case 0x60:
+-              /* can't boost "bound" */
+-              return (opcode != 0x62);
+-      case 0x70:
+-              return 0; /* can't boost conditional jump */
+-      case 0x90:
+-              return opcode != 0x9a;  /* can't boost call far */
+-      case 0xc0:
+-              /* can't boost software-interruptions */
+-              return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf;
+-      case 0xd0:
+-              /* can boost AA* and XLAT */
+-              return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7);
+-      case 0xe0:
+-              /* can boost in/out and absolute jmps */
+-              return ((opcode & 0x04) || opcode == 0xea);
+-      case 0xf0:
+-              /* clear and set flags are boostable */
+-              return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
+-      case 0xff:
+-              /* indirect jmp is boostable */
++      switch (opcode) {
++      case 0x62:              /* bound */
++      case 0x70 ... 0x7f:     /* Conditional jumps */
++      case 0x9a:              /* Call far */
++      case 0xc0 ... 0xc1:     /* Grp2 */
++      case 0xcc ... 0xce:     /* software exceptions */
++      case 0xd0 ... 0xd3:     /* Grp2 */
++      case 0xd6:              /* (UD) */
++      case 0xd8 ... 0xdf:     /* ESC */
++      case 0xe0 ... 0xe3:     /* LOOP*, JCXZ */
++      case 0xe8 ... 0xe9:     /* near Call, JMP */
++      case 0xeb:              /* Short JMP */
++      case 0xf0 ... 0xf4:     /* LOCK/REP, HLT */
++      case 0xf6 ... 0xf7:     /* Grp3 */
++      case 0xfe:              /* Grp4 */
++              /* ... are not boostable */
++              return 0;
++      case 0xff:              /* Grp5 */
++              /* Only indirect jmp is boostable */
+               return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
+       default:
+-              /* call is not boostable */
+-              return opcode != 0x9a;
++              return 1;
+       }
+ }
diff --git a/queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch b/queue-5.10/x86-kprobes-identify-far-indirect-jmp-correctly.patch
new file mode 100644 (file)
index 0000000..6a7e484
--- /dev/null
@@ -0,0 +1,41 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:47:51 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:48 +0800
+Subject: x86/kprobes: Identify far indirect JMP correctly
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-5-lihuafei1@huawei.com>
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit a194acd316f93f3435a64de3b37dca2b5a77b338 ]
+
+Since Grp5 far indirect JMP is FF "mod 101 r/m", it should be
+(modrm & 0x38) == 0x28, and near indirect JMP is also 0x38 == 0x20.
+So we can mask modrm with 0x30 and check 0x20.
+This is actually what the original code does, it also doesn't care
+the last bit. So the result code is same.
+
+Thus, I think this is just a cosmetic cleanup.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/161469873475.49483.13257083019966335137.stgit@devnote2
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -478,8 +478,7 @@ static void set_resume_flags(struct kpro
+                       p->ainsn.is_call = 1;
+                       p->ainsn.is_abs_ip = 1;
+                       break;
+-              } else if (((opcode & 0x31) == 0x20) ||
+-                         ((opcode & 0x31) == 0x21)) {
++              } else if ((opcode & 0x30) == 0x20) {
+                       /*
+                        * jmp near and far, absolute indirect
+                        * ip is correct.
diff --git a/queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch b/queue-5.10/x86-kprobes-move-inline-to-the-beginning-of-the-kprobe_is_ss-declaration.patch
new file mode 100644 (file)
index 0000000..c49b43c
--- /dev/null
@@ -0,0 +1,44 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:47:51 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:51 +0800
+Subject: x86/kprobes: Move 'inline' to the beginning of the kprobe_is_ss() declaration
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-8-lihuafei1@huawei.com>
+
+From: Wei Yongjun <weiyongjun1@huawei.com>
+
+[ Upstream commit 2304d14db6595bea5292bece06c4c625b12d8f89 ]
+
+Address this GCC warning:
+
+  arch/x86/kernel/kprobes/core.c:940:1:
+   warning: 'inline' is not at beginning of declaration [-Wold-style-declaration]
+    940 | static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb)
+        | ^~~~~~
+
+[ mingo: Tidied up the changelog. ]
+
+Fixes: 6256e668b7af: ("x86/kprobes: Use int3 instead of debug trap for single-step")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20210324144502.1154883-1-weiyongjun1@huawei.com
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -944,7 +944,7 @@ static int reenter_kprobe(struct kprobe
+ }
+ NOKPROBE_SYMBOL(reenter_kprobe);
+-static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb)
++static nokprobe_inline int kprobe_is_ss(struct kprobe_ctlblk *kcb)
+ {
+       return (kcb->kprobe_status == KPROBE_HIT_SS ||
+               kcb->kprobe_status == KPROBE_REENTER);
diff --git a/queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch b/queue-5.10/x86-kprobes-retrieve-correct-opcode-for-group-instruction.patch
new file mode 100644 (file)
index 0000000..fca0127
--- /dev/null
@@ -0,0 +1,44 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:48:03 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:47 +0800
+Subject: x86/kprobes: Retrieve correct opcode for group instruction
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-4-lihuafei1@huawei.com>
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit d60ad3d46f1d04a282c56159f1deb675c12733fd ]
+
+Since the opcodes start from 0xff are group5 instruction group which is
+not 2 bytes opcode but the extended opcode determined by the MOD/RM byte.
+
+The commit abd82e533d88 ("x86/kprobes: Do not decode opcode in resume_execution()")
+used insn->opcode.bytes[1], but that is not correct. We have to refer
+the insn->modrm.bytes[1] instead.
+
+Fixes: abd82e533d88 ("x86/kprobes: Do not decode opcode in resume_execution()")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/161469872400.49483.18214724458034233166.stgit@devnote2
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -464,7 +464,11 @@ static void set_resume_flags(struct kpro
+               break;
+ #endif
+       case 0xff:
+-              opcode = insn->opcode.bytes[1];
++              /*
++               * Since the 0xff is an extended group opcode, the instruction
++               * is determined by the MOD/RM byte.
++               */
++              opcode = insn->modrm.bytes[0];
+               if ((opcode & 0x30) == 0x10) {
+                       /*
+                        * call absolute, indirect
diff --git a/queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch b/queue-5.10/x86-kprobes-update-kcb-status-flag-after-singlestepping.patch
new file mode 100644 (file)
index 0000000..de7d90c
--- /dev/null
@@ -0,0 +1,63 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:48:04 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:52 +0800
+Subject: x86/kprobes: Update kcb status flag after singlestepping
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-9-lihuafei1@huawei.com>
+
+From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+
+[ Upstream commit dec8784c9088b131a1523f582c2194cfc8107dc0 ]
+
+Fix kprobes to update kcb (kprobes control block) status flag to
+KPROBE_HIT_SSDONE even if the kp->post_handler is not set.
+
+This bug may cause a kernel panic if another INT3 user runs right
+after kprobes because kprobe_int3_handler() misunderstands the
+INT3 is kprobe's single stepping INT3.
+
+Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step")
+Reported-by: Daniel Müller <deso@posteo.net>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Daniel Müller <deso@posteo.net>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20220727210136.jjgc3lpqeq42yr3m@muellerd-fedora-PC2BDTX9
+Link: https://lore.kernel.org/r/165942025658.342061.12452378391879093249.stgit@devnote2
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/kprobes/core.c |   18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -826,16 +826,20 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+ static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
+                              struct kprobe_ctlblk *kcb)
+ {
+-      if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+-              kcb->kprobe_status = KPROBE_HIT_SSDONE;
+-              cur->post_handler(cur, regs, 0);
+-      }
+-
+       /* Restore back the original saved kprobes variables and continue. */
+-      if (kcb->kprobe_status == KPROBE_REENTER)
++      if (kcb->kprobe_status == KPROBE_REENTER) {
++              /* This will restore both kcb and current_kprobe */
+               restore_previous_kprobe(kcb);
+-      else
++      } else {
++              /*
++               * Always update the kcb status because
++               * reset_curent_kprobe() doesn't update kcb.
++               */
++              kcb->kprobe_status = KPROBE_HIT_SSDONE;
++              if (cur->post_handler)
++                      cur->post_handler(cur, regs, 0);
+               reset_current_kprobe();
++      }
+ }
+ NOKPROBE_SYMBOL(kprobe_post_process);
diff --git a/queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch b/queue-5.10/x86-kprobes-use-int3-instead-of-debug-trap-for-single-step.patch
new file mode 100644 (file)
index 0000000..2064eae
--- /dev/null
@@ -0,0 +1,749 @@
+From stable-owner@vger.kernel.org Wed Jul  5 08:48:03 2023
+From: Li Huafei <lihuafei1@huawei.com>
+Date: Wed, 5 Jul 2023 14:46:49 +0800
+Subject: x86/kprobes: Use int3 instead of debug trap for single-step
+To: <stable@vger.kernel.org>
+Cc: <gregkh@linuxfoundation.org>, <mhiramat@kernel.org>, <tglx@linutronix.de>, <mingo@redhat.com>, <bp@alien8.de>, <x86@kernel.org>, <hpa@zytor.com>, <sashal@kernel.org>, <peterz@infradead.org>, <linux-kernel@vger.kernel.org>, <xukuohai@huawei.com>, <natechancellor@gmail.com>, <ndesaulniers@google.com>, <rostedt@goodmis.org>, <weiyongjun1@huawei.com>, <gustavoars@kernel.org>, <namit@vmware.com>, <laijs@linux.alibaba.com>, <clang-built-linux@googlegroups.com>, <lihuafei1@huawei.com>
+Message-ID: <20230705064653.226811-6-lihuafei1@huawei.com>
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit 6256e668b7af9d81472e03c6a171630c08f8858a ]
+
+Use int3 instead of debug trap exception for single-stepping the
+probed instructions. Some instructions which change the ip
+registers or modify IF flags are emulated because those are not
+able to be single-stepped by int3 or may allow the interrupt
+while single-stepping.
+
+This actually changes the kprobes behavior.
+
+- kprobes can not probe following instructions; int3, iret,
+  far jmp/call which get absolute address as immediate,
+  indirect far jmp/call, indirect near jmp/call with addressing
+  by memory (register-based indirect jmp/call are OK), and
+  vmcall/vmlaunch/vmresume/vmxoff.
+
+- If the kprobe post_handler doesn't set before registering,
+  it may not be called in some case even if you set it afterwards.
+  (IOW, kprobe booster is enabled at registration, user can not
+   change it)
+
+But both are rare issue, unsupported instructions will not be
+used in the kernel (or rarely used), and post_handlers are
+rarely used (I don't see it except for the test code).
+
+Suggested-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/161469874601.49483.11985325887166921076.stgit@devnote2
+[Huafei: Fix trivial conflict in arch/x86/kernel/kprobes/core.c due to
+the previously backported commit 6dd3b8c9f5881]
+Signed-off-by: Li Huafei <lihuafei1@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kprobes.h |   21 +
+ arch/x86/kernel/kprobes/core.c |  517 ++++++++++++++++++++++++++---------------
+ arch/x86/kernel/traps.c        |    3 
+ 3 files changed, 353 insertions(+), 188 deletions(-)
+
+--- a/arch/x86/include/asm/kprobes.h
++++ b/arch/x86/include/asm/kprobes.h
+@@ -65,10 +65,22 @@ struct arch_specific_insn {
+        * a post_handler).
+        */
+       unsigned boostable:1;
+-      unsigned if_modifier:1;
+-      unsigned is_call:1;
+-      unsigned is_pushf:1;
+-      unsigned is_abs_ip:1;
++      unsigned char size;     /* The size of insn */
++      union {
++              unsigned char opcode;
++              struct {
++                      unsigned char type;
++              } jcc;
++              struct {
++                      unsigned char type;
++                      unsigned char asize;
++              } loop;
++              struct {
++                      unsigned char reg;
++              } indirect;
++      };
++      s32 rel32;      /* relative offset must be s32, s16, or s8 */
++      void (*emulate_op)(struct kprobe *p, struct pt_regs *regs);
+       /* Number of bytes of text poked */
+       int tp_len;
+ };
+@@ -107,7 +119,6 @@ extern int kprobe_fault_handler(struct p
+ extern int kprobe_exceptions_notify(struct notifier_block *self,
+                                   unsigned long val, void *data);
+ extern int kprobe_int3_handler(struct pt_regs *regs);
+-extern int kprobe_debug_handler(struct pt_regs *regs);
+ #else
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -185,6 +185,9 @@ int can_boost(struct insn *insn, void *a
+       case 0xf0:
+               /* clear and set flags are boostable */
+               return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe));
++      case 0xff:
++              /* indirect jmp is boostable */
++              return X86_MODRM_REG(insn->modrm.bytes[0]) == 4;
+       default:
+               /* call is not boostable */
+               return opcode != 0x9a;
+@@ -373,13 +376,14 @@ int __copy_instruction(u8 *dest, u8 *src
+       return insn->length;
+ }
+-/* Prepare reljump right after instruction to boost */
+-static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
+-                        struct insn *insn)
++/* Prepare reljump or int3 right after instruction */
++static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
++                            struct insn *insn)
+ {
+       int len = insn->length;
+-      if (can_boost(insn, p->addr) &&
++      if (!IS_ENABLED(CONFIG_PREEMPTION) &&
++          !p->post_handler && can_boost(insn, p->addr) &&
+           MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
+               /*
+                * These instructions can be executed directly if it
+@@ -390,7 +394,12 @@ static int prepare_boost(kprobe_opcode_t
+               len += JMP32_INSN_SIZE;
+               p->ainsn.boostable = 1;
+       } else {
+-              p->ainsn.boostable = 0;
++              /* Otherwise, put an int3 for trapping singlestep */
++              if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
++                      return -ENOSPC;
++
++              buf[len] = INT3_INSN_OPCODE;
++              len += INT3_INSN_SIZE;
+       }
+       return len;
+@@ -427,42 +436,232 @@ void free_insn_page(void *page)
+       module_memfree(page);
+ }
+-static void set_resume_flags(struct kprobe *p, struct insn *insn)
++/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
++
++static void kprobe_emulate_ifmodifiers(struct kprobe *p, struct pt_regs *regs)
++{
++      switch (p->ainsn.opcode) {
++      case 0xfa:      /* cli */
++              regs->flags &= ~(X86_EFLAGS_IF);
++              break;
++      case 0xfb:      /* sti */
++              regs->flags |= X86_EFLAGS_IF;
++              break;
++      case 0x9c:      /* pushf */
++              int3_emulate_push(regs, regs->flags);
++              break;
++      case 0x9d:      /* popf */
++              regs->flags = int3_emulate_pop(regs);
++              break;
++      }
++      regs->ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
++}
++NOKPROBE_SYMBOL(kprobe_emulate_ifmodifiers);
++
++static void kprobe_emulate_ret(struct kprobe *p, struct pt_regs *regs)
++{
++      int3_emulate_ret(regs);
++}
++NOKPROBE_SYMBOL(kprobe_emulate_ret);
++
++static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
++{
++      unsigned long func = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
++
++      func += p->ainsn.rel32;
++      int3_emulate_call(regs, func);
++}
++NOKPROBE_SYMBOL(kprobe_emulate_call);
++
++static nokprobe_inline
++void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
++{
++      unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
++
++      if (cond)
++              ip += p->ainsn.rel32;
++      int3_emulate_jmp(regs, ip);
++}
++
++static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
++{
++      __kprobe_emulate_jmp(p, regs, true);
++}
++NOKPROBE_SYMBOL(kprobe_emulate_jmp);
++
++static const unsigned long jcc_mask[6] = {
++      [0] = X86_EFLAGS_OF,
++      [1] = X86_EFLAGS_CF,
++      [2] = X86_EFLAGS_ZF,
++      [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
++      [4] = X86_EFLAGS_SF,
++      [5] = X86_EFLAGS_PF,
++};
++
++static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
++{
++      bool invert = p->ainsn.jcc.type & 1;
++      bool match;
++
++      if (p->ainsn.jcc.type < 0xc) {
++              match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
++      } else {
++              match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
++                      ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
++              if (p->ainsn.jcc.type >= 0xe)
++                      match = match && (regs->flags & X86_EFLAGS_ZF);
++      }
++      __kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
++}
++NOKPROBE_SYMBOL(kprobe_emulate_jcc);
++
++static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
++{
++      bool match;
++
++      if (p->ainsn.loop.type != 3) {  /* LOOP* */
++              if (p->ainsn.loop.asize == 32)
++                      match = ((*(u32 *)&regs->cx)--) != 0;
++#ifdef CONFIG_X86_64
++              else if (p->ainsn.loop.asize == 64)
++                      match = ((*(u64 *)&regs->cx)--) != 0;
++#endif
++              else
++                      match = ((*(u16 *)&regs->cx)--) != 0;
++      } else {                        /* JCXZ */
++              if (p->ainsn.loop.asize == 32)
++                      match = *(u32 *)(&regs->cx) == 0;
++#ifdef CONFIG_X86_64
++              else if (p->ainsn.loop.asize == 64)
++                      match = *(u64 *)(&regs->cx) == 0;
++#endif
++              else
++                      match = *(u16 *)(&regs->cx) == 0;
++      }
++
++      if (p->ainsn.loop.type == 0)    /* LOOPNE */
++              match = match && !(regs->flags & X86_EFLAGS_ZF);
++      else if (p->ainsn.loop.type == 1)       /* LOOPE */
++              match = match && (regs->flags & X86_EFLAGS_ZF);
++
++      __kprobe_emulate_jmp(p, regs, match);
++}
++NOKPROBE_SYMBOL(kprobe_emulate_loop);
++
++static const int addrmode_regoffs[] = {
++      offsetof(struct pt_regs, ax),
++      offsetof(struct pt_regs, cx),
++      offsetof(struct pt_regs, dx),
++      offsetof(struct pt_regs, bx),
++      offsetof(struct pt_regs, sp),
++      offsetof(struct pt_regs, bp),
++      offsetof(struct pt_regs, si),
++      offsetof(struct pt_regs, di),
++#ifdef CONFIG_X86_64
++      offsetof(struct pt_regs, r8),
++      offsetof(struct pt_regs, r9),
++      offsetof(struct pt_regs, r10),
++      offsetof(struct pt_regs, r11),
++      offsetof(struct pt_regs, r12),
++      offsetof(struct pt_regs, r13),
++      offsetof(struct pt_regs, r14),
++      offsetof(struct pt_regs, r15),
++#endif
++};
++
++static void kprobe_emulate_call_indirect(struct kprobe *p, struct pt_regs *regs)
++{
++      unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
++
++      int3_emulate_call(regs, regs_get_register(regs, offs));
++}
++NOKPROBE_SYMBOL(kprobe_emulate_call_indirect);
++
++static void kprobe_emulate_jmp_indirect(struct kprobe *p, struct pt_regs *regs)
++{
++      unsigned long offs = addrmode_regoffs[p->ainsn.indirect.reg];
++
++      int3_emulate_jmp(regs, regs_get_register(regs, offs));
++}
++NOKPROBE_SYMBOL(kprobe_emulate_jmp_indirect);
++
++static int prepare_emulation(struct kprobe *p, struct insn *insn)
+ {
+       insn_byte_t opcode = insn->opcode.bytes[0];
+       switch (opcode) {
+       case 0xfa:              /* cli */
+       case 0xfb:              /* sti */
++      case 0x9c:              /* pushfl */
+       case 0x9d:              /* popf/popfd */
+-              /* Check whether the instruction modifies Interrupt Flag or not */
+-              p->ainsn.if_modifier = 1;
+-              break;
+-      case 0x9c:      /* pushfl */
+-              p->ainsn.is_pushf = 1;
++              /*
++               * IF modifiers must be emulated since it will enable interrupt while
++               * int3 single stepping.
++               */
++              p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
++              p->ainsn.opcode = opcode;
+               break;
+-      case 0xcf:      /* iret */
+-              p->ainsn.if_modifier = 1;
+-              fallthrough;
+       case 0xc2:      /* ret/lret */
+       case 0xc3:
+       case 0xca:
+       case 0xcb:
+-      case 0xea:      /* jmp absolute -- ip is correct */
+-              /* ip is already adjusted, no more changes required */
+-              p->ainsn.is_abs_ip = 1;
+-              /* Without resume jump, this is boostable */
+-              p->ainsn.boostable = 1;
++              p->ainsn.emulate_op = kprobe_emulate_ret;
+               break;
+-      case 0xe8:      /* call relative - Fix return addr */
+-              p->ainsn.is_call = 1;
++      case 0x9a:      /* far call absolute -- segment is not supported */
++      case 0xea:      /* far jmp absolute -- segment is not supported */
++      case 0xcc:      /* int3 */
++      case 0xcf:      /* iret -- in-kernel IRET is not supported */
++              return -EOPNOTSUPP;
+               break;
+-#ifdef CONFIG_X86_32
+-      case 0x9a:      /* call absolute -- same as call absolute, indirect */
+-              p->ainsn.is_call = 1;
+-              p->ainsn.is_abs_ip = 1;
++      case 0xe8:      /* near call relative */
++              p->ainsn.emulate_op = kprobe_emulate_call;
++              if (insn->immediate.nbytes == 2)
++                      p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
++              else
++                      p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
++              break;
++      case 0xeb:      /* short jump relative */
++      case 0xe9:      /* near jump relative */
++              p->ainsn.emulate_op = kprobe_emulate_jmp;
++              if (insn->immediate.nbytes == 1)
++                      p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
++              else if (insn->immediate.nbytes == 2)
++                      p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
++              else
++                      p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
++              break;
++      case 0x70 ... 0x7f:
++              /* 1 byte conditional jump */
++              p->ainsn.emulate_op = kprobe_emulate_jcc;
++              p->ainsn.jcc.type = opcode & 0xf;
++              p->ainsn.rel32 = *(char *)insn->immediate.bytes;
++              break;
++      case 0x0f:
++              opcode = insn->opcode.bytes[1];
++              if ((opcode & 0xf0) == 0x80) {
++                      /* 2 bytes Conditional Jump */
++                      p->ainsn.emulate_op = kprobe_emulate_jcc;
++                      p->ainsn.jcc.type = opcode & 0xf;
++                      if (insn->immediate.nbytes == 2)
++                              p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
++                      else
++                              p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
++              } else if (opcode == 0x01 &&
++                         X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
++                         X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
++                      /* VM extensions - not supported */
++                      return -EOPNOTSUPP;
++              }
++              break;
++      case 0xe0:      /* Loop NZ */
++      case 0xe1:      /* Loop */
++      case 0xe2:      /* Loop */
++      case 0xe3:      /* J*CXZ */
++              p->ainsn.emulate_op = kprobe_emulate_loop;
++              p->ainsn.loop.type = opcode & 0x3;
++              p->ainsn.loop.asize = insn->addr_bytes * 8;
++              p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
+               break;
+-#endif
+       case 0xff:
+               /*
+                * Since the 0xff is an extended group opcode, the instruction
+@@ -470,46 +669,57 @@ static void set_resume_flags(struct kpro
+                */
+               opcode = insn->modrm.bytes[0];
+               if ((opcode & 0x30) == 0x10) {
+-                      /*
+-                       * call absolute, indirect
+-                       * Fix return addr; ip is correct.
+-                       * But this is not boostable
+-                       */
+-                      p->ainsn.is_call = 1;
+-                      p->ainsn.is_abs_ip = 1;
+-                      break;
++                      if ((opcode & 0x8) == 0x8)
++                              return -EOPNOTSUPP;     /* far call */
++                      /* call absolute, indirect */
++                      p->ainsn.emulate_op = kprobe_emulate_call_indirect;
+               } else if ((opcode & 0x30) == 0x20) {
+-                      /*
+-                       * jmp near and far, absolute indirect
+-                       * ip is correct.
+-                       */
+-                      p->ainsn.is_abs_ip = 1;
+-                      /* Without resume jump, this is boostable */
+-                      p->ainsn.boostable = 1;
+-              }
++                      if ((opcode & 0x8) == 0x8)
++                              return -EOPNOTSUPP;     /* far jmp */
++                      /* jmp near absolute indirect */
++                      p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
++              } else
++                      break;
++
++              if (insn->addr_bytes != sizeof(unsigned long))
++                      return -EOPNOTSUPP;     /* Don't support differnt size */
++              if (X86_MODRM_MOD(opcode) != 3)
++                      return -EOPNOTSUPP;     /* TODO: support memory addressing */
++
++              p->ainsn.indirect.reg = X86_MODRM_RM(opcode);
++#ifdef CONFIG_X86_64
++              if (X86_REX_B(insn->rex_prefix.value))
++                      p->ainsn.indirect.reg += 8;
++#endif
++              break;
++      default:
+               break;
+       }
++      p->ainsn.size = insn->length;
++
++      return 0;
+ }
+ static int arch_copy_kprobe(struct kprobe *p)
+ {
+       struct insn insn;
+       kprobe_opcode_t buf[MAX_INSN_SIZE];
+-      int len;
++      int ret, len;
+       /* Copy an instruction with recovering if other optprobe modifies it.*/
+       len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
+       if (!len)
+               return -EINVAL;
+-      /*
+-       * __copy_instruction can modify the displacement of the instruction,
+-       * but it doesn't affect boostable check.
+-       */
+-      len = prepare_boost(buf, p, &insn);
++      /* Analyze the opcode and setup emulate functions */
++      ret = prepare_emulation(p, &insn);
++      if (ret < 0)
++              return ret;
+-      /* Analyze the opcode and set resume flags */
+-      set_resume_flags(p, &insn);
++      /* Add int3 for single-step or booster jmp */
++      len = prepare_singlestep(buf, p, &insn);
++      if (len < 0)
++              return len;
+       /* Also, displacement change doesn't affect the first byte */
+       p->opcode = buf[0];
+@@ -602,29 +812,7 @@ set_current_kprobe(struct kprobe *p, str
+ {
+       __this_cpu_write(current_kprobe, p);
+       kcb->kprobe_saved_flags = kcb->kprobe_old_flags
+-              = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
+-      if (p->ainsn.if_modifier)
+-              kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
+-}
+-
+-static nokprobe_inline void clear_btf(void)
+-{
+-      if (test_thread_flag(TIF_BLOCKSTEP)) {
+-              unsigned long debugctl = get_debugctlmsr();
+-
+-              debugctl &= ~DEBUGCTLMSR_BTF;
+-              update_debugctlmsr(debugctl);
+-      }
+-}
+-
+-static nokprobe_inline void restore_btf(void)
+-{
+-      if (test_thread_flag(TIF_BLOCKSTEP)) {
+-              unsigned long debugctl = get_debugctlmsr();
+-
+-              debugctl |= DEBUGCTLMSR_BTF;
+-              update_debugctlmsr(debugctl);
+-      }
++              = (regs->flags & X86_EFLAGS_IF);
+ }
+ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+@@ -639,6 +827,22 @@ void arch_prepare_kretprobe(struct kretp
+ }
+ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
++static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
++                             struct kprobe_ctlblk *kcb)
++{
++      if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
++              kcb->kprobe_status = KPROBE_HIT_SSDONE;
++              cur->post_handler(cur, regs, 0);
++      }
++
++      /* Restore back the original saved kprobes variables and continue. */
++      if (kcb->kprobe_status == KPROBE_REENTER)
++              restore_previous_kprobe(kcb);
++      else
++              reset_current_kprobe();
++}
++NOKPROBE_SYMBOL(kprobe_post_process);
++
+ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+                            struct kprobe_ctlblk *kcb, int reenter)
+ {
+@@ -646,7 +850,7 @@ static void setup_singlestep(struct kpro
+               return;
+ #if !defined(CONFIG_PREEMPTION)
+-      if (p->ainsn.boostable && !p->post_handler) {
++      if (p->ainsn.boostable) {
+               /* Boost up -- we can execute copied instructions directly */
+               if (!reenter)
+                       reset_current_kprobe();
+@@ -665,19 +869,51 @@ static void setup_singlestep(struct kpro
+               kcb->kprobe_status = KPROBE_REENTER;
+       } else
+               kcb->kprobe_status = KPROBE_HIT_SS;
+-      /* Prepare real single stepping */
+-      clear_btf();
+-      regs->flags |= X86_EFLAGS_TF;
++
++      if (p->ainsn.emulate_op) {
++              p->ainsn.emulate_op(p, regs);
++              kprobe_post_process(p, regs, kcb);
++              return;
++      }
++
++      /* Disable interrupt, and set ip register on trampoline */
+       regs->flags &= ~X86_EFLAGS_IF;
+-      /* single step inline if the instruction is an int3 */
+-      if (p->opcode == INT3_INSN_OPCODE)
+-              regs->ip = (unsigned long)p->addr;
+-      else
+-              regs->ip = (unsigned long)p->ainsn.insn;
++      regs->ip = (unsigned long)p->ainsn.insn;
+ }
+ NOKPROBE_SYMBOL(setup_singlestep);
+ /*
++ * Called after single-stepping.  p->addr is the address of the
++ * instruction whose first byte has been replaced by the "int3"
++ * instruction.  To avoid the SMP problems that can occur when we
++ * temporarily put back the original opcode to single-step, we
++ * single-stepped a copy of the instruction.  The address of this
++ * copy is p->ainsn.insn. We also doesn't use trap, but "int3" again
++ * right after the copied instruction.
++ * Different from the trap single-step, "int3" single-step can not
++ * handle the instruction which changes the ip register, e.g. jmp,
++ * call, conditional jmp, and the instructions which changes the IF
++ * flags because interrupt must be disabled around the single-stepping.
++ * Such instructions are software emulated, but others are single-stepped
++ * using "int3".
++ *
++ * When the 2nd "int3" handled, the regs->ip and regs->flags needs to
++ * be adjusted, so that we can resume execution on correct code.
++ */
++static void resume_singlestep(struct kprobe *p, struct pt_regs *regs,
++                            struct kprobe_ctlblk *kcb)
++{
++      unsigned long copy_ip = (unsigned long)p->ainsn.insn;
++      unsigned long orig_ip = (unsigned long)p->addr;
++
++      /* Restore saved interrupt flag and ip register */
++      regs->flags |= kcb->kprobe_saved_flags;
++      /* Note that regs->ip is executed int3 so must be a step back */
++      regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
++}
++NOKPROBE_SYMBOL(resume_singlestep);
++
++/*
+  * We have reentered the kprobe_handler(), since another probe was hit while
+  * within the handler. We save the original kprobes variables and just single
+  * step on the instruction of the new probe without calling any user handlers.
+@@ -712,6 +948,12 @@ static int reenter_kprobe(struct kprobe
+ }
+ NOKPROBE_SYMBOL(reenter_kprobe);
++static int nokprobe_inline kprobe_is_ss(struct kprobe_ctlblk *kcb)
++{
++      return (kcb->kprobe_status == KPROBE_HIT_SS ||
++              kcb->kprobe_status == KPROBE_REENTER);
++}
++
+ /*
+  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
+  * remain disabled throughout this function.
+@@ -756,7 +998,18 @@ int kprobe_int3_handler(struct pt_regs *
+                               reset_current_kprobe();
+                       return 1;
+               }
+-      } else if (*addr != INT3_INSN_OPCODE) {
++      } else if (kprobe_is_ss(kcb)) {
++              p = kprobe_running();
++              if ((unsigned long)p->ainsn.insn < regs->ip &&
++                  (unsigned long)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) {
++                      /* Most provably this is the second int3 for singlestep */
++                      resume_singlestep(p, regs, kcb);
++                      kprobe_post_process(p, regs, kcb);
++                      return 1;
++              }
++      }
++
++      if (*addr != INT3_INSN_OPCODE) {
+               /*
+                * The breakpoint instruction was removed right
+                * after we hit it.  Another cpu has removed
+@@ -829,91 +1082,6 @@ __used __visible void *trampoline_handle
+ }
+ NOKPROBE_SYMBOL(trampoline_handler);
+-/*
+- * Called after single-stepping.  p->addr is the address of the
+- * instruction whose first byte has been replaced by the "int 3"
+- * instruction.  To avoid the SMP problems that can occur when we
+- * temporarily put back the original opcode to single-step, we
+- * single-stepped a copy of the instruction.  The address of this
+- * copy is p->ainsn.insn.
+- *
+- * This function prepares to return from the post-single-step
+- * interrupt.  We have to fix up the stack as follows:
+- *
+- * 0) Except in the case of absolute or indirect jump or call instructions,
+- * the new ip is relative to the copied instruction.  We need to make
+- * it relative to the original instruction.
+- *
+- * 1) If the single-stepped instruction was pushfl, then the TF and IF
+- * flags are set in the just-pushed flags, and may need to be cleared.
+- *
+- * 2) If the single-stepped instruction was a call, the return address
+- * that is atop the stack is the address following the copied instruction.
+- * We need to make it the address following the original instruction.
+- */
+-static void resume_execution(struct kprobe *p, struct pt_regs *regs,
+-                           struct kprobe_ctlblk *kcb)
+-{
+-      unsigned long *tos = stack_addr(regs);
+-      unsigned long copy_ip = (unsigned long)p->ainsn.insn;
+-      unsigned long orig_ip = (unsigned long)p->addr;
+-
+-      regs->flags &= ~X86_EFLAGS_TF;
+-
+-      /* Fixup the contents of top of stack */
+-      if (p->ainsn.is_pushf) {
+-              *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF);
+-              *tos |= kcb->kprobe_old_flags;
+-      } else if (p->ainsn.is_call) {
+-              *tos = orig_ip + (*tos - copy_ip);
+-      }
+-
+-      if (!p->ainsn.is_abs_ip)
+-              regs->ip += orig_ip - copy_ip;
+-
+-      restore_btf();
+-}
+-NOKPROBE_SYMBOL(resume_execution);
+-
+-/*
+- * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+- * remain disabled throughout this function.
+- */
+-int kprobe_debug_handler(struct pt_regs *regs)
+-{
+-      struct kprobe *cur = kprobe_running();
+-      struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+-
+-      if (!cur)
+-              return 0;
+-
+-      resume_execution(cur, regs, kcb);
+-      regs->flags |= kcb->kprobe_saved_flags;
+-
+-      if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+-              kcb->kprobe_status = KPROBE_HIT_SSDONE;
+-              cur->post_handler(cur, regs, 0);
+-      }
+-
+-      /* Restore back the original saved kprobes variables and continue. */
+-      if (kcb->kprobe_status == KPROBE_REENTER) {
+-              restore_previous_kprobe(kcb);
+-              goto out;
+-      }
+-      reset_current_kprobe();
+-out:
+-      /*
+-       * if somebody else is singlestepping across a probe point, flags
+-       * will have TF set, in which case, continue the remaining processing
+-       * of do_debug, as if this is not a probe hit.
+-       */
+-      if (regs->flags & X86_EFLAGS_TF)
+-              return 0;
+-
+-      return 1;
+-}
+-NOKPROBE_SYMBOL(kprobe_debug_handler);
+-
+ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+ {
+       struct kprobe *cur = kprobe_running();
+@@ -931,20 +1099,9 @@ int kprobe_fault_handler(struct pt_regs
+                * normal page fault.
+                */
+               regs->ip = (unsigned long)cur->addr;
+-              /*
+-               * Trap flag (TF) has been set here because this fault
+-               * happened where the single stepping will be done.
+-               * So clear it by resetting the current kprobe:
+-               */
+-              regs->flags &= ~X86_EFLAGS_TF;
+-              /*
+-               * Since the single step (trap) has been cancelled,
+-               * we need to restore BTF here.
+-               */
+-              restore_btf();
+               /*
+-               * If the TF flag was set before the kprobe hit,
++               * If the IF flag was set before the kprobe hit,
+                * don't touch it:
+                */
+               regs->flags |= kcb->kprobe_old_flags;
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -917,9 +917,6 @@ static __always_inline void exc_debug_ke
+       if ((dr6 & DR_STEP) && is_sysenter_singlestep(regs))
+               dr6 &= ~DR_STEP;
+-      if (kprobe_debug_handler(regs))
+-              goto out;
+-
+       /*
+        * The kernel doesn't use INT1
+        */