git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
perf script: Add capstone support for '-F +brstackdisasm'
author: Andi Kleen <ak@linux.intel.com>
Mon, 1 Apr 2024 21:08:04 +0000 (14:08 -0700)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 5 Apr 2024 13:43:07 +0000 (10:43 -0300)
Support capstone output for the '-F +brstackinsn' branch dump.

The new output is enabled with the new field 'brstackdisasm'.

This was already possible with --xed; now it also works for users who
do not have xed, by using the built-in capstone support.

Before:

  perf record -b emacs -Q --batch '()'
  perf script -F +brstackinsn
  ...
            emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
          00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [3]
          00007f0ab2d17105        insn: 73 51
          00007f0ab2d17107        insn: 48 89 c1
          00007f0ab2d1710a        insn: 48 39 ca
          00007f0ab2d1710d        insn: 73 96
          00007f0ab2d1710f        insn: 48 8d 04 11
          00007f0ab2d17113        insn: 48 d1 e8
          00007f0ab2d17116        insn: 49 8d 34 c1
          00007f0ab2d1711a        insn: 44 3a 06
          00007f0ab2d1711d        insn: 75 e6                     # PRED 3 cycles [6] 3.00 IPC
          00007f0ab2d17105        insn: 73 51                     # PRED 1 cycles [7] 1.00 IPC
          00007f0ab2d17158        insn: 48 8d 50 01
          00007f0ab2d1715c        insn: eb 92                     # PRED 1 cycles [8] 2.00 IPC
          00007f0ab2d170f0        insn: 48 39 ca
          00007f0ab2d170f3        insn: 73 b0                     # PRED 1 cycles [9] 2.00 IPC

After (perf must be compiled with capstone):

  perf script -F +brstackdisasm

  ...
             emacs   55778 1814366.755945:     151564 cycles:P:      7f0ab2d17192 intel_check_word.constprop.0+0x162 (/usr/lib64/ld-linux-x86-64.s>        intel_check_word.constprop.0+237:
          00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [3]
          00007f0ab2d17105        jae intel_check_word.constprop.0+0x128
          00007f0ab2d17107        movq %rax, %rcx
          00007f0ab2d1710a        cmpq %rcx, %rdx
          00007f0ab2d1710d        jae intel_check_word.constprop.0+0x75
          00007f0ab2d1710f        leaq (%rcx, %rdx), %rax
          00007f0ab2d17113        shrq $1, %rax
          00007f0ab2d17116        leaq (%r9, %rax, 8), %rsi
          00007f0ab2d1711a        cmpb (%rsi), %r8b
          00007f0ab2d1711d        jne intel_check_word.constprop.0+0xd5   # PRED 3 cycles [6] 3.00 IPC
          00007f0ab2d17105        jae intel_check_word.constprop.0+0x128  # PRED 1 cycles [7] 1.00 IPC
          00007f0ab2d17158        leaq 1(%rax), %rdx
          00007f0ab2d1715c        jmp intel_check_word.constprop.0+0xc0   # PRED 1 cycles [8] 2.00 IPC
          00007f0ab2d170f0        cmpq %rcx, %rdx
          00007f0ab2d170f3        jae intel_check_word.constprop.0+0x75   # PRED 1 cycles [9] 2.00 IPC

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/20240401210925.209671-3-ak@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-script.txt
tools/perf/builtin-script.c
tools/perf/util/dump-insn.h
tools/perf/util/print_insn.c
tools/perf/util/print_insn.h

index 005e51df855e7cda1451abfbca8930a140138bf5..ff086ef05a0c506f1bfe08401361e6ef8ddd5866 100644 (file)
@@ -132,9 +132,9 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
         srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
-        brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm,
+        brstackinsn, brstackinsnlen, brstackdisasm, brstackoff, callindent, insn, disasm,
         insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size,
-        code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat.
+        code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat.
 
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
@@ -257,6 +257,9 @@ OPTIONS
        can’t know the next sequential instruction after an unconditional branch unless
        you calculate that based on its length.
 
+       brstackdisasm acts like brstackinsn, but will print disassembled instructions if
+       perf is built with the capstone library.
+
        The brstackoff field will print an offset into a specific dso/binary.
 
        With the metric option perf script can compute metrics for
index a711bedace475953cf74bc11526e6c5aa513367d..dd10f158ed0cde58bb41f6b484514392708645f3 100644 (file)
@@ -136,6 +136,7 @@ enum perf_output_field {
        PERF_OUTPUT_RETIRE_LAT      = 1ULL << 40,
        PERF_OUTPUT_DSOFF           = 1ULL << 41,
        PERF_OUTPUT_DISASM          = 1ULL << 42,
+       PERF_OUTPUT_BRSTACKDISASM   = 1ULL << 43,
 };
 
 struct perf_script {
@@ -210,6 +211,7 @@ struct output_option {
        {.str = "vcpu", .field = PERF_OUTPUT_VCPU},
        {.str = "cgroup", .field = PERF_OUTPUT_CGROUP},
        {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT},
+       {.str = "brstackdisasm", .field = PERF_OUTPUT_BRSTACKDISASM},
 };
 
 enum {
@@ -510,7 +512,8 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
                       "selected. Hence, no address to lookup the source line number.\n");
                return -EINVAL;
        }
-       if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN)) && !allow_user_set &&
+       if ((PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
+           && !allow_user_set &&
            !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
                pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
                       "Hint: run 'perf record -b ...'\n");
@@ -1162,6 +1165,20 @@ out:
        return ret;
 }
 
+static const char *any_dump_insn(struct perf_event_attr *attr __maybe_unused,
+                       struct perf_insn *x, uint64_t ip,
+                       u8 *inbuf, int inlen, int *lenp)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+       if (PRINT_FIELD(BRSTACKDISASM)) {
+               const char *p = cs_dump_insn(x, ip, inbuf, inlen, lenp);
+               if (p)
+                       return p;
+       }
+#endif
+       return dump_insn(x, ip, inbuf, inlen, lenp);
+}
+
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
                            struct perf_insn *x, u8 *inbuf, int len,
                            int insn, FILE *fp, int *total_cycles,
@@ -1170,7 +1187,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 {
        int ilen = 0;
        int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
-                             dump_insn(x, ip, inbuf, len, &ilen));
+                             any_dump_insn(attr, x, ip, inbuf, len, &ilen));
 
        if (PRINT_FIELD(BRSTACKINSNLEN))
                printed += fprintf(fp, "ilen: %d\t", ilen);
@@ -1262,6 +1279,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                nr = max_blocks + 1;
 
        x.thread = thread;
+       x.machine = machine;
        x.cpu = sample->cpu;
 
        printed += fprintf(fp, "%c", '\n');
@@ -1313,7 +1331,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                        } else {
                                ilen = 0;
                                printed += fprintf(fp, "\t%016" PRIx64 "\t%s", ip,
-                                                  dump_insn(&x, ip, buffer + off, len - off, &ilen));
+                                                  any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen));
                                if (PRINT_FIELD(BRSTACKINSNLEN))
                                        printed += fprintf(fp, "\tilen: %d", ilen);
                                printed += fprintf(fp, "\n");
@@ -1361,7 +1379,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                        goto out;
                ilen = 0;
                printed += fprintf(fp, "\t%016" PRIx64 "\t%s", sample->ip,
-                       dump_insn(&x, sample->ip, buffer, len, &ilen));
+                       any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen));
                if (PRINT_FIELD(BRSTACKINSNLEN))
                        printed += fprintf(fp, "\tilen: %d", ilen);
                printed += fprintf(fp, "\n");
@@ -1372,7 +1390,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
        for (off = 0; off <= end - start; off += ilen) {
                ilen = 0;
                printed += fprintf(fp, "\t%016" PRIx64 "\t%s", start + off,
-                                  dump_insn(&x, start + off, buffer + off, len - off, &ilen));
+                                  any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen));
                if (PRINT_FIELD(BRSTACKINSNLEN))
                        printed += fprintf(fp, "\tilen: %d", ilen);
                printed += fprintf(fp, "\n");
@@ -1534,7 +1552,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
                printed += fprintf(fp, "\t\t");
                printed += sample__fprintf_insn_asm(sample, thread, machine, fp, al);
        }
-       if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
+       if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN) || PRINT_FIELD(BRSTACKDISASM))
                printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
 
        return printed;
@@ -3940,7 +3958,7 @@ int cmd_script(int argc, const char **argv)
                     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff,"
                     "addr,symoff,srcline,period,iregs,uregs,brstack,"
                     "brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
-                    "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth,"
+                    "brstackinsnlen,brstackdisasm,brstackoff,callindent,insn,disasm,insnlen,synth,"
                     "phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
                     "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
                     parse_output_fields),
index 650125061530934c3f175d90ca8c82e2c59f99ca..4a7797dd6d0924ebd125d16dcc33a629177fe459 100644 (file)
@@ -11,6 +11,7 @@ struct thread;
 struct perf_insn {
        /* Initialized by callers: */
        struct thread *thread;
+       struct machine *machine;
        u8            cpumode;
        bool          is64bit;
        int           cpu;
index 32dc9dad9cf2950764be861b614612137c3b8161..8825330d435f918b864505f4d82afad2c486dd7a 100644 (file)
@@ -12,6 +12,7 @@
 #include "machine.h"
 #include "thread.h"
 #include "print_insn.h"
+#include "dump-insn.h"
 #include "map.h"
 #include "dso.h"
 
@@ -71,6 +72,57 @@ static int capstone_init(struct machine *machine, csh *cs_handle, bool is64)
        return 0;
 }
 
+/*
+ * Format one decoded x86 instruction into x->out.
+ *
+ * If the instruction has exactly one operand, that operand is an immediate,
+ * and thread__find_symbol() maps it to a symbol with al.addr below the
+ * symbol's end, the output is "<mnemonic> <sym>+<offset> [<imm>]".
+ * In every other case the output is "<mnemonic> <op_str>" as produced by
+ * capstone.
+ */
+static void dump_insn_x86(struct thread *thread, cs_insn *insn, struct perf_insn *x)
+{
+       struct addr_location al;
+       cs_x86_op *operand;
+       bool resolved;
+
+       if (!insn->detail || insn->detail->x86.op_count != 1)
+               goto plain;
+
+       operand = &insn->detail->x86.operands[0];
+
+       addr_location__init(&al);
+       resolved = operand->type == X86_OP_IMM &&
+                  thread__find_symbol(thread, x->cpumode, operand->imm, &al) &&
+                  al.sym &&
+                  al.addr < al.sym->end;
+       if (resolved)
+               snprintf(x->out, sizeof(x->out), "%s %s+%#" PRIx64 " [%#" PRIx64 "]", insn[0].mnemonic,
+                               al.sym->name, al.addr - al.sym->start, operand->imm);
+       /* al is released whether or not the lookup succeeded. */
+       addr_location__exit(&al);
+       if (resolved)
+               return;
+
+plain:
+       snprintf(x->out, sizeof(x->out), "%s %s", insn[0].mnemonic, insn[0].op_str);
+}
+
+/*
+ * Disassemble the first instruction in @inbuf with capstone and format it
+ * into x->out.
+ *
+ * @x:     supplies machine, thread and is64bit; x->out receives the text
+ * @ip:    address handed to cs_disasm() as the instruction's address
+ * @inbuf: raw instruction bytes
+ * @inlen: number of bytes available in @inbuf
+ * @lenp:  set to the decoded instruction size on success, untouched otherwise
+ *
+ * Returns x->out on success, or NULL when there are no bytes to decode,
+ * capstone_init() fails, or cs_disasm() decodes nothing.
+ */
+const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
+                        u8 *inbuf, int inlen, int *lenp)
+{
+       const char *text = NULL;
+       cs_insn *insn;
+       csh cs_handle;
+       size_t count;
+
+       /* cs_disasm() takes a size_t length; never pass a negative one. */
+       if (inlen <= 0)
+               return NULL;
+
+       if (capstone_init(x->machine, &cs_handle, x->is64bit) < 0)
+               return NULL;
+
+       count = cs_disasm(cs_handle, (uint8_t *)inbuf, inlen, ip, 1, &insn);
+       if (count > 0) {
+               if (machine__normalized_is(x->machine, "x86"))
+                       dump_insn_x86(x->thread, &insn[0], x);
+               else
+                       snprintf(x->out, sizeof(x->out), "%s %s",
+                                       insn[0].mnemonic, insn[0].op_str);
+               *lenp = insn->size;
+               cs_free(insn, count);
+               text = x->out;
+       }
+
+       /*
+        * Release the handle on both the success and the decode-failure
+        * path so repeated calls do not leak it.
+        * NOTE(review): assumes capstone_init() leaves an open handle in
+        * cs_handle when it returns >= 0 - confirm against its body.
+        */
+       cs_close(&cs_handle);
+       return text;
+}
+
 static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
                             cs_insn *insn, FILE *fp)
 {
index 6447dd41b5437a3a6c99b3a910f85f16f89cc11c..c2a6391a45cea97034a2b766b6fbd3f42bb2bb59 100644 (file)
@@ -8,9 +8,12 @@
 struct perf_sample;
 struct thread;
 struct machine;
+struct perf_insn;
 
 size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
                                struct machine *machine, FILE *fp, struct addr_location *al);
 size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
+const char *cs_dump_insn(struct perf_insn *x, uint64_t ip,
+                         u8 *inbuf, int inlen, int *lenp);
 
 #endif /* PERF_PRINT_INSN_H */