]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf trace: Skip internal syscall arguments
author: Namhyung Kim <namhyung@kernel.org>
Thu, 27 Nov 2025 04:44:18 +0000 (20:44 -0800)
committer: Namhyung Kim <namhyung@kernel.org>
Sat, 29 Nov 2025 20:23:37 +0000 (12:23 -0800)
Recent changes in the linux-next kernel will add new fields to the syscall
tracepoint events that hold the contents of userspace memory, as shown below.

  # cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
  name: sys_enter_write
  ID: 758
  format:
          field:unsigned short common_type;       offset:0;       size:2; signed:0;
          field:unsigned char common_flags;       offset:2;       size:1; signed:0;
          field:unsigned char common_preempt_count;       offset:3;       size:1; signed:0;
          field:int common_pid;   offset:4;       size:4; signed:1;

          field:int __syscall_nr; offset:8;       size:4; signed:1;
          field:unsigned int fd;  offset:16;      size:8; signed:0;
          field:const char * buf; offset:24;      size:8; signed:0;
          field:size_t count;     offset:32;      size:8; signed:0;
          field:__data_loc char[] __buf_val;      offset:40;      size:4; signed:0;

  print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
             ((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
             ((unsigned long)(REC->count))

perf trace has its own way of handling those arguments, so this change
confuses it and makes some tests fail.  Fix it by skipping the new
fields that have the "__data_loc char[]" type.

Maybe we can switch to this instead of the BPF augmentation later.

Reviewed-by: Howard Chu <howardchu95@gmail.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Howard Chu <howardchu95@gmail.com>
Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/builtin-trace.c

index a743bda294bd34009d6039889e230fb4113351ac..baee1f6956001d86f922d9f816acb4d008c38c67 100644 (file)
@@ -2069,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
        return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
 }
 
+/*
+ * v6.19 kernel added new fields to read userspace memory for event tracing.
+ * But it's not used by perf and confuses the syscall parameters.
+ */
+static bool is_internal_field(struct tep_format_field *field)
+{
+       return !strcmp(field->type, "__data_loc char[]");
+}
+
 static struct tep_format_field *
 syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
                            bool *use_btf)
@@ -2077,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
        int len;
 
        for (; field; field = field->next, ++arg) {
+               /* assume it's the last argument */
+               if (is_internal_field(field))
+                       continue;
+
                last_field = field;
 
                if (arg->scnprintf)
@@ -2145,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
 {
        char tp_name[128];
        const char *name;
+       struct tep_format_field *field;
        int err;
 
        if (sc->nonexistent)
@@ -2201,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
                --sc->nr_args;
        }
 
+       field = sc->args;
+       while (field) {
+               if (is_internal_field(field))
+                       --sc->nr_args;
+               field = field->next;
+       }
+
        sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
        sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");