git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tools/delaytop: add flexible sorting by delay field
author: Fan Yu <fan.yu9@zte.com.cn>
Sat, 6 Sep 2025 16:12:05 +0000 (00:12 +0800)
committer: Andrew Morton <akpm@linux-foundation.org>
Sun, 14 Sep 2025 00:32:55 +0000 (17:32 -0700)
Patch series "tools/delaytop: implement real-time keyboard interaction
support", v2.

Current Limitations
===================

The current delaytop implementation has two main limitations:

1) Static sorting only by CPU delay, forcing users to restart with
   different parameters to analyze other resource bottlenecks.

2) Memory delay information is always expanded, causing information
   overload when only high-level memory pressure monitoring is needed.

Improvements
============

1) Implemented dynamic sorting capability
- Interactive key 'o' triggers sort mode.
- Supports sorting by CPU/IO/Memory/IRQ delays.
- Memory subcategories available in verbose mode.
 * c - CPU delay (default)
 * i - IO delay
 * m - Total memory delay
 * q - IRQ delay
 * s/r/t/p/w - Memory subcategories (in verbose mode)

2) Added memory display modes
- Compact view (default): shows aggregated memory delays.
- Verbose view ('M' key): breaks down into memory sub-delays.
 * SWAP - swapin delays
 * RCL - freepages reclaim delays
 * THR - thrashing delays
 * CMP - compaction delays
 * WP - write-protect copy delays

Practical benefits
==================

1) Dynamic sorting for real-time bottleneck detection: system
   administrators can now dynamically change sorting to identify different
   types of resource bottlenecks without restarting.

2) Enhanced usability with on-screen keybindings: more intuitive
   interactive usage thanks to the on-screen keybinding help, and reduced
   screen clutter when only a memory overview is needed.

Use Case
========
# ./delaytop
System Pressure Information: (avg10/avg60/avg300/total)
CPU some:       0.0%/   0.0%/   0.0%/  106817(ms)
CPU full:       0.0%/   0.0%/   0.0%/       0(ms)
Memory full:    0.0%/   0.0%/   0.0%/       0(ms)
Memory some:    0.0%/   0.0%/   0.0%/       0(ms)
IO full:        0.0%/   0.0%/   0.0%/    2245(ms)
IO some:        0.0%/   0.0%/   0.0%/    2791(ms)
IRQ full:       0.0%/   0.0%/   0.0%/       0(ms)
[o]sort [M]memverbose [q]quit
Top 20 processes (sorted by cpu delay):
     PID      TGID  COMMAND           CPU(ms)   IO(ms)  IRQ(ms)  MEM(ms)
------------------------------------------------------------------------
     110       110  kworker/15:0H-s   27.91     0.00     0.00     0.00
      57        57  cpuhp/7            3.18     0.00     0.00     0.00
      99        99  cpuhp/14           2.97     0.00     0.00     0.00
      51        51  cpuhp/6            0.90     0.00     0.00     0.00
      44        44  kworker/4:0H-sy    0.80     0.00     0.00     0.00
      76        76  idle_inject/10     0.31     0.00     0.00     0.00
     100       100  idle_inject/14     0.30     0.00     0.00     0.00
    1309      1309  systemsettings     0.29     0.00     0.00     0.00
      60        60  ksoftirqd/7        0.28     0.00     0.00     0.00
      45        45  cpuhp/5            0.22     0.00     0.00     0.00
      63        63  cpuhp/8            0.20     0.00     0.00     0.00
      87        87  cpuhp/12           0.18     0.00     0.00     0.00
      93        93  cpuhp/13           0.17     0.00     0.00     0.00
    1265      1265  acpid              0.17     0.00     0.00     0.00
    1552      1552  sshd               0.17     0.00     0.00     0.00
    2584      2584  sddm-helper        0.16     0.00     0.00     0.00
    1284      1284  rtkit-daemon       0.15     0.00     0.00     0.00
    1326      1326  nde-netfilter      0.14     0.00     0.00     0.00
      27        27  cpuhp/2            0.13     0.00     0.00     0.00
     631       631  kworker/11:2-rc    0.11     0.00     0.00     0.00

# ./delaytop -M
System Pressure Information: (avg10/avg60/avg300/total)
CPU some:       0.0%/   0.0%/   0.0%/  106827(ms)
CPU full:       0.0%/   0.0%/   0.0%/       0(ms)
Memory full:    0.0%/   0.0%/   0.0%/       0(ms)
Memory some:    0.0%/   0.0%/   0.0%/       0(ms)
IO full:        0.0%/   0.0%/   0.0%/    2245(ms)
IO some:        0.0%/   0.0%/   0.0%/    2791(ms)
IRQ full:       0.0%/   0.0%/   0.0%/       0(ms)
[o]sort [M]memverbose [q]quit
Top 20 processes (sorted by mem delay):
     PID      TGID  COMMAND           MEM(ms) SWAP(ms)  RCL(ms)  THR(ms)  CMP(ms)   WP(ms)
------------------------------------------------------------------------------------------
  121732    121732  delaytop           0.01     0.00     0.00     0.00     0.00     0.01
   95876     95876  top                0.00     0.00     0.00     0.00     0.00     0.00
  121641    121641  systemd-userwor    0.00     0.00     0.00     0.00     0.00     0.00
  121693    121693  systemd-userwor    0.00     0.00     0.00     0.00     0.00     0.00
  121661    121661  systemd-userwor    0.00     0.00     0.00     0.00     0.00     0.00
       1         1  systemd            0.00     0.00     0.00     0.00     0.00     0.00
       2         2  kthreadd           0.00     0.00     0.00     0.00     0.00     0.00
       3         3  pool_workqueue_    0.00     0.00     0.00     0.00     0.00     0.00
       4         4  kworker/R-rcu_g    0.00     0.00     0.00     0.00     0.00     0.00
       5         5  kworker/R-rcu_p    0.00     0.00     0.00     0.00     0.00     0.00
       6         6  kworker/R-slub_    0.00     0.00     0.00     0.00     0.00     0.00
       7         7  kworker/R-netns    0.00     0.00     0.00     0.00     0.00     0.00
       9         9  kworker/0:0H-sy    0.00     0.00     0.00     0.00     0.00     0.00
      11        11  kworker/u32:0-n    0.00     0.00     0.00     0.00     0.00     0.00
      12        12  kworker/R-mm_pe    0.00     0.00     0.00     0.00     0.00     0.00
      13        13  rcu_tasks_kthre    0.00     0.00     0.00     0.00     0.00     0.00
      14        14  rcu_tasks_rude_    0.00     0.00     0.00     0.00     0.00     0.00
      15        15  rcu_tasks_trace    0.00     0.00     0.00     0.00     0.00     0.00
      16        16  ksoftirqd/0        0.00     0.00     0.00     0.00     0.00     0.00
      17        17  rcu_preempt        0.00     0.00     0.00     0.00     0.00     0.00

When psi is not enabled:
# ./delaytop
System Pressure Information: (avg10/avg60/avg300/total)
  PSI not found: check if psi=1 enabled in cmdline

This patch (of 5):

The delaytop tool only supported sorting by CPU delay, which limited its
usefulness when users needed to identify bottlenecks in other subsystems.
Users had no way to sort processes by IO, IRQ, or other delay types to
quickly pinpoint specific performance issues.

Add -s/--sort option to allow sorting by different delay types.  Users can
now quickly identify bottlenecks in specific subsystems by sorting
processes by the relevant delay metric.

Link: https://lkml.kernel.org/r/20250907001101305vrTGnXaRNvtmsGkp-Ljk_@zte.com.cn
Link: https://lkml.kernel.org/r/20250907001205573L3XpsQMIQnLgDqiiKYd3H@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Wang Yaxin <wang.yaxin@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
tools/accounting/delaytop.c

index 9afb1ffc00baf65d348775873e05052bf294cd8e..52718714496b4262a8ecbb293a8a2c2852651ec1 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/genetlink.h>
 #include <linux/taskstats.h>
 #include <linux/cgroupstats.h>
+#include <stddef.h>
 
 #define PSI_CPU_SOME "/proc/pressure/cpu"
 #define PSI_CPU_FULL   "/proc/pressure/cpu"
@@ -61,6 +62,7 @@
 #define TASK_COMM_LEN  16
 #define MAX_MSG_SIZE   1024
 #define MAX_TASKS              1000
+#define MAX_BUF_LEN            256
 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
 #define BOOL_FPRINT(stream, fmt, ...) \
 ({ \
        ret >= 0; \
 })
 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
-
-/* Program settings structure */
-struct config {
-       int delay;                              /* Update interval in seconds */
-       int iterations;                 /* Number of iterations, 0 == infinite */
-       int max_processes;              /* Maximum number of processes to show */
-       char sort_field;                /* Field to sort by */
-       int output_one_time;    /* Output once and exit */
-       int monitor_pid;                /* Monitor specific PID */
-       char *container_path;   /* Path to container cgroup */
-};
+#define SORT_FIELD(name) \
+       {#name, \
+       offsetof(struct task_info, name##_delay_total), \
+       offsetof(struct task_info, name##_count)}
+#define END_FIELD {NULL, 0, 0}
 
 /* PSI statistics structure */
 struct psi_stats {
@@ -130,6 +126,24 @@ struct container_stats {
        int nr_io_wait;                 /* Number of processes in IO wait */
 };
 
+/* Delay field structure */
+struct field_desc {
+       const char *name;       /* Field name for cmdline argument */
+       unsigned long total_offset; /* Offset of total delay in task_info */
+       unsigned long count_offset; /* Offset of count in task_info */
+};
+
+/* Program settings structure */
+struct config {
+       int delay;                              /* Update interval in seconds */
+       int iterations;                 /* Number of iterations, 0 == infinite */
+       int max_processes;              /* Maximum number of processes to show */
+       int output_one_time;    /* Output once and exit */
+       int monitor_pid;                /* Monitor specific PID */
+       char *container_path;   /* Path to container cgroup */
+       const struct field_desc *sort_field;    /* Current sort field */
+};
+
 /* Global variables */
 static struct config cfg;
 static struct psi_stats psi;
@@ -137,6 +151,17 @@ static struct task_info tasks[MAX_TASKS];
 static int task_count;
 static int running = 1;
 static struct container_stats container_stats;
+static const struct field_desc sort_fields[] = {
+       SORT_FIELD(cpu),
+       SORT_FIELD(blkio),
+       SORT_FIELD(irq),
+       SORT_FIELD(swapin),
+       SORT_FIELD(freepages),
+       SORT_FIELD(thrashing),
+       SORT_FIELD(compact),
+       SORT_FIELD(wpcopy),
+       END_FIELD
+};
 
 /* Netlink socket variables */
 static int nl_sd = -1;
@@ -158,18 +183,59 @@ static void disable_raw_mode(void)
        tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
 }
 
+/* Find field descriptor by name with string comparison */
+static const struct field_desc *get_field_by_name(const char *name)
+{
+       const struct field_desc *field;
+       size_t field_len;
+
+       for (field = sort_fields; field->name != NULL; field++) {
+               field_len = strlen(field->name);
+               if (field_len != strlen(name))
+                       continue;
+               if (strncmp(field->name, name, field_len) == 0)
+                       return field;
+       }
+
+       return NULL;
+}
+
+/* Find display name for a field descriptor */
+static const char *get_name_by_field(const struct field_desc *field)
+{
+       return field ? field->name : "UNKNOWN";
+}
+
+/* Generate string of available field names */
+static void display_available_fields(void)
+{
+       const struct field_desc *field;
+       char buf[MAX_BUF_LEN];
+
+       buf[0] = '\0';
+
+       for (field = sort_fields; field->name != NULL; field++) {
+               strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
+               strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
+               buf[MAX_BUF_LEN - 1] = '\0';
+       }
+
+       fprintf(stderr, "Available fields: %s\n", buf);
+}
+
 /* Display usage information and command line options */
 static void usage(void)
 {
        printf("Usage: delaytop [Options]\n"
        "Options:\n"
-       "  -h, --help                           Show this help message and exit\n"
-       "  -d, --delay=SECONDS    Set refresh interval (default: 2 seconds, min: 1)\n"
-       "  -n, --iterations=COUNT       Set number of updates (default: 0 = infinite)\n"
-       "  -P, --processes=NUMBER       Set maximum number of processes to show (default: 20, max: 1000)\n"
-       "  -o, --once                           Display once and exit\n"
-       "  -p, --pid=PID                        Monitor only the specified PID\n"
-       "  -C, --container=PATH  Monitor the container at specified cgroup path\n");
+       "  -h, --help               Show this help message and exit\n"
+       "  -d, --delay=SECONDS      Set refresh interval (default: 2 seconds, min: 1)\n"
+       "  -n, --iterations=COUNT   Set number of updates (default: 0 = infinite)\n"
+       "  -P, --processes=NUMBER   Set maximum number of processes to show (default: 20, max: 1000)\n"
+       "  -o, --once               Display once and exit\n"
+       "  -p, --pid=PID            Monitor only the specified PID\n"
+       "  -C, --container=PATH     Monitor the container at specified cgroup path\n"
+       "  -s, --sort=FIELD         Sort by delay field (default: cpu)\n");
        exit(0);
 }
 
@@ -177,6 +243,7 @@ static void usage(void)
 static void parse_args(int argc, char **argv)
 {
        int c;
+       const struct field_desc *field;
        struct option long_options[] = {
                {"help", no_argument, 0, 'h'},
                {"delay", required_argument, 0, 'd'},
@@ -184,6 +251,7 @@ static void parse_args(int argc, char **argv)
                {"pid", required_argument, 0, 'p'},
                {"once", no_argument, 0, 'o'},
                {"processes", required_argument, 0, 'P'},
+               {"sort", required_argument, 0, 's'},
                {"container", required_argument, 0, 'C'},
                {0, 0, 0, 0}
        };
@@ -192,7 +260,7 @@ static void parse_args(int argc, char **argv)
        cfg.delay = 2;
        cfg.iterations = 0;
        cfg.max_processes = 20;
-       cfg.sort_field = 'c';   /* Default sort by CPU delay */
+       cfg.sort_field = &sort_fields[0];       /* Default sorted by CPU delay */
        cfg.output_one_time = 0;
        cfg.monitor_pid = 0;    /* 0 means monitor all PIDs */
        cfg.container_path = NULL;
@@ -200,7 +268,7 @@ static void parse_args(int argc, char **argv)
        while (1) {
                int option_index = 0;
 
-               c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
+               c = getopt_long(argc, argv, "hd:n:p:oP:C:s:", long_options, &option_index);
                if (c == -1)
                        break;
 
@@ -247,6 +315,22 @@ static void parse_args(int argc, char **argv)
                case 'C':
                        cfg.container_path = strdup(optarg);
                        break;
+               case 's':
+                       if (strlen(optarg) == 0) {
+                               fprintf(stderr, "Error: empty sort field\n");
+                               exit(1);
+                       }
+
+                       field = get_field_by_name(optarg);
+                       /* Show available fields if invalid option provided */
+                       if (!field) {
+                               fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
+                               display_available_fields();
+                               exit(1);
+                       }
+
+                       cfg.sort_field = field;
+                       break;
                default:
                        fprintf(stderr, "Try 'delaytop --help' for more information.\n");
                        exit(1);
@@ -587,19 +671,23 @@ static int compare_tasks(const void *a, const void *b)
 {
        const struct task_info *t1 = (const struct task_info *)a;
        const struct task_info *t2 = (const struct task_info *)b;
+       unsigned long long total1;
+       unsigned long long total2;
+       unsigned long count1;
+       unsigned long count2;
        double avg1, avg2;
 
-       switch (cfg.sort_field) {
-       case 'c': /* CPU */
-               avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
-               avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
-               if (avg1 != avg2)
-                       return avg2 > avg1 ? 1 : -1;
-               return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
+       total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
+       total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
+       count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
+       count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);
 
-       default:
-               return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
-       }
+       avg1 = average_ms(total1, count1);
+       avg2 = average_ms(total2, count2);
+       if (avg1 != avg2)
+               return avg2 > avg1 ? 1 : -1;
+
+       return 0;
 }
 
 /* Sort tasks by selected field */
@@ -738,8 +826,9 @@ static void display_results(void)
                        container_stats.nr_stopped, container_stats.nr_uninterruptible,
                        container_stats.nr_io_wait);
        }
-       suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
-                       cfg.max_processes);
+       /* Task delay output */
+       suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
+                       cfg.max_processes, get_name_by_field(cfg.sort_field));
        suc &= BOOL_FPRINT(out, "%5s  %5s  %-17s", "PID", "TGID", "COMMAND");
        suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
                "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",