]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
delaytop: add psi info to show system delay
authorWang Yaxin <wang.yaxin@zte.com.cn>
Thu, 10 Jul 2025 05:54:51 +0000 (13:54 +0800)
committerAndrew Morton <akpm@linux-foundation.org>
Sun, 20 Jul 2025 02:08:29 +0000 (19:08 -0700)
Support showing the overall system delay by reading PSI, much like the first
few lines of information output by the top command.  The output of
delaytop now includes both the system-wide delay and the delay of individual
tasks, providing a more comprehensive view of system latency.

Use case
========
bash# ./delaytop
System Pressure Information: (avg10/avg60/avg300/total)
CPU:    full:    0.0%/   0.0%/   0.0%/0           some:    0.1%/   0.0%/   0.0%/14216596
Memory: full:    0.0%/   0.0%/   0.0%/34010659    some:    0.0%/   0.0%/   0.0%/35406492
IO:     full:    0.1%/   0.0%/   0.0%/51029453    some:    0.1%/   0.0%/   0.0%/55330465
IRQ:    full:    0.0%/   0.0%/   0.0%/0

Top 20 processes (sorted by CPU delay):

  PID   TGID  COMMAND            CPU(ms)  IO(ms)        SWAP(ms) RCL(ms) THR(ms)  CMP(ms)  WP(ms)  IRQ(ms)
---------------------------------------------------------------------------------------------
   32     32  kworker/2:0H-sy   23.65     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  497    497  kworker/R-scsi_    1.20     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  495    495  kworker/R-scsi_    1.13     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  494    494  scsi_eh_0          1.12     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  485    485  kworker/R-ata_s    0.90     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  574    574  kworker/R-kdmfl    0.36     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   34     34  idle_inject/3      0.33     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1123   1123  nde-netfilter      0.28     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   60     60  ksoftirqd/7        0.25     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  114    114  kworker/0:2-cgr    0.25     0.00     0.00     0.00    0.00     0.00     0.00     0.00
  496    496  scsi_eh_1          0.24     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   51     51  cpuhp/6            0.24     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1667   1667  atd                0.24     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   45     45  cpuhp/5            0.23     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1102   1102  nde-backupservi    0.22     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1098   1098  systemsettings     0.21     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1100   1100  audit-monitor      0.20     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   53     53  migration/6        0.20     0.00     0.00     0.00    0.00     0.00     0.00     0.00
 1482   1482  sshd               0.19     0.00     0.00     0.00    0.00     0.00     0.00     0.00
   39     39  cpuhp/4            0.19     0.00     0.00     0.00    0.00     0.00     0.00     0.00

Link: https://lkml.kernel.org/r/20250710135451340_5pOgpIFi0M5AE7H44W1D@zte.com.cn
Co-developed-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
Signed-off-by: Jiang Kun <jiang.kun2@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Peilin He <he.peilin@zte.com.cn>
Cc: Qiang Tu <tu.qiang35@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: Yunkai Zhang <zhang.yunkai@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
tools/accounting/delaytop.c

index 23e38f39e97d0aa187d2b808a528f983e657c706..cd848af9a856bc4ff14d33fd96f4ce163478f553 100644 (file)
@@ -10,9 +10,9 @@
  * individual tasks (PIDs).
  *
  * Key features:
- *   - Collects per-task delay accounting statistics via taskstats.
- *   - Supports sorting, filtering.
- *   - Supports both interactive (screen refresh).
+ *     - Collects per-task delay accounting statistics via taskstats.
+ *     - Supports sorting, filtering.
+ *     - Supports both interactive (screen refresh).
  *
  * Copyright (C) Fan Yu, ZTE Corp. 2025
  * Copyright (C) Wang Yaxin, ZTE Corp. 2025
 #include <linux/cgroupstats.h>
 #include <ncurses.h>
 
+#define PSI_CPU_SOME "/proc/pressure/cpu"	/* same path as PSI_CPU_FULL */
+#define PSI_CPU_FULL   "/proc/pressure/cpu"	/* file is parsed for "some" and "full" lines */
+#define PSI_MEMORY_SOME "/proc/pressure/memory"	/* same path as PSI_MEMORY_FULL */
+#define PSI_MEMORY_FULL "/proc/pressure/memory"
+#define PSI_IO_SOME "/proc/pressure/io"	/* same path as PSI_IO_FULL */
+#define PSI_IO_FULL "/proc/pressure/io"
+#define PSI_IRQ_FULL   "/proc/pressure/irq"	/* only the "full" line is parsed for IRQ */
+
 #define NLA_NEXT(na)                   ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
 #define NLA_DATA(na)                   ((void *)((char *)(na) + NLA_HDRLEN))
 #define NLA_PAYLOAD(len)               (len - NLA_HDRLEN)
@@ -66,6 +74,24 @@ struct config {
        char *container_path;   /* Path to container cgroup */
 };
 
+/* PSI statistics: one avg10/avg60/avg300/total group per resource and per "some"/"full" line */
+struct psi_stats {
+       double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300;	/* printed as percentages */
+       unsigned long long cpu_some_total;	/* "total=" field of the "some" line */
+       double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300;
+       unsigned long long cpu_full_total;	/* "total=" field of the "full" line */
+       double memory_some_avg10, memory_some_avg60, memory_some_avg300;
+       unsigned long long memory_some_total;
+       double memory_full_avg10, memory_full_avg60, memory_full_avg300;
+       unsigned long long memory_full_total;
+       double io_some_avg10, io_some_avg60, io_some_avg300;
+       unsigned long long io_some_total;
+       double io_full_avg10, io_full_avg60, io_full_avg300;
+       unsigned long long io_full_total;
+       double irq_full_avg10, irq_full_avg60, irq_full_avg300;	/* IRQ has no "some" group */
+       unsigned long long irq_full_total;
+};
+
 /* Task delay information structure */
 struct task_info {
        int pid;
@@ -100,6 +126,7 @@ struct container_stats {
 
 /* Global variables */
 static struct config cfg;
+static struct psi_stats psi;
 static struct task_info tasks[MAX_TASKS];
 static int task_count;
 static int running = 1;
@@ -130,13 +157,13 @@ static void usage(void)
 {
        printf("Usage: delaytop [Options]\n"
        "Options:\n"
-       "  -h, --help               Show this help message and exit\n"
-       "  -d, --delay=SECONDS      Set refresh interval (default: 2 seconds, min: 1)\n"
-       "  -n, --iterations=COUNT   Set number of updates (default: 0 = infinite)\n"
-       "  -P, --processes=NUMBER   Set maximum number of processes to show (default: 20, max: 1000)\n"
-       "  -o, --once               Display once and exit\n"
-       "  -p, --pid=PID            Monitor only the specified PID\n"
-       "  -C, --container=PATH     Monitor the container at specified cgroup path\n");
+       "  -h, --help                           Show this help message and exit\n"
+       "  -d, --delay=SECONDS    Set refresh interval (default: 2 seconds, min: 1)\n"
+       "  -n, --iterations=COUNT       Set number of updates (default: 0 = infinite)\n"
+       "  -P, --processes=NUMBER       Set maximum number of processes to show (default: 20, max: 1000)\n"
+       "  -o, --once                           Display once and exit\n"
+       "  -p, --pid=PID                        Monitor only the specified PID\n"
+       "  -C, --container=PATH  Monitor the container at specified cgroup path\n");
        exit(0);
 }
 
@@ -276,7 +303,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
        memset(&nladdr, 0, sizeof(nladdr));
        nladdr.nl_family = AF_NETLINK;
        while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
-                                  sizeof(nladdr))) < buflen) {
+                                       sizeof(nladdr))) < buflen) {
                if (r > 0) {
                        buf += r;
                        buflen -= r;
@@ -320,6 +347,89 @@ static int get_family_id(int sd)
        return id;
 }
 
+/* Refresh the global psi snapshot from the PSI_* paths; files that cannot be opened stay zero. */
+static void read_psi_stats(void)
+{
+       FILE *fp;
+       char line[256];
+       int ret = 0;
+       memset(&psi, 0, sizeof(psi));   /* zero all fields so unread entries display as 0 */
+       /* CPU pressure */
+       fp = fopen(PSI_CPU_SOME, "r");
+       if (fp) {
+               while (fgets(line, sizeof(line), fp)) {
+                       if (strncmp(line, "some", 4) == 0) {
+                               ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                                       &psi.cpu_some_avg10, &psi.cpu_some_avg60,
+                                                       &psi.cpu_some_avg300, &psi.cpu_some_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse CPU some PSI data\n");
+                       } else if (strncmp(line, "full", 4) == 0) {
+                               ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                               &psi.cpu_full_avg10, &psi.cpu_full_avg60,
+                                               &psi.cpu_full_avg300, &psi.cpu_full_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse CPU full PSI data\n");
+                       }
+               }
+               fclose(fp);
+       }
+       /* Memory pressure */
+       fp = fopen(PSI_MEMORY_SOME, "r");
+       if (fp) {
+               while (fgets(line, sizeof(line), fp)) {
+                       if (strncmp(line, "some", 4) == 0) {
+                               ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                               &psi.memory_some_avg10, &psi.memory_some_avg60,
+                                               &psi.memory_some_avg300, &psi.memory_some_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse Memory some PSI data\n");
+                       } else if (strncmp(line, "full", 4) == 0) {
+                               ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                               &psi.memory_full_avg10, &psi.memory_full_avg60,
+                                               &psi.memory_full_avg300, &psi.memory_full_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse Memory full PSI data\n");
+                       }
+               }
+               fclose(fp);
+       }
+       /* IO pressure */
+       fp = fopen(PSI_IO_SOME, "r");
+       if (fp) {
+               while (fgets(line, sizeof(line), fp)) {
+                       if (strncmp(line, "some", 4) == 0) {
+                               ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                               &psi.io_some_avg10, &psi.io_some_avg60,
+                                               &psi.io_some_avg300, &psi.io_some_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse IO some PSI data\n");
+                       } else if (strncmp(line, "full", 4) == 0) {
+                               ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                               &psi.io_full_avg10, &psi.io_full_avg60,
+                                               &psi.io_full_avg300, &psi.io_full_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse IO full PSI data\n");
+                       }
+               }
+               fclose(fp);
+       }
+       /* IRQ pressure (only full) */
+       fp = fopen(PSI_IRQ_FULL, "r");
+       if (fp) {
+               while (fgets(line, sizeof(line), fp)) {
+                       if (strncmp(line, "full", 4) == 0) {
+                               ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
+                                               &psi.irq_full_avg10, &psi.irq_full_avg60,
+                                               &psi.irq_full_avg300, &psi.irq_full_total);
+                               if (ret != 4)
+                                       fprintf(stderr, "Failed to parse IRQ full PSI data\n");
+                       }
+               }
+               fclose(fp);
+       }
+}
+
 static int read_comm(int pid, char *comm_buf, size_t buf_size)
 {
        char path[64];
@@ -549,7 +659,29 @@ static void display_results(void)
        FILE *out = stdout;
 
        fprintf(out, "\033[H\033[J");
-
+       /* PSI output (one-line, no cat style) */
+       fprintf(out, "System Pressure Information: ");
+       fprintf(out, "(avg10/avg60/avg300/total)\n");
+       fprintf(out, "CPU:");
+       fprintf(out, "  full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.cpu_full_avg10,
+                       psi.cpu_full_avg60, psi.cpu_full_avg300, psi.cpu_full_total);
+       fprintf(out, "  some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.cpu_some_avg10,
+                       psi.cpu_some_avg60, psi.cpu_some_avg300, psi.cpu_some_total);
+
+       fprintf(out, "Memory:");
+       fprintf(out, " full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.memory_full_avg10,
+                       psi.memory_full_avg60, psi.memory_full_avg300, psi.memory_full_total);
+       fprintf(out, "  some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.memory_some_avg10,
+                       psi.memory_some_avg60, psi.memory_some_avg300, psi.memory_some_total);
+
+       fprintf(out, "IO:");
+       fprintf(out, "  full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu", psi.io_full_avg10,
+                       psi.io_full_avg60, psi.io_full_avg300, psi.io_full_total);
+       fprintf(out, "  some: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n", psi.io_some_avg10,
+                       psi.io_some_avg60, psi.io_some_avg300, psi.io_some_total);
+       fprintf(out, "IRQ:");
+       fprintf(out, "  full: %6.1f%%/%6.1f%%/%6.1f%%/%-10llu\n\n", psi.irq_full_avg10,
+                       psi.irq_full_avg60, psi.irq_full_avg300, psi.irq_full_total);
        if (cfg.container_path) {
                fprintf(out, "Container Information (%s):\n", cfg.container_path);
                fprintf(out, "Processes: running=%d, sleeping=%d, ",
@@ -559,8 +691,8 @@ static void display_results(void)
                        container_stats.nr_io_wait);
        }
        fprintf(out, "Top %d processes (sorted by CPU delay):\n\n",
-                  cfg.max_processes);
-       fprintf(out, "  PID     TGID  COMMAND            CPU(ms)  IO(ms)   ");
+                       cfg.max_processes);
+       fprintf(out, "  PID     TGID  COMMAND            CPU(ms)  IO(ms)        ");
        fprintf(out, "SWAP(ms) RCL(ms) THR(ms)  CMP(ms)  WP(ms)  IRQ(ms)\n");
        fprintf(out, "-----------------------------------------------");
        fprintf(out, "----------------------------------------------\n");
@@ -616,6 +748,9 @@ int main(int argc, char **argv)
 
        /* Main loop */
        while (running) {
+               /* Read PSI statistics */
+               read_psi_stats();
+
                /* Get container stats if container path provided */
                if (cfg.container_path)
                        get_container_stats();