1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2009-2013 Intel Corporation
9 Auke Kok <auke-jan.h.kok@intel.com>
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
34 #include "bootchart.h"
35 #include "cgroup-util.h"
36 #include "dirent-util.h"
39 #include "parse-util.h"
41 #include "string-util.h"
43 #include "time-util.h"
/*
 * Static 4k buffer for stdio. It is installed with setvbuf() (full
 * buffering) on the per-process smaps stream, primarily to increase PSS
 * buffering beyond the default 1k stdio buffer and so reduce read overhead.
 */
static char smaps_buf[4096];
54 double gettime_ns(void) {
57 clock_gettime(CLOCK_MONOTONIC
, &n
);
59 return (n
.tv_sec
+ (n
.tv_nsec
/ (double) NSEC_PER_SEC
));
/*
 * bufgetline: helper that works on a NUL-terminated text buffer.
 * The visible logic searches buf for the next '\n' using strchr().
 * NOTE(review): presumably this returns a pointer to the start of the
 * following line (or NULL when there is none) - confirm against the
 * complete body before relying on that.
 */
62 static char *bufgetline(char *buf
) {
/* c becomes the first newline in buf, or NULL when strchr() finds none. */
68 c
= strchr(buf
, '\n');
/*
 * pid_cmdline_strscpy: read the command line of process `pid` into `buffer`.
 *
 *   procfd  - directory fd that "<pid>/cmdline" is opened relative to
 *             (log_sample() passes the dirfd() of its proc DIR)
 *   buffer  - destination buffer
 *   buf_len - size of buffer; at most buf_len-1 bytes are read
 *   pid     - process id used to build the relative path
 *
 * NOTE(review): the return convention and what is done with embedded NUL
 * bytes are not established by the statements below - confirm before
 * documenting them. Callers in log_sample() ignore the return value.
 */
75 static int pid_cmdline_strscpy(int procfd
, char *buffer
, size_t buf_len
, int pid
) {
76 char filename
[PATH_MAX
];
/* presumably _cleanup_close_ closes fd automatically at scope exit - verify */
77 _cleanup_close_
int fd
= -1;
/* Path is relative to procfd: "<pid>/cmdline". */
80 sprintf(filename
, "%d/cmdline", pid
);
81 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
/* Read at most buf_len-1 bytes, leaving one byte of buffer unused by read(). */
85 n
= read(fd
, buffer
, buf_len
-1);
/* Scan the n bytes just read, looking for NUL bytes. */
88 for (i
= 0; i
< n
; i
++)
89 if (buffer
[i
] == '\0')
97 int log_sample(DIR *proc
,
99 struct ps_struct
*ps_first
,
100 struct list_sample_data
**ptr
,
104 static int vmstat
= -1;
105 _cleanup_free_
char *buf_schedstat
= NULL
;
116 static int e_fd
= -1;
121 struct list_sample_data
*sampledata
;
122 struct ps_sched_struct
*ps_prev
= NULL
;
128 procfd
= dirfd(proc
);
134 vmstat
= openat(procfd
, "vmstat", O_RDONLY
|O_CLOEXEC
);
136 return log_error_errno(errno
, "Failed to open /proc/vmstat: %m");
139 n
= pread(vmstat
, buf
, sizeof(buf
) - 1, 0);
141 vmstat
= safe_close(vmstat
);
151 if (sscanf(m
, "%s %s", key
, val
) < 2)
153 if (streq(key
, "pgpgin"))
154 sampledata
->blockstat
.bi
= atoi(val
);
155 if (streq(key
, "pgpgout")) {
156 sampledata
->blockstat
.bo
= atoi(val
);
165 /* Parse "/proc/schedstat" for overall CPU utilization */
166 r
= read_full_file("/proc/schedstat", &buf_schedstat
, NULL
);
168 return log_error_errno(r
, "Unable to read schedstat: %m");
172 if (sscanf(m
, "%s %*s %*s %*s %*s %*s %*s %s %s", key
, rt
, wt
) < 3)
175 if (strstr(key
, "cpu")) {
176 r
= safe_atoi((const char*)(key
+3), &c
);
177 if (r
< 0 || c
> MAXCPUS
-1)
178 /* Oops, we only have room for MAXCPUS data */
180 sampledata
->runtime
[c
] = atoll(rt
);
181 sampledata
->waittime
[c
] = atoll(wt
);
194 e_fd
= openat(procfd
, "sys/kernel/random/entropy_avail", O_RDONLY
|O_CLOEXEC
);
196 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
199 n
= pread(e_fd
, buf
, sizeof(buf
) - 1, 0);
201 e_fd
= safe_close(e_fd
);
204 sampledata
->entropy_avail
= atoi(buf
);
208 while ((ent
= readdir(proc
)) != NULL
) {
209 char filename
[PATH_MAX
];
211 struct ps_struct
*ps
;
213 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
216 pid
= atoi(ent
->d_name
);
222 while (ps
->next_ps
) {
228 /* end of our LL? then append a new record */
229 if (ps
->pid
!= pid
) {
230 _cleanup_fclose_
FILE *st
= NULL
;
232 struct ps_struct
*parent
;
234 ps
->next_ps
= new0(struct ps_struct
, 1);
243 ps
->sample
= new0(struct ps_sched_struct
, 1);
247 ps
->sample
->sampledata
= sampledata
;
251 /* mark our first sample */
252 ps
->first
= ps
->last
= ps
->sample
;
253 ps
->sample
->runtime
= atoll(rt
);
254 ps
->sample
->waittime
= atoll(wt
);
256 /* get name, start time */
258 sprintf(filename
, "%d/sched", pid
);
259 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
264 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
266 ps
->sched
= safe_close(ps
->sched
);
271 if (!sscanf(buf
, "%s %*s %*s", key
))
274 strscpy(ps
->name
, sizeof(ps
->name
), key
);
277 if (arg_show_cmdline
)
278 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);
289 if (!sscanf(m
, "%*s %*s %s", t
))
292 r
= safe_atod(t
, &ps
->starttime
);
296 ps
->starttime
/= 1000.0;
299 /* if this fails, that's OK */
300 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
,
301 ps
->pid
, &ps
->cgroup
);
304 sprintf(filename
, "%d/stat", pid
);
305 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
309 st
= fdopen(fd
, "re");
315 if (!fscanf(st
, "%*s %*s %*s %i", &p
))
321 * setup child pointers
323 * these are used to paint the tree coherently later
324 * each parent has a LL of children, and a LL of siblings
327 continue; /* nothing to do for init atm */
329 /* kthreadd has ppid=0, which breaks our tree ordering */
334 while ((parent
->next_ps
&& parent
->pid
!= ps
->ppid
))
335 parent
= parent
->next_ps
;
337 if (parent
->pid
!= ps
->ppid
) {
340 parent
= ps_first
->next_ps
;
345 if (!parent
->children
) {
346 /* it's the first child */
347 parent
->children
= ps
;
349 /* walk all children and append */
350 struct ps_struct
*children
;
351 children
= parent
->children
;
352 while (children
->next
)
353 children
= children
->next
;
359 /* else -> found pid, append data in ps */
361 /* below here is all continuous logging parts - we get here on every
365 if (ps
->schedstat
< 0) {
366 sprintf(filename
, "%d/schedstat", pid
);
367 ps
->schedstat
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
368 if (ps
->schedstat
< 0)
372 s
= pread(ps
->schedstat
, buf
, sizeof(buf
) - 1, 0);
374 /* clean up our file descriptors - assume that the process exited */
375 close(ps
->schedstat
);
377 ps
->sched
= safe_close(ps
->sched
);
383 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
386 ps
->sample
->next
= new0(struct ps_sched_struct
, 1);
387 if (!ps
->sample
->next
)
390 ps
->sample
->next
->prev
= ps
->sample
;
391 ps
->sample
= ps
->sample
->next
;
392 ps
->last
= ps
->sample
;
393 ps
->sample
->runtime
= atoll(rt
);
394 ps
->sample
->waittime
= atoll(wt
);
395 ps
->sample
->sampledata
= sampledata
;
396 ps
->sample
->ps_new
= ps
;
398 ps_prev
->cross
= ps
->sample
;
400 ps_prev
= ps
->sample
;
401 ps
->total
= (ps
->last
->runtime
- ps
->first
->runtime
)
404 /* Take into account CPU runtime/waittime spent in non-main threads of the process
405 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
406 * See https://github.com/systemd/systemd/issues/139
409 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
410 snprintf(filename
, sizeof(filename
), PID_FMT
"/task", pid
);
411 taskfd
= openat(procfd
, filename
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
);
413 _cleanup_closedir_
DIR *taskdir
= NULL
;
415 taskdir
= fdopendir(taskfd
);
420 FOREACH_DIRENT(ent
, taskdir
, break) {
422 _cleanup_close_
int tid_schedstat
= -1;
426 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
429 /* Skip main thread as it was already accounted */
430 r
= safe_atoi(ent
->d_name
, &tid
);
431 if (r
< 0 || tid
== pid
)
434 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
435 snprintf(filename
, sizeof(filename
), PID_FMT
"/schedstat", tid
);
436 tid_schedstat
= openat(taskfd
, filename
, O_RDONLY
|O_CLOEXEC
);
438 if (tid_schedstat
== -1)
441 s
= pread(tid_schedstat
, buf
, sizeof(buf
) - 1, 0);
446 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
449 r
= safe_atolli(rt
, &delta_rt
);
452 r
= safe_atolli(rt
, &delta_wt
);
455 ps
->sample
->runtime
+= delta_rt
;
456 ps
->sample
->waittime
+= delta_wt
;
465 sprintf(filename
, "%d/smaps", pid
);
466 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
469 ps
->smaps
= fdopen(fd
, "re");
474 setvbuf(ps
->smaps
, smaps_buf
, _IOFBF
, sizeof(smaps_buf
));
479 /* test to see if we need to skip another field */
481 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
484 if (fread(buf
, 1, 28 * 15, ps
->smaps
) != (28 * 15)) {
487 if (buf
[392] == 'V') {
499 /* skip one line, this contains the object mapped. */
500 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
503 /* then there's a 28 char 14 line block */
504 if (fread(buf
, 1, 28 * 14, ps
->smaps
) != 28 * 14) {
507 pss_kb
= atoi(&buf
[61]);
508 ps
->sample
->pss
+= pss_kb
;
510 /* skip one more line if this is a newer kernel */
512 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
)
517 if (ps
->sample
->pss
> ps
->pss_max
)
518 ps
->pss_max
= ps
->sample
->pss
;
521 /* catch process rename, try to randomize time */
522 mod
= (arg_hz
< 4.0) ? 4.0 : (arg_hz
/ 4.0);
523 if (((sample
- ps
->pid
) + pid
) % (int)(mod
) == 0) {
526 /* get name, start time */
528 sprintf(filename
, "%d/sched", pid
);
529 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
534 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
536 /* clean up file descriptors */
537 ps
->sched
= safe_close(ps
->sched
);
538 ps
->schedstat
= safe_close(ps
->schedstat
);
544 if (!sscanf(buf
, "%s %*s %*s", key
))
547 strscpy(ps
->name
, sizeof(ps
->name
), key
);
550 if (arg_show_cmdline
)
551 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);