1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2009-2013 Intel Corporation
9 Auke Kok <auke-jan.h.kok@intel.com>
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
35 #include "time-util.h"
38 #include "bootchart.h"
39 #include "cgroup-util.h"
/*
 * Static 4k buffer for stdio - primarily used to increase PSS buffering
 * from the default 1k stdio buffer, so that fewer reads are needed when
 * parsing smaps. It is installed on the per-process smaps stream with
 * setvbuf(..., _IOFBF, sizeof(smaps_buf)).
 */
static char smaps_buf[4096];
50 double gettime_ns(void) {
53 clock_gettime(CLOCK_MONOTONIC
, &n
);
55 return (n
.tv_sec
+ (n
.tv_nsec
/ (double) NSEC_PER_SEC
));
/*
 * bufgetline: helper operating on text held in the buffer `buf`.
 *
 * The visible code searches `buf` for its first '\n' with strchr() and
 * stores the result in `c`.
 *
 * NOTE(review): the declaration of `c`, any NULL check on `buf`, and the
 * return statement are not visible in this extract. Presumably the return
 * value is derived from `c` (the name suggests stepping to a line
 * boundary) - confirm against the complete source before relying on it.
 */
58 static char *bufgetline(char *buf
) {
64 c
= strchr(buf
, '\n');
/*
 * pid_cmdline_strscpy: read the "cmdline" file of process `pid` into
 * `buffer`.
 *
 * Visible steps:
 *   - format "<pid>/cmdline" into a PATH_MAX-sized local buffer;
 *   - open that path relative to the directory descriptor `procfd`,
 *     read-only with O_CLOEXEC;
 *   - read at most buf_len - 1 bytes into `buffer`;
 *   - walk the `n` bytes that were read, testing each one for '\0'.
 *
 * NOTE(review): what is done with each '\0' byte, how `buffer` is
 * terminated, how openat()/read() failures are handled and what the
 * function returns are not visible in this extract - confirm against the
 * complete source.
 * NOTE(review): `buf_len` is a size_t, so buf_len - 1 wraps around if a
 * caller passes 0; the callers visible in this file pass sizeof(ps->name).
 * NOTE(review): `fd` is declared with _cleanup_close_, presumably a
 * close-on-scope-exit attribute defined outside this file - confirm.
 */
71 static int pid_cmdline_strscpy(int procfd
, char *buffer
, size_t buf_len
, int pid
) {
72 char filename
[PATH_MAX
];
73 _cleanup_close_
int fd
= -1;
76 sprintf(filename
, "%d/cmdline", pid
);
77 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
81 n
= read(fd
, buffer
, buf_len
-1);
84 for (i
= 0; i
< n
; i
++)
85 if (buffer
[i
] == '\0')
93 int log_sample(DIR *proc
,
95 struct ps_struct
*ps_first
,
96 struct list_sample_data
**ptr
,
100 static int vmstat
= -1;
101 _cleanup_free_
char *buf_schedstat
= NULL
;
112 static int e_fd
= -1;
117 struct list_sample_data
*sampledata
;
118 struct ps_sched_struct
*ps_prev
= NULL
;
124 procfd
= dirfd(proc
);
130 vmstat
= openat(procfd
, "vmstat", O_RDONLY
|O_CLOEXEC
);
132 return log_error_errno(errno
, "Failed to open /proc/vmstat: %m");
135 n
= pread(vmstat
, buf
, sizeof(buf
) - 1, 0);
137 vmstat
= safe_close(vmstat
);
147 if (sscanf(m
, "%s %s", key
, val
) < 2)
149 if (streq(key
, "pgpgin"))
150 sampledata
->blockstat
.bi
= atoi(val
);
151 if (streq(key
, "pgpgout")) {
152 sampledata
->blockstat
.bo
= atoi(val
);
161 /* Parse "/proc/schedstat" for overall CPU utilization */
162 r
= read_full_file("/proc/schedstat", &buf_schedstat
, NULL
);
164 return log_error_errno(r
, "Unable to read schedstat: %m");
168 if (sscanf(m
, "%s %*s %*s %*s %*s %*s %*s %s %s", key
, rt
, wt
) < 3)
171 if (strstr(key
, "cpu")) {
172 r
= safe_atoi((const char*)(key
+3), &c
);
173 if (r
< 0 || c
> MAXCPUS
-1)
174 /* Oops, we only have room for MAXCPUS data */
176 sampledata
->runtime
[c
] = atoll(rt
);
177 sampledata
->waittime
[c
] = atoll(wt
);
190 e_fd
= openat(procfd
, "sys/kernel/random/entropy_avail", O_RDONLY
|O_CLOEXEC
);
192 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
195 n
= pread(e_fd
, buf
, sizeof(buf
) - 1, 0);
197 e_fd
= safe_close(e_fd
);
200 sampledata
->entropy_avail
= atoi(buf
);
204 while ((ent
= readdir(proc
)) != NULL
) {
205 char filename
[PATH_MAX
];
207 struct ps_struct
*ps
;
209 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
212 pid
= atoi(ent
->d_name
);
218 while (ps
->next_ps
) {
224 /* end of our LL? then append a new record */
225 if (ps
->pid
!= pid
) {
226 _cleanup_fclose_
FILE *st
= NULL
;
228 struct ps_struct
*parent
;
230 ps
->next_ps
= new0(struct ps_struct
, 1);
239 ps
->sample
= new0(struct ps_sched_struct
, 1);
243 ps
->sample
->sampledata
= sampledata
;
247 /* mark our first sample */
248 ps
->first
= ps
->last
= ps
->sample
;
249 ps
->sample
->runtime
= atoll(rt
);
250 ps
->sample
->waittime
= atoll(wt
);
252 /* get name, start time */
254 sprintf(filename
, "%d/sched", pid
);
255 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
260 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
262 ps
->sched
= safe_close(ps
->sched
);
267 if (!sscanf(buf
, "%s %*s %*s", key
))
270 strscpy(ps
->name
, sizeof(ps
->name
), key
);
273 if (arg_show_cmdline
)
274 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);
285 if (!sscanf(m
, "%*s %*s %s", t
))
288 r
= safe_atod(t
, &ps
->starttime
);
292 ps
->starttime
/= 1000.0;
295 /* if this fails, that's OK */
296 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
,
297 ps
->pid
, &ps
->cgroup
);
300 sprintf(filename
, "%d/stat", pid
);
301 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
305 st
= fdopen(fd
, "re");
311 if (!fscanf(st
, "%*s %*s %*s %i", &p
))
317 * setup child pointers
319 * these are used to paint the tree coherently later
320 * each parent has a LL of children, and a LL of siblings
323 continue; /* nothing to do for init atm */
325 /* kthreadd has ppid=0, which breaks our tree ordering */
330 while ((parent
->next_ps
&& parent
->pid
!= ps
->ppid
))
331 parent
= parent
->next_ps
;
333 if (parent
->pid
!= ps
->ppid
) {
336 parent
= ps_first
->next_ps
;
341 if (!parent
->children
) {
342 /* it's the first child */
343 parent
->children
= ps
;
345 /* walk all children and append */
346 struct ps_struct
*children
;
347 children
= parent
->children
;
348 while (children
->next
)
349 children
= children
->next
;
355 /* else -> found pid, append data in ps */
357 /* below here is all continuous logging parts - we get here on every
361 if (ps
->schedstat
< 0) {
362 sprintf(filename
, "%d/schedstat", pid
);
363 ps
->schedstat
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
364 if (ps
->schedstat
< 0)
368 s
= pread(ps
->schedstat
, buf
, sizeof(buf
) - 1, 0);
370 /* clean up our file descriptors - assume that the process exited */
371 close(ps
->schedstat
);
373 ps
->sched
= safe_close(ps
->sched
);
379 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
382 ps
->sample
->next
= new0(struct ps_sched_struct
, 1);
383 if (!ps
->sample
->next
)
386 ps
->sample
->next
->prev
= ps
->sample
;
387 ps
->sample
= ps
->sample
->next
;
388 ps
->last
= ps
->sample
;
389 ps
->sample
->runtime
= atoll(rt
);
390 ps
->sample
->waittime
= atoll(wt
);
391 ps
->sample
->sampledata
= sampledata
;
392 ps
->sample
->ps_new
= ps
;
394 ps_prev
->cross
= ps
->sample
;
396 ps_prev
= ps
->sample
;
397 ps
->total
= (ps
->last
->runtime
- ps
->first
->runtime
)
400 /* Take into account CPU runtime/waittime spent in non-main threads of the process
401 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
402 * See https://github.com/systemd/systemd/issues/139
405 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
406 snprintf(filename
, sizeof(filename
), PID_FMT
"/task", pid
);
407 taskfd
= openat(procfd
, filename
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
);
409 _cleanup_closedir_
DIR *taskdir
= NULL
;
411 taskdir
= fdopendir(taskfd
);
416 FOREACH_DIRENT(ent
, taskdir
, break) {
418 _cleanup_close_
int tid_schedstat
= -1;
422 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
425 /* Skip main thread as it was already accounted */
426 r
= safe_atoi(ent
->d_name
, &tid
);
427 if (r
< 0 || tid
== pid
)
430 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
431 snprintf(filename
, sizeof(filename
), PID_FMT
"/schedstat", tid
);
432 tid_schedstat
= openat(taskfd
, filename
, O_RDONLY
|O_CLOEXEC
);
434 if (tid_schedstat
== -1)
437 s
= pread(tid_schedstat
, buf
, sizeof(buf
) - 1, 0);
442 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
445 r
= safe_atolli(rt
, &delta_rt
);
448 r
= safe_atolli(rt
, &delta_wt
);
451 ps
->sample
->runtime
+= delta_rt
;
452 ps
->sample
->waittime
+= delta_wt
;
461 sprintf(filename
, "%d/smaps", pid
);
462 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
465 ps
->smaps
= fdopen(fd
, "re");
470 setvbuf(ps
->smaps
, smaps_buf
, _IOFBF
, sizeof(smaps_buf
));
475 /* test to see if we need to skip another field */
477 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
480 if (fread(buf
, 1, 28 * 15, ps
->smaps
) != (28 * 15)) {
483 if (buf
[392] == 'V') {
495 /* skip one line, this contains the object mapped. */
496 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
499 /* then there's a 28 char 14 line block */
500 if (fread(buf
, 1, 28 * 14, ps
->smaps
) != 28 * 14) {
503 pss_kb
= atoi(&buf
[61]);
504 ps
->sample
->pss
+= pss_kb
;
506 /* skip one more line if this is a newer kernel */
508 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
)
513 if (ps
->sample
->pss
> ps
->pss_max
)
514 ps
->pss_max
= ps
->sample
->pss
;
517 /* catch process rename, try to randomize time */
518 mod
= (arg_hz
< 4.0) ? 4.0 : (arg_hz
/ 4.0);
519 if (((sample
- ps
->pid
) + pid
) % (int)(mod
) == 0) {
522 /* get name, start time */
524 sprintf(filename
, "%d/sched", pid
);
525 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
530 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
532 /* clean up file descriptors */
533 ps
->sched
= safe_close(ps
->sched
);
534 ps
->schedstat
= safe_close(ps
->schedstat
);
540 if (!sscanf(buf
, "%s %*s %*s", key
))
543 strscpy(ps
->name
, sizeof(ps
->name
), key
);
546 if (arg_show_cmdline
)
547 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);