1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2009-2013 Intel Corporation
9 Auke Kok <auke-jan.h.kok@intel.com>
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
34 #include "bootchart.h"
35 #include "cgroup-util.h"
39 #include "string-util.h"
41 #include "time-util.h"
/*
 * Static 4k buffer handed to stdio with setvbuf() for the per-process smaps
 * streams. It is primarily used to raise the buffering of the PSS data from
 * the default 1k stdio buffer to 4k, to reduce the number of reads needed
 * per sample.
 *
 * NOTE(review): the same buffer is passed to setvbuf() for every ps->smaps
 * stream; confirm that no two of those streams are ever read interleaved.
 */
static char smaps_buf[4096];
52 double gettime_ns(void) {
55 clock_gettime(CLOCK_MONOTONIC
, &n
);
57 return (n
.tv_sec
+ (n
.tv_nsec
/ (double) NSEC_PER_SEC
));
/*
 * Step to the next line of a NUL-terminated text buffer.
 *
 * Returns a pointer to the character following the first '\n' found in buf
 * (which may be the terminating NUL when the newline is the last character),
 * or NULL when buf is NULL or contains no further newline.
 */
static char *bufgetline(char *buf) {
        char *c;

        if (!buf)
                return NULL;

        c = strchr(buf, '\n');
        if (c)
                c++; /* skip the newline itself */

        return c;
}
73 static int pid_cmdline_strscpy(int procfd
, char *buffer
, size_t buf_len
, int pid
) {
74 char filename
[PATH_MAX
];
75 _cleanup_close_
int fd
= -1;
78 sprintf(filename
, "%d/cmdline", pid
);
79 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
83 n
= read(fd
, buffer
, buf_len
-1);
86 for (i
= 0; i
< n
; i
++)
87 if (buffer
[i
] == '\0')
95 int log_sample(DIR *proc
,
97 struct ps_struct
*ps_first
,
98 struct list_sample_data
**ptr
,
102 static int vmstat
= -1;
103 _cleanup_free_
char *buf_schedstat
= NULL
;
114 static int e_fd
= -1;
119 struct list_sample_data
*sampledata
;
120 struct ps_sched_struct
*ps_prev
= NULL
;
126 procfd
= dirfd(proc
);
132 vmstat
= openat(procfd
, "vmstat", O_RDONLY
|O_CLOEXEC
);
134 return log_error_errno(errno
, "Failed to open /proc/vmstat: %m");
137 n
= pread(vmstat
, buf
, sizeof(buf
) - 1, 0);
139 vmstat
= safe_close(vmstat
);
149 if (sscanf(m
, "%s %s", key
, val
) < 2)
151 if (streq(key
, "pgpgin"))
152 sampledata
->blockstat
.bi
= atoi(val
);
153 if (streq(key
, "pgpgout")) {
154 sampledata
->blockstat
.bo
= atoi(val
);
163 /* Parse "/proc/schedstat" for overall CPU utilization */
164 r
= read_full_file("/proc/schedstat", &buf_schedstat
, NULL
);
166 return log_error_errno(r
, "Unable to read schedstat: %m");
170 if (sscanf(m
, "%s %*s %*s %*s %*s %*s %*s %s %s", key
, rt
, wt
) < 3)
173 if (strstr(key
, "cpu")) {
174 r
= safe_atoi((const char*)(key
+3), &c
);
175 if (r
< 0 || c
> MAXCPUS
-1)
176 /* Oops, we only have room for MAXCPUS data */
178 sampledata
->runtime
[c
] = atoll(rt
);
179 sampledata
->waittime
[c
] = atoll(wt
);
192 e_fd
= openat(procfd
, "sys/kernel/random/entropy_avail", O_RDONLY
|O_CLOEXEC
);
194 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
197 n
= pread(e_fd
, buf
, sizeof(buf
) - 1, 0);
199 e_fd
= safe_close(e_fd
);
202 sampledata
->entropy_avail
= atoi(buf
);
206 while ((ent
= readdir(proc
)) != NULL
) {
207 char filename
[PATH_MAX
];
209 struct ps_struct
*ps
;
211 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
214 pid
= atoi(ent
->d_name
);
220 while (ps
->next_ps
) {
226 /* end of our LL? then append a new record */
227 if (ps
->pid
!= pid
) {
228 _cleanup_fclose_
FILE *st
= NULL
;
230 struct ps_struct
*parent
;
232 ps
->next_ps
= new0(struct ps_struct
, 1);
241 ps
->sample
= new0(struct ps_sched_struct
, 1);
245 ps
->sample
->sampledata
= sampledata
;
249 /* mark our first sample */
250 ps
->first
= ps
->last
= ps
->sample
;
251 ps
->sample
->runtime
= atoll(rt
);
252 ps
->sample
->waittime
= atoll(wt
);
254 /* get name, start time */
256 sprintf(filename
, "%d/sched", pid
);
257 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
262 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
264 ps
->sched
= safe_close(ps
->sched
);
269 if (!sscanf(buf
, "%s %*s %*s", key
))
272 strscpy(ps
->name
, sizeof(ps
->name
), key
);
275 if (arg_show_cmdline
)
276 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);
287 if (!sscanf(m
, "%*s %*s %s", t
))
290 r
= safe_atod(t
, &ps
->starttime
);
294 ps
->starttime
/= 1000.0;
297 /* if this fails, that's OK */
298 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
,
299 ps
->pid
, &ps
->cgroup
);
302 sprintf(filename
, "%d/stat", pid
);
303 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
307 st
= fdopen(fd
, "re");
313 if (!fscanf(st
, "%*s %*s %*s %i", &p
))
319 * setup child pointers
321 * these are used to paint the tree coherently later
322 * each parent has a LL of children, and a LL of siblings
325 continue; /* nothing to do for init atm */
327 /* kthreadd has ppid=0, which breaks our tree ordering */
332 while ((parent
->next_ps
&& parent
->pid
!= ps
->ppid
))
333 parent
= parent
->next_ps
;
335 if (parent
->pid
!= ps
->ppid
) {
338 parent
= ps_first
->next_ps
;
343 if (!parent
->children
) {
344 /* it's the first child */
345 parent
->children
= ps
;
347 /* walk all children and append */
348 struct ps_struct
*children
;
349 children
= parent
->children
;
350 while (children
->next
)
351 children
= children
->next
;
357 /* else -> found pid, append data in ps */
359 /* below here is all continuous logging parts - we get here on every
363 if (ps
->schedstat
< 0) {
364 sprintf(filename
, "%d/schedstat", pid
);
365 ps
->schedstat
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
366 if (ps
->schedstat
< 0)
370 s
= pread(ps
->schedstat
, buf
, sizeof(buf
) - 1, 0);
372 /* clean up our file descriptors - assume that the process exited */
373 close(ps
->schedstat
);
375 ps
->sched
= safe_close(ps
->sched
);
381 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
384 ps
->sample
->next
= new0(struct ps_sched_struct
, 1);
385 if (!ps
->sample
->next
)
388 ps
->sample
->next
->prev
= ps
->sample
;
389 ps
->sample
= ps
->sample
->next
;
390 ps
->last
= ps
->sample
;
391 ps
->sample
->runtime
= atoll(rt
);
392 ps
->sample
->waittime
= atoll(wt
);
393 ps
->sample
->sampledata
= sampledata
;
394 ps
->sample
->ps_new
= ps
;
396 ps_prev
->cross
= ps
->sample
;
398 ps_prev
= ps
->sample
;
399 ps
->total
= (ps
->last
->runtime
- ps
->first
->runtime
)
402 /* Take into account CPU runtime/waittime spent in non-main threads of the process
403 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
404 * See https://github.com/systemd/systemd/issues/139
407 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
408 snprintf(filename
, sizeof(filename
), PID_FMT
"/task", pid
);
409 taskfd
= openat(procfd
, filename
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
);
411 _cleanup_closedir_
DIR *taskdir
= NULL
;
413 taskdir
= fdopendir(taskfd
);
418 FOREACH_DIRENT(ent
, taskdir
, break) {
420 _cleanup_close_
int tid_schedstat
= -1;
424 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
427 /* Skip main thread as it was already accounted */
428 r
= safe_atoi(ent
->d_name
, &tid
);
429 if (r
< 0 || tid
== pid
)
432 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
433 snprintf(filename
, sizeof(filename
), PID_FMT
"/schedstat", tid
);
434 tid_schedstat
= openat(taskfd
, filename
, O_RDONLY
|O_CLOEXEC
);
436 if (tid_schedstat
== -1)
439 s
= pread(tid_schedstat
, buf
, sizeof(buf
) - 1, 0);
444 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
447 r
= safe_atolli(rt
, &delta_rt
);
450 r
= safe_atolli(rt
, &delta_wt
);
453 ps
->sample
->runtime
+= delta_rt
;
454 ps
->sample
->waittime
+= delta_wt
;
463 sprintf(filename
, "%d/smaps", pid
);
464 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
467 ps
->smaps
= fdopen(fd
, "re");
472 setvbuf(ps
->smaps
, smaps_buf
, _IOFBF
, sizeof(smaps_buf
));
477 /* test to see if we need to skip another field */
479 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
482 if (fread(buf
, 1, 28 * 15, ps
->smaps
) != (28 * 15)) {
485 if (buf
[392] == 'V') {
497 /* skip one line, this contains the object mapped. */
498 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
501 /* then there's a 28 char 14 line block */
502 if (fread(buf
, 1, 28 * 14, ps
->smaps
) != 28 * 14) {
505 pss_kb
= atoi(&buf
[61]);
506 ps
->sample
->pss
+= pss_kb
;
508 /* skip one more line if this is a newer kernel */
510 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
)
515 if (ps
->sample
->pss
> ps
->pss_max
)
516 ps
->pss_max
= ps
->sample
->pss
;
519 /* catch process rename, try to randomize time */
520 mod
= (arg_hz
< 4.0) ? 4.0 : (arg_hz
/ 4.0);
521 if (((sample
- ps
->pid
) + pid
) % (int)(mod
) == 0) {
524 /* get name, start time */
526 sprintf(filename
, "%d/sched", pid
);
527 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
532 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
534 /* clean up file descriptors */
535 ps
->sched
= safe_close(ps
->sched
);
536 ps
->schedstat
= safe_close(ps
->schedstat
);
542 if (!sscanf(buf
, "%s %*s %*s", key
))
545 strscpy(ps
->name
, sizeof(ps
->name
), key
);
548 if (arg_show_cmdline
)
549 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);