1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright (C) 2009-2013 Intel Corporation
9 Auke Kok <auke-jan.h.kok@intel.com>
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
34 #include "bootchart.h"
35 #include "cgroup-util.h"
37 #include "string-util.h"
39 #include "time-util.h"
44 * Alloc a static 4k buffer for stdio - primarily used to increase
45 * PSS buffering from the default 1k stdio buffer to reduce read() overhead.
/* Static buffer passed to setvbuf() (with _IOFBF) for the ps->smaps stream in log_sample().
 * NOTE(review): the same buffer is handed to every ps->smaps stream; confirm that no two
 * such streams are ever read in an interleaved fashion. */
48 static char smaps_buf
[4096];
51 double gettime_ns(void) {
54 clock_gettime(CLOCK_MONOTONIC
, &n
);
56 return (n
.tv_sec
+ (n
.tv_nsec
/ (double) NSEC_PER_SEC
));
/*
 * Line helper for a NUL-terminated text buffer: looks up the first '\n'
 * in buf with strchr() and stores the match in c.
 * NOTE(review): only the signature and the strchr() call are visible here;
 * the handling of a NULL buf, of a missing newline, and the exact return
 * value must be confirmed against the complete source.
 */
59 static char *bufgetline(char *buf
) {
65 c
= strchr(buf
, '\n');
/*
 * Read the contents of "<pid>/cmdline" into buffer.
 *
 *   procfd   directory file descriptor the path is resolved against by
 *            openat(); presumably an fd for /proc -- confirm at the caller.
 *   buffer   destination for the bytes read.
 *   buf_len  size of buffer; at most buf_len - 1 bytes are read.
 *            NOTE(review): buf_len == 0 would make buf_len - 1 wrap around
 *            as size_t -- the visible callers pass sizeof(ps->name).
 *   pid      process id, formatted with "%d" into the path.
 *
 * NOTE(review): error handling, the action taken for embedded '\0' bytes
 * and the return value are not visible in this extract.
 */
72 static int pid_cmdline_strscpy(int procfd
, char *buffer
, size_t buf_len
, int pid
) {
73 char filename
[PATH_MAX
];
/* fd carries the _cleanup_close_ attribute; presumably it is closed
 * automatically when it goes out of scope -- confirm against the macro. */
74 _cleanup_close_
int fd
= -1;
/* Build the path relative to procfd, then open it read-only. */
77 sprintf(filename
, "%d/cmdline", pid
);
78 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
/* Single read(); one byte of buffer is kept in reserve (buf_len - 1). */
82 n
= read(fd
, buffer
, buf_len
-1);
/* Scan the n bytes that were read for embedded NUL bytes. */
85 for (i
= 0; i
< n
; i
++)
86 if (buffer
[i
] == '\0')
94 int log_sample(DIR *proc
,
96 struct ps_struct
*ps_first
,
97 struct list_sample_data
**ptr
,
101 static int vmstat
= -1;
102 _cleanup_free_
char *buf_schedstat
= NULL
;
113 static int e_fd
= -1;
118 struct list_sample_data
*sampledata
;
119 struct ps_sched_struct
*ps_prev
= NULL
;
125 procfd
= dirfd(proc
);
131 vmstat
= openat(procfd
, "vmstat", O_RDONLY
|O_CLOEXEC
);
133 return log_error_errno(errno
, "Failed to open /proc/vmstat: %m");
136 n
= pread(vmstat
, buf
, sizeof(buf
) - 1, 0);
138 vmstat
= safe_close(vmstat
);
148 if (sscanf(m
, "%s %s", key
, val
) < 2)
150 if (streq(key
, "pgpgin"))
151 sampledata
->blockstat
.bi
= atoi(val
);
152 if (streq(key
, "pgpgout")) {
153 sampledata
->blockstat
.bo
= atoi(val
);
162 /* Parse "/proc/schedstat" for overall CPU utilization */
163 r
= read_full_file("/proc/schedstat", &buf_schedstat
, NULL
);
165 return log_error_errno(r
, "Unable to read schedstat: %m");
169 if (sscanf(m
, "%s %*s %*s %*s %*s %*s %*s %s %s", key
, rt
, wt
) < 3)
172 if (strstr(key
, "cpu")) {
173 r
= safe_atoi((const char*)(key
+3), &c
);
174 if (r
< 0 || c
> MAXCPUS
-1)
175 /* Oops, we only have room for MAXCPUS data */
177 sampledata
->runtime
[c
] = atoll(rt
);
178 sampledata
->waittime
[c
] = atoll(wt
);
191 e_fd
= openat(procfd
, "sys/kernel/random/entropy_avail", O_RDONLY
|O_CLOEXEC
);
193 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
196 n
= pread(e_fd
, buf
, sizeof(buf
) - 1, 0);
198 e_fd
= safe_close(e_fd
);
201 sampledata
->entropy_avail
= atoi(buf
);
205 while ((ent
= readdir(proc
)) != NULL
) {
206 char filename
[PATH_MAX
];
208 struct ps_struct
*ps
;
210 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
213 pid
= atoi(ent
->d_name
);
219 while (ps
->next_ps
) {
225 /* end of our LL? then append a new record */
226 if (ps
->pid
!= pid
) {
227 _cleanup_fclose_
FILE *st
= NULL
;
229 struct ps_struct
*parent
;
231 ps
->next_ps
= new0(struct ps_struct
, 1);
240 ps
->sample
= new0(struct ps_sched_struct
, 1);
244 ps
->sample
->sampledata
= sampledata
;
248 /* mark our first sample */
249 ps
->first
= ps
->last
= ps
->sample
;
250 ps
->sample
->runtime
= atoll(rt
);
251 ps
->sample
->waittime
= atoll(wt
);
253 /* get name, start time */
255 sprintf(filename
, "%d/sched", pid
);
256 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
261 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
263 ps
->sched
= safe_close(ps
->sched
);
268 if (!sscanf(buf
, "%s %*s %*s", key
))
271 strscpy(ps
->name
, sizeof(ps
->name
), key
);
274 if (arg_show_cmdline
)
275 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);
286 if (!sscanf(m
, "%*s %*s %s", t
))
289 r
= safe_atod(t
, &ps
->starttime
);
293 ps
->starttime
/= 1000.0;
296 /* if this fails, that's OK */
297 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
,
298 ps
->pid
, &ps
->cgroup
);
301 sprintf(filename
, "%d/stat", pid
);
302 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
306 st
= fdopen(fd
, "re");
312 if (!fscanf(st
, "%*s %*s %*s %i", &p
))
318 * setup child pointers
320 * these are used to paint the tree coherently later
321 * each parent has a LL of children, and a LL of siblings
324 continue; /* nothing to do for init atm */
326 /* kthreadd has ppid=0, which breaks our tree ordering */
331 while ((parent
->next_ps
&& parent
->pid
!= ps
->ppid
))
332 parent
= parent
->next_ps
;
334 if (parent
->pid
!= ps
->ppid
) {
337 parent
= ps_first
->next_ps
;
342 if (!parent
->children
) {
343 /* it's the first child */
344 parent
->children
= ps
;
346 /* walk all children and append */
347 struct ps_struct
*children
;
348 children
= parent
->children
;
349 while (children
->next
)
350 children
= children
->next
;
356 /* else -> found pid, append data in ps */
358 /* below here is all continuous logging parts - we get here on every
362 if (ps
->schedstat
< 0) {
363 sprintf(filename
, "%d/schedstat", pid
);
364 ps
->schedstat
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
365 if (ps
->schedstat
< 0)
369 s
= pread(ps
->schedstat
, buf
, sizeof(buf
) - 1, 0);
371 /* clean up our file descriptors - assume that the process exited */
372 close(ps
->schedstat
);
374 ps
->sched
= safe_close(ps
->sched
);
380 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
383 ps
->sample
->next
= new0(struct ps_sched_struct
, 1);
384 if (!ps
->sample
->next
)
387 ps
->sample
->next
->prev
= ps
->sample
;
388 ps
->sample
= ps
->sample
->next
;
389 ps
->last
= ps
->sample
;
390 ps
->sample
->runtime
= atoll(rt
);
391 ps
->sample
->waittime
= atoll(wt
);
392 ps
->sample
->sampledata
= sampledata
;
393 ps
->sample
->ps_new
= ps
;
395 ps_prev
->cross
= ps
->sample
;
397 ps_prev
= ps
->sample
;
398 ps
->total
= (ps
->last
->runtime
- ps
->first
->runtime
)
401 /* Take into account CPU runtime/waittime spent in non-main threads of the process
402 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
403 * See https://github.com/systemd/systemd/issues/139
406 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
407 snprintf(filename
, sizeof(filename
), PID_FMT
"/task", pid
);
408 taskfd
= openat(procfd
, filename
, O_RDONLY
|O_DIRECTORY
|O_CLOEXEC
);
410 _cleanup_closedir_
DIR *taskdir
= NULL
;
412 taskdir
= fdopendir(taskfd
);
417 FOREACH_DIRENT(ent
, taskdir
, break) {
419 _cleanup_close_
int tid_schedstat
= -1;
423 if ((ent
->d_name
[0] < '0') || (ent
->d_name
[0] > '9'))
426 /* Skip main thread as it was already accounted */
427 r
= safe_atoi(ent
->d_name
, &tid
);
428 if (r
< 0 || tid
== pid
)
431 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
432 snprintf(filename
, sizeof(filename
), PID_FMT
"/schedstat", tid
);
433 tid_schedstat
= openat(taskfd
, filename
, O_RDONLY
|O_CLOEXEC
);
435 if (tid_schedstat
== -1)
438 s
= pread(tid_schedstat
, buf
, sizeof(buf
) - 1, 0);
443 if (!sscanf(buf
, "%s %s %*s", rt
, wt
))
446 r
= safe_atolli(rt
, &delta_rt
);
449 r
= safe_atolli(rt
, &delta_wt
);
452 ps
->sample
->runtime
+= delta_rt
;
453 ps
->sample
->waittime
+= delta_wt
;
462 sprintf(filename
, "%d/smaps", pid
);
463 fd
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
466 ps
->smaps
= fdopen(fd
, "re");
471 setvbuf(ps
->smaps
, smaps_buf
, _IOFBF
, sizeof(smaps_buf
));
476 /* test to see if we need to skip another field */
478 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
481 if (fread(buf
, 1, 28 * 15, ps
->smaps
) != (28 * 15)) {
484 if (buf
[392] == 'V') {
496 /* skip one line, this contains the object mapped. */
497 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
) {
500 /* then there's a 28 char 14 line block */
501 if (fread(buf
, 1, 28 * 14, ps
->smaps
) != 28 * 14) {
504 pss_kb
= atoi(&buf
[61]);
505 ps
->sample
->pss
+= pss_kb
;
507 /* skip one more line if this is a newer kernel */
509 if (fgets(buf
, sizeof(buf
), ps
->smaps
) == NULL
)
514 if (ps
->sample
->pss
> ps
->pss_max
)
515 ps
->pss_max
= ps
->sample
->pss
;
518 /* catch process rename, try to randomize time */
519 mod
= (arg_hz
< 4.0) ? 4.0 : (arg_hz
/ 4.0);
520 if (((sample
- ps
->pid
) + pid
) % (int)(mod
) == 0) {
523 /* get name, start time */
525 sprintf(filename
, "%d/sched", pid
);
526 ps
->sched
= openat(procfd
, filename
, O_RDONLY
|O_CLOEXEC
);
531 s
= pread(ps
->sched
, buf
, sizeof(buf
) - 1, 0);
533 /* clean up file descriptors */
534 ps
->sched
= safe_close(ps
->sched
);
535 ps
->schedstat
= safe_close(ps
->schedstat
);
541 if (!sscanf(buf
, "%s %*s %*s", key
))
544 strscpy(ps
->name
, sizeof(ps
->name
), key
);
547 if (arg_show_cmdline
)
548 pid_cmdline_strscpy(procfd
, ps
->name
, sizeof(ps
->name
), pid
);