]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/bootchart/store.c
util-lib: introduce dirent-util.[ch] for directory entry calls
[thirdparty/systemd.git] / src / bootchart / store.c
CommitLineData
6d031c0b
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
cd3bccaa 3/***
6d031c0b 4 This file is part of systemd.
cd3bccaa 5
3c527fd1 6 Copyright (C) 2009-2013 Intel Corporation
cd3bccaa
AK
7
8 Authors:
9 Auke Kok <auke-jan.h.kok@intel.com>
10
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
15
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 ***/
83fdc450 24
07630cea
LP
25#include <dirent.h>
26#include <fcntl.h>
83fdc450 27#include <limits.h>
83fdc450 28#include <stdio.h>
07630cea 29#include <stdlib.h>
83fdc450 30#include <string.h>
83fdc450 31#include <time.h>
07630cea 32#include <unistd.h>
83fdc450 33
6d031c0b 34#include "bootchart.h"
49e5b2a9 35#include "cgroup-util.h"
a0956174 36#include "dirent-util.h"
3ffd4af2 37#include "fd-util.h"
c91d0fd2 38#include "fileio.h"
6bedfcbb 39#include "parse-util.h"
3ffd4af2 40#include "store.h"
07630cea
LP
41#include "string-util.h"
42#include "strxcpyx.h"
43#include "time-util.h"
44#include "util.h"
83fdc450
AK
45
/*
 * Static 4 KiB stdio buffer handed to setvbuf() for the per-process smaps
 * streams, so PSS parsing is fully buffered through one shared buffer and
 * needs fewer read() calls.
 */
static char smaps_buf[4 * 1024];

/*
 * Layout of one smaps entry, detected once on first use by log_sample():
 *   0 - not probed yet
 *   1 - header line followed by the 14-line block only
 *   2 - one additional line follows the block and must be skipped
 */
static int skip;
83fdc450 53
6d031c0b 54double gettime_ns(void) {
2c408fbf 55 struct timespec n;
83fdc450 56
2c408fbf 57 clock_gettime(CLOCK_MONOTONIC, &n);
83fdc450 58
ece74070 59 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
83fdc450
AK
60}
61
/*
 * Advance to the start of the next line inside a NUL-terminated buffer.
 *
 * Returns a pointer to the character right after the first '\n' found at or
 * after buf, or NULL when buf is NULL or holds no further newline. The
 * buffer itself is not modified.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (buf == NULL)
                return NULL;

        newline = strchr(buf, '\n');

        return newline ? newline + 1 : NULL;
}
74
f9178132 75static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
6d031c0b 76 char filename[PATH_MAX];
af672f03 77 _cleanup_close_ int fd = -1;
6d031c0b 78 ssize_t n;
e90f9fa4 79
6d031c0b 80 sprintf(filename, "%d/cmdline", pid);
af672f03 81 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
6d031c0b
LP
82 if (fd < 0)
83 return -errno;
e90f9fa4 84
6d031c0b 85 n = read(fd, buffer, buf_len-1);
e90f9fa4
HH
86 if (n > 0) {
87 int i;
88 for (i = 0; i < n; i++)
89 if (buffer[i] == '\0')
90 buffer[i] = ' ';
91 buffer[n] = '\0';
92 }
af672f03 93
6d031c0b 94 return 0;
e90f9fa4 95}
83fdc450 96
1f2ecb03
DM
97int log_sample(DIR *proc,
98 int sample,
99 struct ps_struct *ps_first,
100 struct list_sample_data **ptr,
101 int *pscount,
102 int *cpus) {
103
9964a9eb 104 static int vmstat = -1;
c91d0fd2 105 _cleanup_free_ char *buf_schedstat = NULL;
522cd7f1 106 char buf[4096];
28989b63
TA
107 char key[256];
108 char val[256];
109 char rt[256];
110 char wt[256];
111 char *m;
c91d0fd2 112 int r;
28989b63
TA
113 int c;
114 int p;
115 int mod;
c87664fe 116 static int e_fd = -1;
28989b63
TA
117 ssize_t s;
118 ssize_t n;
119 struct dirent *ent;
f2f85884 120 int fd;
8dfb6e71
NC
121 struct list_sample_data *sampledata;
122 struct ps_sched_struct *ps_prev = NULL;
f9178132 123 int procfd;
caa43397 124 int taskfd = -1;
8dfb6e71 125
8dfb6e71 126 sampledata = *ptr;
f2f85884 127
f9178132
DM
128 procfd = dirfd(proc);
129 if (procfd < 0)
130 return -errno;
28989b63 131
9964a9eb 132 if (vmstat < 0) {
28989b63 133 /* block stuff */
af672f03
DM
134 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
135 if (vmstat < 0)
03995863 136 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
28989b63
TA
137 }
138
139 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
140 if (n <= 0) {
34a4071e
DM
141 vmstat = safe_close(vmstat);
142 if (n < 0)
143 return -errno;
144 return -ENODATA;
28989b63 145 }
af672f03 146
28989b63
TA
147 buf[n] = '\0';
148
149 m = buf;
150 while (m) {
151 if (sscanf(m, "%s %s", key, val) < 2)
152 goto vmstat_next;
53f5329f 153 if (streq(key, "pgpgin"))
8dfb6e71 154 sampledata->blockstat.bi = atoi(val);
53f5329f 155 if (streq(key, "pgpgout")) {
8dfb6e71 156 sampledata->blockstat.bo = atoi(val);
28989b63
TA
157 break;
158 }
83fdc450 159vmstat_next:
28989b63
TA
160 m = bufgetline(m);
161 if (!m)
162 break;
163 }
164
c91d0fd2
GM
165 /* Parse "/proc/schedstat" for overall CPU utilization */
166 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
167 if (r < 0)
168 return log_error_errno(r, "Unable to read schedstat: %m");
28989b63 169
c91d0fd2 170 m = buf_schedstat;
28989b63
TA
171 while (m) {
172 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
173 goto schedstat_next;
174
175 if (strstr(key, "cpu")) {
9bcf7507 176 r = safe_atoi((const char*)(key+3), &c);
c119700c 177 if (r < 0 || c > MAXCPUS -1)
28989b63
TA
178 /* Oops, we only have room for MAXCPUS data */
179 break;
8dfb6e71
NC
180 sampledata->runtime[c] = atoll(rt);
181 sampledata->waittime[c] = atoll(wt);
28989b63 182
1f2ecb03
DM
183 if (c == *cpus)
184 *cpus = c + 1;
28989b63 185 }
83fdc450 186schedstat_next:
28989b63
TA
187 m = bufgetline(m);
188 if (!m)
189 break;
190 }
191
6d031c0b 192 if (arg_entropy) {
c87664fe 193 if (e_fd < 0) {
af672f03
DM
194 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
195 if (e_fd < 0)
34a4071e 196 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
28989b63
TA
197 }
198
c87664fe
AS
199 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
200 if (n <= 0) {
af672f03 201 e_fd = safe_close(e_fd);
c87664fe
AS
202 } else {
203 buf[n] = '\0';
204 sampledata->entropy_avail = atoi(buf);
28989b63
TA
205 }
206 }
207
28989b63
TA
208 while ((ent = readdir(proc)) != NULL) {
209 char filename[PATH_MAX];
210 int pid;
211 struct ps_struct *ps;
212
213 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
214 continue;
215
216 pid = atoi(ent->d_name);
217
218 if (pid >= MAXPIDS)
219 continue;
220
221 ps = ps_first;
222 while (ps->next_ps) {
223 ps = ps->next_ps;
224 if (ps->pid == pid)
225 break;
226 }
227
228 /* end of our LL? then append a new record */
229 if (ps->pid != pid) {
7fd1b19b 230 _cleanup_fclose_ FILE *st = NULL;
28989b63
TA
231 char t[32];
232 struct ps_struct *parent;
233
955d98c9 234 ps->next_ps = new0(struct ps_struct, 1);
34a4071e
DM
235 if (!ps->next_ps)
236 return log_oom();
237
28989b63
TA
238 ps = ps->next_ps;
239 ps->pid = pid;
9964a9eb
AS
240 ps->sched = -1;
241 ps->schedstat = -1;
28989b63 242
955d98c9 243 ps->sample = new0(struct ps_sched_struct, 1);
34a4071e
DM
244 if (!ps->sample)
245 return log_oom();
246
8dfb6e71 247 ps->sample->sampledata = sampledata;
28989b63 248
1f2ecb03 249 (*pscount)++;
28989b63
TA
250
251 /* mark our first sample */
306e6650 252 ps->first = ps->last = ps->sample;
8dfb6e71
NC
253 ps->sample->runtime = atoll(rt);
254 ps->sample->waittime = atoll(wt);
28989b63
TA
255
256 /* get name, start time */
9964a9eb 257 if (ps->sched < 0) {
f2f85884 258 sprintf(filename, "%d/sched", pid);
af672f03
DM
259 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
260 if (ps->sched < 0)
28989b63
TA
261 continue;
262 }
263
264 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
265 if (s <= 0) {
af672f03 266 ps->sched = safe_close(ps->sched);
28989b63
TA
267 continue;
268 }
ef2648c1 269 buf[s] = '\0';
28989b63
TA
270
271 if (!sscanf(buf, "%s %*s %*s", key))
272 continue;
273
c309a713 274 strscpy(ps->name, sizeof(ps->name), key);
e90f9fa4
HH
275
276 /* cmdline */
6d031c0b 277 if (arg_show_cmdline)
f9178132 278 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
e90f9fa4 279
28989b63
TA
280 /* discard line 2 */
281 m = bufgetline(buf);
282 if (!m)
283 continue;
284
285 m = bufgetline(m);
286 if (!m)
287 continue;
288
289 if (!sscanf(m, "%*s %*s %s", t))
290 continue;
291
e10f3c43
TG
292 r = safe_atod(t, &ps->starttime);
293 if (r < 0)
294 continue;
295
296 ps->starttime /= 1000.0;
28989b63 297
49e5b2a9
WC
298 if (arg_show_cgroup)
299 /* if this fails, that's OK */
300 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
301 ps->pid, &ps->cgroup);
302
28989b63 303 /* ppid */
f2f85884 304 sprintf(filename, "%d/stat", pid);
af672f03
DM
305 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
306 if (fd < 0)
58ec01b3 307 continue;
af672f03
DM
308
309 st = fdopen(fd, "re");
58ec01b3
AS
310 if (!st) {
311 close(fd);
28989b63 312 continue;
58ec01b3 313 }
af672f03
DM
314
315 if (!fscanf(st, "%*s %*s %*s %i", &p))
28989b63 316 continue;
af672f03 317
28989b63
TA
318 ps->ppid = p;
319
320 /*
321 * setup child pointers
322 *
323 * these are used to paint the tree coherently later
324 * each parent has a LL of children, and a LL of siblings
325 */
326 if (pid == 1)
327 continue; /* nothing to do for init atm */
328
329 /* kthreadd has ppid=0, which breaks our tree ordering */
330 if (ps->ppid == 0)
331 ps->ppid = 1;
332
333 parent = ps_first;
334 while ((parent->next_ps && parent->pid != ps->ppid))
335 parent = parent->next_ps;
336
226b735a 337 if (parent->pid != ps->ppid) {
28989b63
TA
338 /* orphan */
339 ps->ppid = 1;
340 parent = ps_first->next_ps;
341 }
342
343 ps->parent = parent;
344
345 if (!parent->children) {
346 /* it's the first child */
347 parent->children = ps;
348 } else {
349 /* walk all children and append */
350 struct ps_struct *children;
351 children = parent->children;
352 while (children->next)
353 children = children->next;
af672f03 354
28989b63
TA
355 children->next = ps;
356 }
357 }
358
359 /* else -> found pid, append data in ps */
360
361 /* below here is all continuous logging parts - we get here on every
362 * iteration */
363
364 /* rt, wt */
9964a9eb 365 if (ps->schedstat < 0) {
f2f85884 366 sprintf(filename, "%d/schedstat", pid);
af672f03
DM
367 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
368 if (ps->schedstat < 0)
28989b63
TA
369 continue;
370 }
af672f03 371
ef2648c1
LN
372 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
373 if (s <= 0) {
28989b63
TA
374 /* clean up our file descriptors - assume that the process exited */
375 close(ps->schedstat);
9964a9eb 376 ps->schedstat = -1;
af672f03 377 ps->sched = safe_close(ps->sched);
28989b63
TA
378 continue;
379 }
af672f03 380
ef2648c1
LN
381 buf[s] = '\0';
382
28989b63
TA
383 if (!sscanf(buf, "%s %s %*s", rt, wt))
384 continue;
385
955d98c9 386 ps->sample->next = new0(struct ps_sched_struct, 1);
34a4071e
DM
387 if (!ps->sample->next)
388 return log_oom();
389
8dfb6e71
NC
390 ps->sample->next->prev = ps->sample;
391 ps->sample = ps->sample->next;
392 ps->last = ps->sample;
393 ps->sample->runtime = atoll(rt);
394 ps->sample->waittime = atoll(wt);
395 ps->sample->sampledata = sampledata;
396 ps->sample->ps_new = ps;
af672f03 397 if (ps_prev)
8dfb6e71 398 ps_prev->cross = ps->sample;
af672f03 399
8dfb6e71
NC
400 ps_prev = ps->sample;
401 ps->total = (ps->last->runtime - ps->first->runtime)
402 / 1000000000.0;
28989b63 403
caa43397
GM
404 /* Take into account CPU runtime/waittime spent in non-main threads of the process
405 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
406 * See https://github.com/systemd/systemd/issues/139
407 */
408
409 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
410 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
411 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
412 if (taskfd >= 0) {
413 _cleanup_closedir_ DIR *taskdir = NULL;
414
415 taskdir = fdopendir(taskfd);
416 if (!taskdir) {
417 safe_close(taskfd);
418 return -errno;
419 }
420 FOREACH_DIRENT(ent, taskdir, break) {
caa43397
GM
421 int tid = -1;
422 _cleanup_close_ int tid_schedstat = -1;
423 long long delta_rt;
424 long long delta_wt;
425
426 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
427 continue;
428
429 /* Skip main thread as it was already accounted */
430 r = safe_atoi(ent->d_name, &tid);
431 if (r < 0 || tid == pid)
432 continue;
433
434 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
435 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
436 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
437
438 if (tid_schedstat == -1)
439 continue;
440
441 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
442 if (s <= 0)
443 continue;
444 buf[s] = '\0';
445
446 if (!sscanf(buf, "%s %s %*s", rt, wt))
447 continue;
448
449 r = safe_atolli(rt, &delta_rt);
450 if (r < 0)
451 continue;
452 r = safe_atolli(rt, &delta_wt);
453 if (r < 0)
454 continue;
455 ps->sample->runtime += delta_rt;
456 ps->sample->waittime += delta_wt;
457 }
458 }
459
6d031c0b 460 if (!arg_pss)
28989b63 461 goto catch_rename;
8dfb6e71 462
28989b63
TA
463 /* Pss */
464 if (!ps->smaps) {
f2f85884 465 sprintf(filename, "%d/smaps", pid);
af672f03
DM
466 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
467 if (fd < 0)
58ec01b3 468 continue;
af672f03 469 ps->smaps = fdopen(fd, "re");
58ec01b3
AS
470 if (!ps->smaps) {
471 close(fd);
28989b63 472 continue;
58ec01b3 473 }
28989b63 474 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
af672f03 475 } else {
8dfb6e71
NC
476 rewind(ps->smaps);
477 }
af672f03 478
8dfb6e71
NC
479 /* test to see if we need to skip another field */
480 if (skip == 0) {
481 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
482 continue;
483 }
484 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
485 continue;
486 }
487 if (buf[392] == 'V') {
488 skip = 2;
489 }
490 else {
491 skip = 1;
492 }
28989b63
TA
493 rewind(ps->smaps);
494 }
af672f03 495
28989b63
TA
496 while (1) {
497 int pss_kb;
498
8dfb6e71
NC
499 /* skip one line, this contains the object mapped. */
500 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
28989b63 501 break;
8dfb6e71 502 }
28989b63 503 /* then there's a 28 char 14 line block */
8dfb6e71 504 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
28989b63 505 break;
8dfb6e71 506 }
28989b63 507 pss_kb = atoi(&buf[61]);
8dfb6e71 508 ps->sample->pss += pss_kb;
28989b63 509
8dfb6e71
NC
510 /* skip one more line if this is a newer kernel */
511 if (skip == 2) {
512 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
513 break;
514 }
515 }
af672f03 516
8dfb6e71
NC
517 if (ps->sample->pss > ps->pss_max)
518 ps->pss_max = ps->sample->pss;
83fdc450
AK
519
520catch_rename:
28989b63 521 /* catch process rename, try to randomize time */
6d031c0b 522 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
1f2ecb03 523 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
28989b63
TA
524
525 /* re-fetch name */
526 /* get name, start time */
eaf15609 527 if (ps->sched < 0) {
f2f85884 528 sprintf(filename, "%d/sched", pid);
af672f03
DM
529 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
530 if (ps->sched < 0)
28989b63
TA
531 continue;
532 }
af672f03 533
ef2648c1
LN
534 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
535 if (s <= 0) {
28989b63 536 /* clean up file descriptors */
af672f03
DM
537 ps->sched = safe_close(ps->sched);
538 ps->schedstat = safe_close(ps->schedstat);
28989b63
TA
539 continue;
540 }
af672f03 541
ef2648c1 542 buf[s] = '\0';
28989b63
TA
543
544 if (!sscanf(buf, "%s %*s %*s", key))
545 continue;
546
c309a713 547 strscpy(ps->name, sizeof(ps->name), key);
e90f9fa4
HH
548
549 /* cmdline */
6d031c0b 550 if (arg_show_cmdline)
f9178132 551 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
28989b63
TA
552 }
553 }
34a4071e
DM
554
555 return 0;
83fdc450 556}