]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/bootchart/store.c
util-lib: introduce dirent-util.[ch] for directory entry calls
[thirdparty/systemd.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright (C) 2009-2013 Intel Corporation
7
8 Authors:
9 Auke Kok <auke-jan.h.kok@intel.com>
10
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
15
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 ***/
24
25 #include <dirent.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33
34 #include "bootchart.h"
35 #include "cgroup-util.h"
36 #include "dirent-util.h"
37 #include "fd-util.h"
38 #include "fileio.h"
39 #include "parse-util.h"
40 #include "store.h"
41 #include "string-util.h"
42 #include "strxcpyx.h"
43 #include "time-util.h"
44 #include "util.h"
45
/*
 * Allocate a static 4k buffer for stdio - primarily used to increase
 * PSS (smaps) buffering from the default 1k stdio buffer to reduce
 * read() overhead.
 */
51 static char smaps_buf[4096];
52 static int skip = 0;
53
54 double gettime_ns(void) {
55 struct timespec n;
56
57 clock_gettime(CLOCK_MONOTONIC, &n);
58
59 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
60 }
61
/*
 * Step to the beginning of the next line inside a NUL-terminated buffer.
 *
 * Returns a pointer to the character immediately following the first
 * '\n' found in buf, or NULL when buf is NULL or holds no newline.
 */
static char *bufgetline(char *buf) {
        char *newline = buf ? strchr(buf, '\n') : NULL;

        return newline ? newline + 1 : NULL;
}
74
75 static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
76 char filename[PATH_MAX];
77 _cleanup_close_ int fd = -1;
78 ssize_t n;
79
80 sprintf(filename, "%d/cmdline", pid);
81 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
82 if (fd < 0)
83 return -errno;
84
85 n = read(fd, buffer, buf_len-1);
86 if (n > 0) {
87 int i;
88 for (i = 0; i < n; i++)
89 if (buffer[i] == '\0')
90 buffer[i] = ' ';
91 buffer[n] = '\0';
92 }
93
94 return 0;
95 }
96
97 int log_sample(DIR *proc,
98 int sample,
99 struct ps_struct *ps_first,
100 struct list_sample_data **ptr,
101 int *pscount,
102 int *cpus) {
103
104 static int vmstat = -1;
105 _cleanup_free_ char *buf_schedstat = NULL;
106 char buf[4096];
107 char key[256];
108 char val[256];
109 char rt[256];
110 char wt[256];
111 char *m;
112 int r;
113 int c;
114 int p;
115 int mod;
116 static int e_fd = -1;
117 ssize_t s;
118 ssize_t n;
119 struct dirent *ent;
120 int fd;
121 struct list_sample_data *sampledata;
122 struct ps_sched_struct *ps_prev = NULL;
123 int procfd;
124 int taskfd = -1;
125
126 sampledata = *ptr;
127
128 procfd = dirfd(proc);
129 if (procfd < 0)
130 return -errno;
131
132 if (vmstat < 0) {
133 /* block stuff */
134 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
135 if (vmstat < 0)
136 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
137 }
138
139 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
140 if (n <= 0) {
141 vmstat = safe_close(vmstat);
142 if (n < 0)
143 return -errno;
144 return -ENODATA;
145 }
146
147 buf[n] = '\0';
148
149 m = buf;
150 while (m) {
151 if (sscanf(m, "%s %s", key, val) < 2)
152 goto vmstat_next;
153 if (streq(key, "pgpgin"))
154 sampledata->blockstat.bi = atoi(val);
155 if (streq(key, "pgpgout")) {
156 sampledata->blockstat.bo = atoi(val);
157 break;
158 }
159 vmstat_next:
160 m = bufgetline(m);
161 if (!m)
162 break;
163 }
164
165 /* Parse "/proc/schedstat" for overall CPU utilization */
166 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
167 if (r < 0)
168 return log_error_errno(r, "Unable to read schedstat: %m");
169
170 m = buf_schedstat;
171 while (m) {
172 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
173 goto schedstat_next;
174
175 if (strstr(key, "cpu")) {
176 r = safe_atoi((const char*)(key+3), &c);
177 if (r < 0 || c > MAXCPUS -1)
178 /* Oops, we only have room for MAXCPUS data */
179 break;
180 sampledata->runtime[c] = atoll(rt);
181 sampledata->waittime[c] = atoll(wt);
182
183 if (c == *cpus)
184 *cpus = c + 1;
185 }
186 schedstat_next:
187 m = bufgetline(m);
188 if (!m)
189 break;
190 }
191
192 if (arg_entropy) {
193 if (e_fd < 0) {
194 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
195 if (e_fd < 0)
196 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
197 }
198
199 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
200 if (n <= 0) {
201 e_fd = safe_close(e_fd);
202 } else {
203 buf[n] = '\0';
204 sampledata->entropy_avail = atoi(buf);
205 }
206 }
207
208 while ((ent = readdir(proc)) != NULL) {
209 char filename[PATH_MAX];
210 int pid;
211 struct ps_struct *ps;
212
213 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
214 continue;
215
216 pid = atoi(ent->d_name);
217
218 if (pid >= MAXPIDS)
219 continue;
220
221 ps = ps_first;
222 while (ps->next_ps) {
223 ps = ps->next_ps;
224 if (ps->pid == pid)
225 break;
226 }
227
228 /* end of our LL? then append a new record */
229 if (ps->pid != pid) {
230 _cleanup_fclose_ FILE *st = NULL;
231 char t[32];
232 struct ps_struct *parent;
233
234 ps->next_ps = new0(struct ps_struct, 1);
235 if (!ps->next_ps)
236 return log_oom();
237
238 ps = ps->next_ps;
239 ps->pid = pid;
240 ps->sched = -1;
241 ps->schedstat = -1;
242
243 ps->sample = new0(struct ps_sched_struct, 1);
244 if (!ps->sample)
245 return log_oom();
246
247 ps->sample->sampledata = sampledata;
248
249 (*pscount)++;
250
251 /* mark our first sample */
252 ps->first = ps->last = ps->sample;
253 ps->sample->runtime = atoll(rt);
254 ps->sample->waittime = atoll(wt);
255
256 /* get name, start time */
257 if (ps->sched < 0) {
258 sprintf(filename, "%d/sched", pid);
259 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
260 if (ps->sched < 0)
261 continue;
262 }
263
264 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
265 if (s <= 0) {
266 ps->sched = safe_close(ps->sched);
267 continue;
268 }
269 buf[s] = '\0';
270
271 if (!sscanf(buf, "%s %*s %*s", key))
272 continue;
273
274 strscpy(ps->name, sizeof(ps->name), key);
275
276 /* cmdline */
277 if (arg_show_cmdline)
278 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
279
280 /* discard line 2 */
281 m = bufgetline(buf);
282 if (!m)
283 continue;
284
285 m = bufgetline(m);
286 if (!m)
287 continue;
288
289 if (!sscanf(m, "%*s %*s %s", t))
290 continue;
291
292 r = safe_atod(t, &ps->starttime);
293 if (r < 0)
294 continue;
295
296 ps->starttime /= 1000.0;
297
298 if (arg_show_cgroup)
299 /* if this fails, that's OK */
300 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
301 ps->pid, &ps->cgroup);
302
303 /* ppid */
304 sprintf(filename, "%d/stat", pid);
305 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
306 if (fd < 0)
307 continue;
308
309 st = fdopen(fd, "re");
310 if (!st) {
311 close(fd);
312 continue;
313 }
314
315 if (!fscanf(st, "%*s %*s %*s %i", &p))
316 continue;
317
318 ps->ppid = p;
319
320 /*
321 * setup child pointers
322 *
323 * these are used to paint the tree coherently later
324 * each parent has a LL of children, and a LL of siblings
325 */
326 if (pid == 1)
327 continue; /* nothing to do for init atm */
328
329 /* kthreadd has ppid=0, which breaks our tree ordering */
330 if (ps->ppid == 0)
331 ps->ppid = 1;
332
333 parent = ps_first;
334 while ((parent->next_ps && parent->pid != ps->ppid))
335 parent = parent->next_ps;
336
337 if (parent->pid != ps->ppid) {
338 /* orphan */
339 ps->ppid = 1;
340 parent = ps_first->next_ps;
341 }
342
343 ps->parent = parent;
344
345 if (!parent->children) {
346 /* it's the first child */
347 parent->children = ps;
348 } else {
349 /* walk all children and append */
350 struct ps_struct *children;
351 children = parent->children;
352 while (children->next)
353 children = children->next;
354
355 children->next = ps;
356 }
357 }
358
359 /* else -> found pid, append data in ps */
360
361 /* below here is all continuous logging parts - we get here on every
362 * iteration */
363
364 /* rt, wt */
365 if (ps->schedstat < 0) {
366 sprintf(filename, "%d/schedstat", pid);
367 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
368 if (ps->schedstat < 0)
369 continue;
370 }
371
372 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
373 if (s <= 0) {
374 /* clean up our file descriptors - assume that the process exited */
375 close(ps->schedstat);
376 ps->schedstat = -1;
377 ps->sched = safe_close(ps->sched);
378 continue;
379 }
380
381 buf[s] = '\0';
382
383 if (!sscanf(buf, "%s %s %*s", rt, wt))
384 continue;
385
386 ps->sample->next = new0(struct ps_sched_struct, 1);
387 if (!ps->sample->next)
388 return log_oom();
389
390 ps->sample->next->prev = ps->sample;
391 ps->sample = ps->sample->next;
392 ps->last = ps->sample;
393 ps->sample->runtime = atoll(rt);
394 ps->sample->waittime = atoll(wt);
395 ps->sample->sampledata = sampledata;
396 ps->sample->ps_new = ps;
397 if (ps_prev)
398 ps_prev->cross = ps->sample;
399
400 ps_prev = ps->sample;
401 ps->total = (ps->last->runtime - ps->first->runtime)
402 / 1000000000.0;
403
404 /* Take into account CPU runtime/waittime spent in non-main threads of the process
405 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
406 * See https://github.com/systemd/systemd/issues/139
407 */
408
409 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
410 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
411 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
412 if (taskfd >= 0) {
413 _cleanup_closedir_ DIR *taskdir = NULL;
414
415 taskdir = fdopendir(taskfd);
416 if (!taskdir) {
417 safe_close(taskfd);
418 return -errno;
419 }
420 FOREACH_DIRENT(ent, taskdir, break) {
421 int tid = -1;
422 _cleanup_close_ int tid_schedstat = -1;
423 long long delta_rt;
424 long long delta_wt;
425
426 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
427 continue;
428
429 /* Skip main thread as it was already accounted */
430 r = safe_atoi(ent->d_name, &tid);
431 if (r < 0 || tid == pid)
432 continue;
433
434 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
435 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
436 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
437
438 if (tid_schedstat == -1)
439 continue;
440
441 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
442 if (s <= 0)
443 continue;
444 buf[s] = '\0';
445
446 if (!sscanf(buf, "%s %s %*s", rt, wt))
447 continue;
448
449 r = safe_atolli(rt, &delta_rt);
450 if (r < 0)
451 continue;
452 r = safe_atolli(rt, &delta_wt);
453 if (r < 0)
454 continue;
455 ps->sample->runtime += delta_rt;
456 ps->sample->waittime += delta_wt;
457 }
458 }
459
460 if (!arg_pss)
461 goto catch_rename;
462
463 /* Pss */
464 if (!ps->smaps) {
465 sprintf(filename, "%d/smaps", pid);
466 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
467 if (fd < 0)
468 continue;
469 ps->smaps = fdopen(fd, "re");
470 if (!ps->smaps) {
471 close(fd);
472 continue;
473 }
474 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
475 } else {
476 rewind(ps->smaps);
477 }
478
479 /* test to see if we need to skip another field */
480 if (skip == 0) {
481 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
482 continue;
483 }
484 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
485 continue;
486 }
487 if (buf[392] == 'V') {
488 skip = 2;
489 }
490 else {
491 skip = 1;
492 }
493 rewind(ps->smaps);
494 }
495
496 while (1) {
497 int pss_kb;
498
499 /* skip one line, this contains the object mapped. */
500 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
501 break;
502 }
503 /* then there's a 28 char 14 line block */
504 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
505 break;
506 }
507 pss_kb = atoi(&buf[61]);
508 ps->sample->pss += pss_kb;
509
510 /* skip one more line if this is a newer kernel */
511 if (skip == 2) {
512 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
513 break;
514 }
515 }
516
517 if (ps->sample->pss > ps->pss_max)
518 ps->pss_max = ps->sample->pss;
519
520 catch_rename:
521 /* catch process rename, try to randomize time */
522 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
523 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
524
525 /* re-fetch name */
526 /* get name, start time */
527 if (ps->sched < 0) {
528 sprintf(filename, "%d/sched", pid);
529 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
530 if (ps->sched < 0)
531 continue;
532 }
533
534 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
535 if (s <= 0) {
536 /* clean up file descriptors */
537 ps->sched = safe_close(ps->sched);
538 ps->schedstat = safe_close(ps->schedstat);
539 continue;
540 }
541
542 buf[s] = '\0';
543
544 if (!sscanf(buf, "%s %*s %*s", key))
545 continue;
546
547 strscpy(ps->name, sizeof(ps->name), key);
548
549 /* cmdline */
550 if (arg_show_cmdline)
551 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
552 }
553 }
554
555 return 0;
556 }