]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/bootchart/store.c
Merge pull request #362 from d-hatayama/fix_selinux_unit_check_v2
[thirdparty/systemd.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright (C) 2009-2013 Intel Corporation
7
8 Authors:
9 Auke Kok <auke-jan.h.kok@intel.com>
10
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
15
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 ***/
24
25 #include <unistd.h>
26 #include <stdlib.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <fcntl.h>
32 #include <time.h>
33
34 #include "util.h"
35 #include "time-util.h"
36 #include "strxcpyx.h"
37 #include "store.h"
38 #include "bootchart.h"
39 #include "cgroup-util.h"
40 #include "fileio.h"
41
/*
 * File-scope state shared by every call to log_sample().
 *
 * smaps_buf: a static 4k buffer handed to setvbuf() for the per-process
 *            smaps streams, so that PSS parsing is done with a larger
 *            stdio buffer and fewer read() calls.
 *
 * skip:      layout of one smaps entry, probed once on first use:
 *              0 - not probed yet
 *              1 - a header line followed by a 14 line block
 *              2 - as above, plus one extra trailing line that has to
 *                  be skipped
 */
static char smaps_buf[4096];
static int skip;
49
50 double gettime_ns(void) {
51 struct timespec n;
52
53 clock_gettime(CLOCK_MONOTONIC, &n);
54
55 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
56 }
57
/*
 * Advance to the next line inside a NUL-terminated text buffer.
 *
 * Returns a pointer to the character right after the first '\n' found
 * at or after buf, or NULL when buf is NULL or holds no further newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (buf == NULL)
                return NULL;

        newline = strchr(buf, '\n');

        return newline ? newline + 1 : NULL;
}
70
71 static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
72 char filename[PATH_MAX];
73 _cleanup_close_ int fd = -1;
74 ssize_t n;
75
76 sprintf(filename, "%d/cmdline", pid);
77 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
78 if (fd < 0)
79 return -errno;
80
81 n = read(fd, buffer, buf_len-1);
82 if (n > 0) {
83 int i;
84 for (i = 0; i < n; i++)
85 if (buffer[i] == '\0')
86 buffer[i] = ' ';
87 buffer[n] = '\0';
88 }
89
90 return 0;
91 }
92
93 int log_sample(DIR *proc,
94 int sample,
95 struct ps_struct *ps_first,
96 struct list_sample_data **ptr,
97 int *pscount,
98 int *cpus) {
99
100 static int vmstat = -1;
101 _cleanup_free_ char *buf_schedstat = NULL;
102 char buf[4096];
103 char key[256];
104 char val[256];
105 char rt[256];
106 char wt[256];
107 char *m;
108 int r;
109 int c;
110 int p;
111 int mod;
112 static int e_fd = -1;
113 ssize_t s;
114 ssize_t n;
115 struct dirent *ent;
116 int fd;
117 struct list_sample_data *sampledata;
118 struct ps_sched_struct *ps_prev = NULL;
119 int procfd;
120 int taskfd = -1;
121
122 sampledata = *ptr;
123
124 procfd = dirfd(proc);
125 if (procfd < 0)
126 return -errno;
127
128 if (vmstat < 0) {
129 /* block stuff */
130 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
131 if (vmstat < 0)
132 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
133 }
134
135 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
136 if (n <= 0) {
137 vmstat = safe_close(vmstat);
138 if (n < 0)
139 return -errno;
140 return -ENODATA;
141 }
142
143 buf[n] = '\0';
144
145 m = buf;
146 while (m) {
147 if (sscanf(m, "%s %s", key, val) < 2)
148 goto vmstat_next;
149 if (streq(key, "pgpgin"))
150 sampledata->blockstat.bi = atoi(val);
151 if (streq(key, "pgpgout")) {
152 sampledata->blockstat.bo = atoi(val);
153 break;
154 }
155 vmstat_next:
156 m = bufgetline(m);
157 if (!m)
158 break;
159 }
160
161 /* Parse "/proc/schedstat" for overall CPU utilization */
162 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
163 if (r < 0)
164 return log_error_errno(r, "Unable to read schedstat: %m");
165
166 m = buf_schedstat;
167 while (m) {
168 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
169 goto schedstat_next;
170
171 if (strstr(key, "cpu")) {
172 r = safe_atoi((const char*)(key+3), &c);
173 if (r < 0 || c > MAXCPUS -1)
174 /* Oops, we only have room for MAXCPUS data */
175 break;
176 sampledata->runtime[c] = atoll(rt);
177 sampledata->waittime[c] = atoll(wt);
178
179 if (c == *cpus)
180 *cpus = c + 1;
181 }
182 schedstat_next:
183 m = bufgetline(m);
184 if (!m)
185 break;
186 }
187
188 if (arg_entropy) {
189 if (e_fd < 0) {
190 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
191 if (e_fd < 0)
192 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
193 }
194
195 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
196 if (n <= 0) {
197 e_fd = safe_close(e_fd);
198 } else {
199 buf[n] = '\0';
200 sampledata->entropy_avail = atoi(buf);
201 }
202 }
203
204 while ((ent = readdir(proc)) != NULL) {
205 char filename[PATH_MAX];
206 int pid;
207 struct ps_struct *ps;
208
209 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
210 continue;
211
212 pid = atoi(ent->d_name);
213
214 if (pid >= MAXPIDS)
215 continue;
216
217 ps = ps_first;
218 while (ps->next_ps) {
219 ps = ps->next_ps;
220 if (ps->pid == pid)
221 break;
222 }
223
224 /* end of our LL? then append a new record */
225 if (ps->pid != pid) {
226 _cleanup_fclose_ FILE *st = NULL;
227 char t[32];
228 struct ps_struct *parent;
229
230 ps->next_ps = new0(struct ps_struct, 1);
231 if (!ps->next_ps)
232 return log_oom();
233
234 ps = ps->next_ps;
235 ps->pid = pid;
236 ps->sched = -1;
237 ps->schedstat = -1;
238
239 ps->sample = new0(struct ps_sched_struct, 1);
240 if (!ps->sample)
241 return log_oom();
242
243 ps->sample->sampledata = sampledata;
244
245 (*pscount)++;
246
247 /* mark our first sample */
248 ps->first = ps->last = ps->sample;
249 ps->sample->runtime = atoll(rt);
250 ps->sample->waittime = atoll(wt);
251
252 /* get name, start time */
253 if (ps->sched < 0) {
254 sprintf(filename, "%d/sched", pid);
255 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
256 if (ps->sched < 0)
257 continue;
258 }
259
260 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
261 if (s <= 0) {
262 ps->sched = safe_close(ps->sched);
263 continue;
264 }
265 buf[s] = '\0';
266
267 if (!sscanf(buf, "%s %*s %*s", key))
268 continue;
269
270 strscpy(ps->name, sizeof(ps->name), key);
271
272 /* cmdline */
273 if (arg_show_cmdline)
274 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
275
276 /* discard line 2 */
277 m = bufgetline(buf);
278 if (!m)
279 continue;
280
281 m = bufgetline(m);
282 if (!m)
283 continue;
284
285 if (!sscanf(m, "%*s %*s %s", t))
286 continue;
287
288 r = safe_atod(t, &ps->starttime);
289 if (r < 0)
290 continue;
291
292 ps->starttime /= 1000.0;
293
294 if (arg_show_cgroup)
295 /* if this fails, that's OK */
296 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
297 ps->pid, &ps->cgroup);
298
299 /* ppid */
300 sprintf(filename, "%d/stat", pid);
301 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
302 if (fd < 0)
303 continue;
304
305 st = fdopen(fd, "re");
306 if (!st) {
307 close(fd);
308 continue;
309 }
310
311 if (!fscanf(st, "%*s %*s %*s %i", &p))
312 continue;
313
314 ps->ppid = p;
315
316 /*
317 * setup child pointers
318 *
319 * these are used to paint the tree coherently later
320 * each parent has a LL of children, and a LL of siblings
321 */
322 if (pid == 1)
323 continue; /* nothing to do for init atm */
324
325 /* kthreadd has ppid=0, which breaks our tree ordering */
326 if (ps->ppid == 0)
327 ps->ppid = 1;
328
329 parent = ps_first;
330 while ((parent->next_ps && parent->pid != ps->ppid))
331 parent = parent->next_ps;
332
333 if (parent->pid != ps->ppid) {
334 /* orphan */
335 ps->ppid = 1;
336 parent = ps_first->next_ps;
337 }
338
339 ps->parent = parent;
340
341 if (!parent->children) {
342 /* it's the first child */
343 parent->children = ps;
344 } else {
345 /* walk all children and append */
346 struct ps_struct *children;
347 children = parent->children;
348 while (children->next)
349 children = children->next;
350
351 children->next = ps;
352 }
353 }
354
355 /* else -> found pid, append data in ps */
356
357 /* below here is all continuous logging parts - we get here on every
358 * iteration */
359
360 /* rt, wt */
361 if (ps->schedstat < 0) {
362 sprintf(filename, "%d/schedstat", pid);
363 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
364 if (ps->schedstat < 0)
365 continue;
366 }
367
368 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
369 if (s <= 0) {
370 /* clean up our file descriptors - assume that the process exited */
371 close(ps->schedstat);
372 ps->schedstat = -1;
373 ps->sched = safe_close(ps->sched);
374 continue;
375 }
376
377 buf[s] = '\0';
378
379 if (!sscanf(buf, "%s %s %*s", rt, wt))
380 continue;
381
382 ps->sample->next = new0(struct ps_sched_struct, 1);
383 if (!ps->sample->next)
384 return log_oom();
385
386 ps->sample->next->prev = ps->sample;
387 ps->sample = ps->sample->next;
388 ps->last = ps->sample;
389 ps->sample->runtime = atoll(rt);
390 ps->sample->waittime = atoll(wt);
391 ps->sample->sampledata = sampledata;
392 ps->sample->ps_new = ps;
393 if (ps_prev)
394 ps_prev->cross = ps->sample;
395
396 ps_prev = ps->sample;
397 ps->total = (ps->last->runtime - ps->first->runtime)
398 / 1000000000.0;
399
400 /* Take into account CPU runtime/waittime spent in non-main threads of the process
401 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
402 * See https://github.com/systemd/systemd/issues/139
403 */
404
405 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
406 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
407 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
408 if (taskfd >= 0) {
409 _cleanup_closedir_ DIR *taskdir = NULL;
410
411 taskdir = fdopendir(taskfd);
412 if (!taskdir) {
413 safe_close(taskfd);
414 return -errno;
415 }
416 FOREACH_DIRENT(ent, taskdir, break) {
417 int tid = -1;
418 _cleanup_close_ int tid_schedstat = -1;
419 long long delta_rt;
420 long long delta_wt;
421
422 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
423 continue;
424
425 /* Skip main thread as it was already accounted */
426 r = safe_atoi(ent->d_name, &tid);
427 if (r < 0 || tid == pid)
428 continue;
429
430 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
431 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
432 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
433
434 if (tid_schedstat == -1)
435 continue;
436
437 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
438 if (s <= 0)
439 continue;
440 buf[s] = '\0';
441
442 if (!sscanf(buf, "%s %s %*s", rt, wt))
443 continue;
444
445 r = safe_atolli(rt, &delta_rt);
446 if (r < 0)
447 continue;
448 r = safe_atolli(rt, &delta_wt);
449 if (r < 0)
450 continue;
451 ps->sample->runtime += delta_rt;
452 ps->sample->waittime += delta_wt;
453 }
454 }
455
456 if (!arg_pss)
457 goto catch_rename;
458
459 /* Pss */
460 if (!ps->smaps) {
461 sprintf(filename, "%d/smaps", pid);
462 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
463 if (fd < 0)
464 continue;
465 ps->smaps = fdopen(fd, "re");
466 if (!ps->smaps) {
467 close(fd);
468 continue;
469 }
470 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
471 } else {
472 rewind(ps->smaps);
473 }
474
475 /* test to see if we need to skip another field */
476 if (skip == 0) {
477 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
478 continue;
479 }
480 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
481 continue;
482 }
483 if (buf[392] == 'V') {
484 skip = 2;
485 }
486 else {
487 skip = 1;
488 }
489 rewind(ps->smaps);
490 }
491
492 while (1) {
493 int pss_kb;
494
495 /* skip one line, this contains the object mapped. */
496 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
497 break;
498 }
499 /* then there's a 28 char 14 line block */
500 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
501 break;
502 }
503 pss_kb = atoi(&buf[61]);
504 ps->sample->pss += pss_kb;
505
506 /* skip one more line if this is a newer kernel */
507 if (skip == 2) {
508 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
509 break;
510 }
511 }
512
513 if (ps->sample->pss > ps->pss_max)
514 ps->pss_max = ps->sample->pss;
515
516 catch_rename:
517 /* catch process rename, try to randomize time */
518 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
519 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
520
521 /* re-fetch name */
522 /* get name, start time */
523 if (ps->sched < 0) {
524 sprintf(filename, "%d/sched", pid);
525 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
526 if (ps->sched < 0)
527 continue;
528 }
529
530 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
531 if (s <= 0) {
532 /* clean up file descriptors */
533 ps->sched = safe_close(ps->sched);
534 ps->schedstat = safe_close(ps->schedstat);
535 continue;
536 }
537
538 buf[s] = '\0';
539
540 if (!sscanf(buf, "%s %*s %*s", key))
541 continue;
542
543 strscpy(ps->name, sizeof(ps->name), key);
544
545 /* cmdline */
546 if (arg_show_cmdline)
547 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
548 }
549 }
550
551 return 0;
552 }