]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/bootchart/store.c
Merge pull request #1668 from ssahani/net1
[thirdparty/systemd.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright (C) 2009-2013 Intel Corporation
7
8 Authors:
9 Auke Kok <auke-jan.h.kok@intel.com>
10
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
15
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 ***/
24
25 #include <dirent.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33
34 #include "bootchart.h"
35 #include "cgroup-util.h"
36 #include "fd-util.h"
37 #include "fileio.h"
38 #include "store.h"
39 #include "string-util.h"
40 #include "strxcpyx.h"
41 #include "time-util.h"
42 #include "util.h"
43
/*
 * Alloc a static 4k buffer for stdio - primarily used to increase
 * PSS buffering from the default 1k stdio buffer to reduce
 * read() overhead. log_sample() hands this one buffer to setvbuf()
 * for every ps->smaps stream it opens, so all of them share it.
 */
static char smaps_buf[4096];

/*
 * Layout of the per-mapping blocks in smaps, probed once by log_sample():
 *   0 - not probed yet
 *   1 - nothing extra follows the fixed-size block of each mapping
 *   2 - one more line follows each block (newer kernels) and is skipped
 */
static int skip = 0;
51
52 double gettime_ns(void) {
53 struct timespec n;
54
55 clock_gettime(CLOCK_MONOTONIC, &n);
56
57 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
58 }
59
/*
 * Return a pointer to the character following the first '\n' in buf,
 * i.e. the start of the next line. Returns NULL when buf is NULL or
 * contains no further newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (!buf)
                return NULL;

        newline = strchr(buf, '\n');

        return newline ? newline + 1 : NULL;
}
72
73 static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
74 char filename[PATH_MAX];
75 _cleanup_close_ int fd = -1;
76 ssize_t n;
77
78 sprintf(filename, "%d/cmdline", pid);
79 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
80 if (fd < 0)
81 return -errno;
82
83 n = read(fd, buffer, buf_len-1);
84 if (n > 0) {
85 int i;
86 for (i = 0; i < n; i++)
87 if (buffer[i] == '\0')
88 buffer[i] = ' ';
89 buffer[n] = '\0';
90 }
91
92 return 0;
93 }
94
95 int log_sample(DIR *proc,
96 int sample,
97 struct ps_struct *ps_first,
98 struct list_sample_data **ptr,
99 int *pscount,
100 int *cpus) {
101
102 static int vmstat = -1;
103 _cleanup_free_ char *buf_schedstat = NULL;
104 char buf[4096];
105 char key[256];
106 char val[256];
107 char rt[256];
108 char wt[256];
109 char *m;
110 int r;
111 int c;
112 int p;
113 int mod;
114 static int e_fd = -1;
115 ssize_t s;
116 ssize_t n;
117 struct dirent *ent;
118 int fd;
119 struct list_sample_data *sampledata;
120 struct ps_sched_struct *ps_prev = NULL;
121 int procfd;
122 int taskfd = -1;
123
124 sampledata = *ptr;
125
126 procfd = dirfd(proc);
127 if (procfd < 0)
128 return -errno;
129
130 if (vmstat < 0) {
131 /* block stuff */
132 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
133 if (vmstat < 0)
134 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
135 }
136
137 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
138 if (n <= 0) {
139 vmstat = safe_close(vmstat);
140 if (n < 0)
141 return -errno;
142 return -ENODATA;
143 }
144
145 buf[n] = '\0';
146
147 m = buf;
148 while (m) {
149 if (sscanf(m, "%s %s", key, val) < 2)
150 goto vmstat_next;
151 if (streq(key, "pgpgin"))
152 sampledata->blockstat.bi = atoi(val);
153 if (streq(key, "pgpgout")) {
154 sampledata->blockstat.bo = atoi(val);
155 break;
156 }
157 vmstat_next:
158 m = bufgetline(m);
159 if (!m)
160 break;
161 }
162
163 /* Parse "/proc/schedstat" for overall CPU utilization */
164 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
165 if (r < 0)
166 return log_error_errno(r, "Unable to read schedstat: %m");
167
168 m = buf_schedstat;
169 while (m) {
170 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
171 goto schedstat_next;
172
173 if (strstr(key, "cpu")) {
174 r = safe_atoi((const char*)(key+3), &c);
175 if (r < 0 || c > MAXCPUS -1)
176 /* Oops, we only have room for MAXCPUS data */
177 break;
178 sampledata->runtime[c] = atoll(rt);
179 sampledata->waittime[c] = atoll(wt);
180
181 if (c == *cpus)
182 *cpus = c + 1;
183 }
184 schedstat_next:
185 m = bufgetline(m);
186 if (!m)
187 break;
188 }
189
190 if (arg_entropy) {
191 if (e_fd < 0) {
192 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
193 if (e_fd < 0)
194 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
195 }
196
197 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
198 if (n <= 0) {
199 e_fd = safe_close(e_fd);
200 } else {
201 buf[n] = '\0';
202 sampledata->entropy_avail = atoi(buf);
203 }
204 }
205
206 while ((ent = readdir(proc)) != NULL) {
207 char filename[PATH_MAX];
208 int pid;
209 struct ps_struct *ps;
210
211 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
212 continue;
213
214 pid = atoi(ent->d_name);
215
216 if (pid >= MAXPIDS)
217 continue;
218
219 ps = ps_first;
220 while (ps->next_ps) {
221 ps = ps->next_ps;
222 if (ps->pid == pid)
223 break;
224 }
225
226 /* end of our LL? then append a new record */
227 if (ps->pid != pid) {
228 _cleanup_fclose_ FILE *st = NULL;
229 char t[32];
230 struct ps_struct *parent;
231
232 ps->next_ps = new0(struct ps_struct, 1);
233 if (!ps->next_ps)
234 return log_oom();
235
236 ps = ps->next_ps;
237 ps->pid = pid;
238 ps->sched = -1;
239 ps->schedstat = -1;
240
241 ps->sample = new0(struct ps_sched_struct, 1);
242 if (!ps->sample)
243 return log_oom();
244
245 ps->sample->sampledata = sampledata;
246
247 (*pscount)++;
248
249 /* mark our first sample */
250 ps->first = ps->last = ps->sample;
251 ps->sample->runtime = atoll(rt);
252 ps->sample->waittime = atoll(wt);
253
254 /* get name, start time */
255 if (ps->sched < 0) {
256 sprintf(filename, "%d/sched", pid);
257 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
258 if (ps->sched < 0)
259 continue;
260 }
261
262 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
263 if (s <= 0) {
264 ps->sched = safe_close(ps->sched);
265 continue;
266 }
267 buf[s] = '\0';
268
269 if (!sscanf(buf, "%s %*s %*s", key))
270 continue;
271
272 strscpy(ps->name, sizeof(ps->name), key);
273
274 /* cmdline */
275 if (arg_show_cmdline)
276 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
277
278 /* discard line 2 */
279 m = bufgetline(buf);
280 if (!m)
281 continue;
282
283 m = bufgetline(m);
284 if (!m)
285 continue;
286
287 if (!sscanf(m, "%*s %*s %s", t))
288 continue;
289
290 r = safe_atod(t, &ps->starttime);
291 if (r < 0)
292 continue;
293
294 ps->starttime /= 1000.0;
295
296 if (arg_show_cgroup)
297 /* if this fails, that's OK */
298 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
299 ps->pid, &ps->cgroup);
300
301 /* ppid */
302 sprintf(filename, "%d/stat", pid);
303 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
304 if (fd < 0)
305 continue;
306
307 st = fdopen(fd, "re");
308 if (!st) {
309 close(fd);
310 continue;
311 }
312
313 if (!fscanf(st, "%*s %*s %*s %i", &p))
314 continue;
315
316 ps->ppid = p;
317
318 /*
319 * setup child pointers
320 *
321 * these are used to paint the tree coherently later
322 * each parent has a LL of children, and a LL of siblings
323 */
324 if (pid == 1)
325 continue; /* nothing to do for init atm */
326
327 /* kthreadd has ppid=0, which breaks our tree ordering */
328 if (ps->ppid == 0)
329 ps->ppid = 1;
330
331 parent = ps_first;
332 while ((parent->next_ps && parent->pid != ps->ppid))
333 parent = parent->next_ps;
334
335 if (parent->pid != ps->ppid) {
336 /* orphan */
337 ps->ppid = 1;
338 parent = ps_first->next_ps;
339 }
340
341 ps->parent = parent;
342
343 if (!parent->children) {
344 /* it's the first child */
345 parent->children = ps;
346 } else {
347 /* walk all children and append */
348 struct ps_struct *children;
349 children = parent->children;
350 while (children->next)
351 children = children->next;
352
353 children->next = ps;
354 }
355 }
356
357 /* else -> found pid, append data in ps */
358
359 /* below here is all continuous logging parts - we get here on every
360 * iteration */
361
362 /* rt, wt */
363 if (ps->schedstat < 0) {
364 sprintf(filename, "%d/schedstat", pid);
365 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
366 if (ps->schedstat < 0)
367 continue;
368 }
369
370 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
371 if (s <= 0) {
372 /* clean up our file descriptors - assume that the process exited */
373 close(ps->schedstat);
374 ps->schedstat = -1;
375 ps->sched = safe_close(ps->sched);
376 continue;
377 }
378
379 buf[s] = '\0';
380
381 if (!sscanf(buf, "%s %s %*s", rt, wt))
382 continue;
383
384 ps->sample->next = new0(struct ps_sched_struct, 1);
385 if (!ps->sample->next)
386 return log_oom();
387
388 ps->sample->next->prev = ps->sample;
389 ps->sample = ps->sample->next;
390 ps->last = ps->sample;
391 ps->sample->runtime = atoll(rt);
392 ps->sample->waittime = atoll(wt);
393 ps->sample->sampledata = sampledata;
394 ps->sample->ps_new = ps;
395 if (ps_prev)
396 ps_prev->cross = ps->sample;
397
398 ps_prev = ps->sample;
399 ps->total = (ps->last->runtime - ps->first->runtime)
400 / 1000000000.0;
401
402 /* Take into account CPU runtime/waittime spent in non-main threads of the process
403 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
404 * See https://github.com/systemd/systemd/issues/139
405 */
406
407 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
408 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
409 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
410 if (taskfd >= 0) {
411 _cleanup_closedir_ DIR *taskdir = NULL;
412
413 taskdir = fdopendir(taskfd);
414 if (!taskdir) {
415 safe_close(taskfd);
416 return -errno;
417 }
418 FOREACH_DIRENT(ent, taskdir, break) {
419 int tid = -1;
420 _cleanup_close_ int tid_schedstat = -1;
421 long long delta_rt;
422 long long delta_wt;
423
424 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
425 continue;
426
427 /* Skip main thread as it was already accounted */
428 r = safe_atoi(ent->d_name, &tid);
429 if (r < 0 || tid == pid)
430 continue;
431
432 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
433 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
434 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
435
436 if (tid_schedstat == -1)
437 continue;
438
439 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
440 if (s <= 0)
441 continue;
442 buf[s] = '\0';
443
444 if (!sscanf(buf, "%s %s %*s", rt, wt))
445 continue;
446
447 r = safe_atolli(rt, &delta_rt);
448 if (r < 0)
449 continue;
450 r = safe_atolli(rt, &delta_wt);
451 if (r < 0)
452 continue;
453 ps->sample->runtime += delta_rt;
454 ps->sample->waittime += delta_wt;
455 }
456 }
457
458 if (!arg_pss)
459 goto catch_rename;
460
461 /* Pss */
462 if (!ps->smaps) {
463 sprintf(filename, "%d/smaps", pid);
464 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
465 if (fd < 0)
466 continue;
467 ps->smaps = fdopen(fd, "re");
468 if (!ps->smaps) {
469 close(fd);
470 continue;
471 }
472 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
473 } else {
474 rewind(ps->smaps);
475 }
476
477 /* test to see if we need to skip another field */
478 if (skip == 0) {
479 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
480 continue;
481 }
482 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
483 continue;
484 }
485 if (buf[392] == 'V') {
486 skip = 2;
487 }
488 else {
489 skip = 1;
490 }
491 rewind(ps->smaps);
492 }
493
494 while (1) {
495 int pss_kb;
496
497 /* skip one line, this contains the object mapped. */
498 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
499 break;
500 }
501 /* then there's a 28 char 14 line block */
502 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
503 break;
504 }
505 pss_kb = atoi(&buf[61]);
506 ps->sample->pss += pss_kb;
507
508 /* skip one more line if this is a newer kernel */
509 if (skip == 2) {
510 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
511 break;
512 }
513 }
514
515 if (ps->sample->pss > ps->pss_max)
516 ps->pss_max = ps->sample->pss;
517
518 catch_rename:
519 /* catch process rename, try to randomize time */
520 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
521 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
522
523 /* re-fetch name */
524 /* get name, start time */
525 if (ps->sched < 0) {
526 sprintf(filename, "%d/sched", pid);
527 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
528 if (ps->sched < 0)
529 continue;
530 }
531
532 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
533 if (s <= 0) {
534 /* clean up file descriptors */
535 ps->sched = safe_close(ps->sched);
536 ps->schedstat = safe_close(ps->schedstat);
537 continue;
538 }
539
540 buf[s] = '\0';
541
542 if (!sscanf(buf, "%s %*s %*s", key))
543 continue;
544
545 strscpy(ps->name, sizeof(ps->name), key);
546
547 /* cmdline */
548 if (arg_show_cmdline)
549 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
550 }
551 }
552
553 return 0;
554 }