]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/bootchart/store.c
util-lib: split our string related calls from util.[ch] into its own file string...
[thirdparty/systemd.git] / src / bootchart / store.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright (C) 2009-2013 Intel Corporation
7
8 Authors:
9 Auke Kok <auke-jan.h.kok@intel.com>
10
11 systemd is free software; you can redistribute it and/or modify it
12 under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 2.1 of the License, or
14 (at your option) any later version.
15
16 systemd is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 ***/
24
25 #include <dirent.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 #include <unistd.h>
33
34 #include "bootchart.h"
35 #include "cgroup-util.h"
36 #include "fileio.h"
37 #include "string-util.h"
38 #include "strxcpyx.h"
39 #include "time-util.h"
40 #include "util.h"
41 #include "store.h"
42
/*
 * File-scope state for the PSS (smaps) sampling code.
 *
 * skip remembers the smaps record layout once it has been probed:
 * 0 means "not probed yet"; the probe then stores 1 or 2, and 2 makes
 * the parser drop one additional line per mapping.
 *
 * smaps_buf is a static 4k buffer handed to setvbuf() for the smaps
 * streams - it raises stdio buffering above the default 1k buffer so
 * that fewer read() calls are needed.
 */
static int skip = 0;
static char smaps_buf[4096];
50
51 double gettime_ns(void) {
52 struct timespec n;
53
54 clock_gettime(CLOCK_MONOTONIC, &n);
55
56 return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
57 }
58
/*
 * Return a pointer to the character following the first '\n' in buf.
 * Returns NULL when buf is NULL or holds no newline.
 */
static char *bufgetline(char *buf) {
        char *newline;

        if (buf == NULL)
                return NULL;

        newline = strchr(buf, '\n');

        return newline ? newline + 1 : NULL;
}
71
72 static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
73 char filename[PATH_MAX];
74 _cleanup_close_ int fd = -1;
75 ssize_t n;
76
77 sprintf(filename, "%d/cmdline", pid);
78 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
79 if (fd < 0)
80 return -errno;
81
82 n = read(fd, buffer, buf_len-1);
83 if (n > 0) {
84 int i;
85 for (i = 0; i < n; i++)
86 if (buffer[i] == '\0')
87 buffer[i] = ' ';
88 buffer[n] = '\0';
89 }
90
91 return 0;
92 }
93
94 int log_sample(DIR *proc,
95 int sample,
96 struct ps_struct *ps_first,
97 struct list_sample_data **ptr,
98 int *pscount,
99 int *cpus) {
100
101 static int vmstat = -1;
102 _cleanup_free_ char *buf_schedstat = NULL;
103 char buf[4096];
104 char key[256];
105 char val[256];
106 char rt[256];
107 char wt[256];
108 char *m;
109 int r;
110 int c;
111 int p;
112 int mod;
113 static int e_fd = -1;
114 ssize_t s;
115 ssize_t n;
116 struct dirent *ent;
117 int fd;
118 struct list_sample_data *sampledata;
119 struct ps_sched_struct *ps_prev = NULL;
120 int procfd;
121 int taskfd = -1;
122
123 sampledata = *ptr;
124
125 procfd = dirfd(proc);
126 if (procfd < 0)
127 return -errno;
128
129 if (vmstat < 0) {
130 /* block stuff */
131 vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
132 if (vmstat < 0)
133 return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
134 }
135
136 n = pread(vmstat, buf, sizeof(buf) - 1, 0);
137 if (n <= 0) {
138 vmstat = safe_close(vmstat);
139 if (n < 0)
140 return -errno;
141 return -ENODATA;
142 }
143
144 buf[n] = '\0';
145
146 m = buf;
147 while (m) {
148 if (sscanf(m, "%s %s", key, val) < 2)
149 goto vmstat_next;
150 if (streq(key, "pgpgin"))
151 sampledata->blockstat.bi = atoi(val);
152 if (streq(key, "pgpgout")) {
153 sampledata->blockstat.bo = atoi(val);
154 break;
155 }
156 vmstat_next:
157 m = bufgetline(m);
158 if (!m)
159 break;
160 }
161
162 /* Parse "/proc/schedstat" for overall CPU utilization */
163 r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
164 if (r < 0)
165 return log_error_errno(r, "Unable to read schedstat: %m");
166
167 m = buf_schedstat;
168 while (m) {
169 if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
170 goto schedstat_next;
171
172 if (strstr(key, "cpu")) {
173 r = safe_atoi((const char*)(key+3), &c);
174 if (r < 0 || c > MAXCPUS -1)
175 /* Oops, we only have room for MAXCPUS data */
176 break;
177 sampledata->runtime[c] = atoll(rt);
178 sampledata->waittime[c] = atoll(wt);
179
180 if (c == *cpus)
181 *cpus = c + 1;
182 }
183 schedstat_next:
184 m = bufgetline(m);
185 if (!m)
186 break;
187 }
188
189 if (arg_entropy) {
190 if (e_fd < 0) {
191 e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
192 if (e_fd < 0)
193 return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
194 }
195
196 n = pread(e_fd, buf, sizeof(buf) - 1, 0);
197 if (n <= 0) {
198 e_fd = safe_close(e_fd);
199 } else {
200 buf[n] = '\0';
201 sampledata->entropy_avail = atoi(buf);
202 }
203 }
204
205 while ((ent = readdir(proc)) != NULL) {
206 char filename[PATH_MAX];
207 int pid;
208 struct ps_struct *ps;
209
210 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
211 continue;
212
213 pid = atoi(ent->d_name);
214
215 if (pid >= MAXPIDS)
216 continue;
217
218 ps = ps_first;
219 while (ps->next_ps) {
220 ps = ps->next_ps;
221 if (ps->pid == pid)
222 break;
223 }
224
225 /* end of our LL? then append a new record */
226 if (ps->pid != pid) {
227 _cleanup_fclose_ FILE *st = NULL;
228 char t[32];
229 struct ps_struct *parent;
230
231 ps->next_ps = new0(struct ps_struct, 1);
232 if (!ps->next_ps)
233 return log_oom();
234
235 ps = ps->next_ps;
236 ps->pid = pid;
237 ps->sched = -1;
238 ps->schedstat = -1;
239
240 ps->sample = new0(struct ps_sched_struct, 1);
241 if (!ps->sample)
242 return log_oom();
243
244 ps->sample->sampledata = sampledata;
245
246 (*pscount)++;
247
248 /* mark our first sample */
249 ps->first = ps->last = ps->sample;
250 ps->sample->runtime = atoll(rt);
251 ps->sample->waittime = atoll(wt);
252
253 /* get name, start time */
254 if (ps->sched < 0) {
255 sprintf(filename, "%d/sched", pid);
256 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
257 if (ps->sched < 0)
258 continue;
259 }
260
261 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
262 if (s <= 0) {
263 ps->sched = safe_close(ps->sched);
264 continue;
265 }
266 buf[s] = '\0';
267
268 if (!sscanf(buf, "%s %*s %*s", key))
269 continue;
270
271 strscpy(ps->name, sizeof(ps->name), key);
272
273 /* cmdline */
274 if (arg_show_cmdline)
275 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
276
277 /* discard line 2 */
278 m = bufgetline(buf);
279 if (!m)
280 continue;
281
282 m = bufgetline(m);
283 if (!m)
284 continue;
285
286 if (!sscanf(m, "%*s %*s %s", t))
287 continue;
288
289 r = safe_atod(t, &ps->starttime);
290 if (r < 0)
291 continue;
292
293 ps->starttime /= 1000.0;
294
295 if (arg_show_cgroup)
296 /* if this fails, that's OK */
297 cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
298 ps->pid, &ps->cgroup);
299
300 /* ppid */
301 sprintf(filename, "%d/stat", pid);
302 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
303 if (fd < 0)
304 continue;
305
306 st = fdopen(fd, "re");
307 if (!st) {
308 close(fd);
309 continue;
310 }
311
312 if (!fscanf(st, "%*s %*s %*s %i", &p))
313 continue;
314
315 ps->ppid = p;
316
317 /*
318 * setup child pointers
319 *
320 * these are used to paint the tree coherently later
321 * each parent has a LL of children, and a LL of siblings
322 */
323 if (pid == 1)
324 continue; /* nothing to do for init atm */
325
326 /* kthreadd has ppid=0, which breaks our tree ordering */
327 if (ps->ppid == 0)
328 ps->ppid = 1;
329
330 parent = ps_first;
331 while ((parent->next_ps && parent->pid != ps->ppid))
332 parent = parent->next_ps;
333
334 if (parent->pid != ps->ppid) {
335 /* orphan */
336 ps->ppid = 1;
337 parent = ps_first->next_ps;
338 }
339
340 ps->parent = parent;
341
342 if (!parent->children) {
343 /* it's the first child */
344 parent->children = ps;
345 } else {
346 /* walk all children and append */
347 struct ps_struct *children;
348 children = parent->children;
349 while (children->next)
350 children = children->next;
351
352 children->next = ps;
353 }
354 }
355
356 /* else -> found pid, append data in ps */
357
358 /* below here is all continuous logging parts - we get here on every
359 * iteration */
360
361 /* rt, wt */
362 if (ps->schedstat < 0) {
363 sprintf(filename, "%d/schedstat", pid);
364 ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
365 if (ps->schedstat < 0)
366 continue;
367 }
368
369 s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
370 if (s <= 0) {
371 /* clean up our file descriptors - assume that the process exited */
372 close(ps->schedstat);
373 ps->schedstat = -1;
374 ps->sched = safe_close(ps->sched);
375 continue;
376 }
377
378 buf[s] = '\0';
379
380 if (!sscanf(buf, "%s %s %*s", rt, wt))
381 continue;
382
383 ps->sample->next = new0(struct ps_sched_struct, 1);
384 if (!ps->sample->next)
385 return log_oom();
386
387 ps->sample->next->prev = ps->sample;
388 ps->sample = ps->sample->next;
389 ps->last = ps->sample;
390 ps->sample->runtime = atoll(rt);
391 ps->sample->waittime = atoll(wt);
392 ps->sample->sampledata = sampledata;
393 ps->sample->ps_new = ps;
394 if (ps_prev)
395 ps_prev->cross = ps->sample;
396
397 ps_prev = ps->sample;
398 ps->total = (ps->last->runtime - ps->first->runtime)
399 / 1000000000.0;
400
401 /* Take into account CPU runtime/waittime spent in non-main threads of the process
402 * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
403 * See https://github.com/systemd/systemd/issues/139
404 */
405
406 /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
407 snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
408 taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
409 if (taskfd >= 0) {
410 _cleanup_closedir_ DIR *taskdir = NULL;
411
412 taskdir = fdopendir(taskfd);
413 if (!taskdir) {
414 safe_close(taskfd);
415 return -errno;
416 }
417 FOREACH_DIRENT(ent, taskdir, break) {
418 int tid = -1;
419 _cleanup_close_ int tid_schedstat = -1;
420 long long delta_rt;
421 long long delta_wt;
422
423 if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
424 continue;
425
426 /* Skip main thread as it was already accounted */
427 r = safe_atoi(ent->d_name, &tid);
428 if (r < 0 || tid == pid)
429 continue;
430
431 /* Parse "/proc/[pid]/task/[tid]/schedstat" */
432 snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
433 tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
434
435 if (tid_schedstat == -1)
436 continue;
437
438 s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
439 if (s <= 0)
440 continue;
441 buf[s] = '\0';
442
443 if (!sscanf(buf, "%s %s %*s", rt, wt))
444 continue;
445
446 r = safe_atolli(rt, &delta_rt);
447 if (r < 0)
448 continue;
449 r = safe_atolli(rt, &delta_wt);
450 if (r < 0)
451 continue;
452 ps->sample->runtime += delta_rt;
453 ps->sample->waittime += delta_wt;
454 }
455 }
456
457 if (!arg_pss)
458 goto catch_rename;
459
460 /* Pss */
461 if (!ps->smaps) {
462 sprintf(filename, "%d/smaps", pid);
463 fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
464 if (fd < 0)
465 continue;
466 ps->smaps = fdopen(fd, "re");
467 if (!ps->smaps) {
468 close(fd);
469 continue;
470 }
471 setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
472 } else {
473 rewind(ps->smaps);
474 }
475
476 /* test to see if we need to skip another field */
477 if (skip == 0) {
478 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
479 continue;
480 }
481 if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
482 continue;
483 }
484 if (buf[392] == 'V') {
485 skip = 2;
486 }
487 else {
488 skip = 1;
489 }
490 rewind(ps->smaps);
491 }
492
493 while (1) {
494 int pss_kb;
495
496 /* skip one line, this contains the object mapped. */
497 if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
498 break;
499 }
500 /* then there's a 28 char 14 line block */
501 if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
502 break;
503 }
504 pss_kb = atoi(&buf[61]);
505 ps->sample->pss += pss_kb;
506
507 /* skip one more line if this is a newer kernel */
508 if (skip == 2) {
509 if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
510 break;
511 }
512 }
513
514 if (ps->sample->pss > ps->pss_max)
515 ps->pss_max = ps->sample->pss;
516
517 catch_rename:
518 /* catch process rename, try to randomize time */
519 mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
520 if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
521
522 /* re-fetch name */
523 /* get name, start time */
524 if (ps->sched < 0) {
525 sprintf(filename, "%d/sched", pid);
526 ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
527 if (ps->sched < 0)
528 continue;
529 }
530
531 s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
532 if (s <= 0) {
533 /* clean up file descriptors */
534 ps->sched = safe_close(ps->sched);
535 ps->schedstat = safe_close(ps->schedstat);
536 continue;
537 }
538
539 buf[s] = '\0';
540
541 if (!sscanf(buf, "%s %*s %*s", key))
542 continue;
543
544 strscpy(ps->name, sizeof(ps->name), key);
545
546 /* cmdline */
547 if (arg_show_cmdline)
548 pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
549 }
550 }
551
552 return 0;
553 }