+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
/***
- log.c - This file is part of systemd-bootchart
+ This file is part of systemd.
- Copyright (C) 2009-2013 Intel Coproration
+ Copyright (C) 2009-2013 Intel Corporation
Authors:
Auke Kok <auke-jan.h.kok@intel.com>
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
-#define _GNU_SOURCE 1
-#include <unistd.h>
-#include <stdlib.h>
+#include <dirent.h>
+#include <fcntl.h>
#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <dirent.h>
-#include <fcntl.h>
#include <time.h>
-
+#include <unistd.h>
#include "bootchart.h"
+#include "cgroup-util.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "parse-util.h"
+#include "store.h"
+#include "string-util.h"
+#include "strxcpyx.h"
+#include "time-util.h"
#include "util.h"
/*
* read() overhead.
*/
static char smaps_buf[4096];
-DIR *proc;
-int procfd=-1;
+static int skip = 0;
-double gettime_ns(void)
-{
+double gettime_ns(void) {
struct timespec n;
clock_gettime(CLOCK_MONOTONIC, &n);
- return (n.tv_sec + (n.tv_nsec / 1000000000.0));
+ return (n.tv_sec + (n.tv_nsec / (double) NSEC_PER_SEC));
}
-
-void log_uptime(void)
-{
- FILE _cleanup_fclose_ *f = NULL;
- char str[32];
- double uptime;
-
- f = fopen("/proc/uptime", "r");
-
- if (!f)
- return;
- if (!fscanf(f, "%s %*s", str))
- return;
-
- uptime = strtod(str, NULL);
-
- log_start = gettime_ns();
-
- /* start graph at kernel boot time */
- if (relative)
- graph_start = log_start;
- else
- graph_start = log_start - uptime;
-}
-
-
-static char *bufgetline(char *buf)
-{
+static char *bufgetline(char *buf) {
char *c;
if (!buf)
c = strchr(buf, '\n');
if (c)
c++;
+
return c;
}
-static int pid_cmdline_strncpy(char *buffer, int pid, size_t buf_len) {
- char filename[PATH_MAX];
- int _cleanup_close_ fd=-1;
- ssize_t n;
+static int pid_cmdline_strscpy(int procfd, char *buffer, size_t buf_len, int pid) {
+ char filename[PATH_MAX];
+ _cleanup_close_ int fd = -1;
+ ssize_t n;
- sprintf(filename, "%d/cmdline", pid);
- fd = openat(procfd, filename, O_RDONLY);
- if (fd < 0)
- return -errno;
+ sprintf(filename, "%d/cmdline", pid);
+ fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
- n = read(fd, buffer, buf_len-1);
+ n = read(fd, buffer, buf_len-1);
if (n > 0) {
int i;
for (i = 0; i < n; i++)
buffer[i] = ' ';
buffer[n] = '\0';
}
- return 0;
+
+ return 0;
}
-void log_sample(int sample)
-{
- static int vmstat;
- static int schedstat;
- char buf[4095];
+int log_sample(DIR *proc,
+ int sample,
+ struct ps_struct *ps_first,
+ struct list_sample_data **ptr,
+ int *pscount,
+ int *cpus) {
+
+ static int vmstat = -1;
+ _cleanup_free_ char *buf_schedstat = NULL;
+ char buf[4096];
char key[256];
char val[256];
char rt[256];
char wt[256];
char *m;
+ int r;
int c;
int p;
int mod;
- static int e_fd;
+ static int e_fd = -1;
ssize_t s;
ssize_t n;
struct dirent *ent;
int fd;
+ struct list_sample_data *sampledata;
+ struct ps_sched_struct *ps_prev = NULL;
+ int procfd;
+ int taskfd = -1;
- /* all the per-process stuff goes here */
- if (!proc) {
- /* find all processes */
- proc = opendir("/proc");
- if (!proc)
- return;
- procfd = dirfd(proc);
- } else {
- rewinddir(proc);
- }
+ sampledata = *ptr;
- if (!vmstat) {
+ procfd = dirfd(proc);
+ if (procfd < 0)
+ return -errno;
+
+ if (vmstat < 0) {
/* block stuff */
- vmstat = openat(procfd, "vmstat", O_RDONLY);
- if (vmstat == -1) {
- perror("open /proc/vmstat");
- exit (EXIT_FAILURE);
- }
+ vmstat = openat(procfd, "vmstat", O_RDONLY|O_CLOEXEC);
+ if (vmstat < 0)
+ return log_error_errno(errno, "Failed to open /proc/vmstat: %m");
}
n = pread(vmstat, buf, sizeof(buf) - 1, 0);
if (n <= 0) {
- close(vmstat);
- return;
+ vmstat = safe_close(vmstat);
+ if (n < 0)
+ return -errno;
+ return -ENODATA;
}
+
buf[n] = '\0';
m = buf;
if (sscanf(m, "%s %s", key, val) < 2)
goto vmstat_next;
if (streq(key, "pgpgin"))
- blockstat[sample].bi = atoi(val);
+ sampledata->blockstat.bi = atoi(val);
if (streq(key, "pgpgout")) {
- blockstat[sample].bo = atoi(val);
+ sampledata->blockstat.bo = atoi(val);
break;
}
vmstat_next:
break;
}
- if (!schedstat) {
- /* overall CPU utilization */
- schedstat = openat(procfd, "schedstat", O_RDONLY);
- if (schedstat == -1) {
- perror("open /proc/schedstat");
- exit (EXIT_FAILURE);
- }
- }
+ /* Parse "/proc/schedstat" for overall CPU utilization */
+ r = read_full_file("/proc/schedstat", &buf_schedstat, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Unable to read schedstat: %m");
- n = pread(schedstat, buf, sizeof(buf) - 1, 0);
- if (n <= 0) {
- close(schedstat);
- return;
- }
- buf[n] = '\0';
-
- m = buf;
+ m = buf_schedstat;
while (m) {
if (sscanf(m, "%s %*s %*s %*s %*s %*s %*s %s %s", key, rt, wt) < 3)
goto schedstat_next;
if (strstr(key, "cpu")) {
- c = atoi((const char*)(key+3));
- if (c > MAXCPUS)
+ r = safe_atoi((const char*)(key+3), &c);
+ if (r < 0 || c > MAXCPUS -1)
/* Oops, we only have room for MAXCPUS data */
break;
- cpustat[c].sample[sample].runtime = atoll(rt);
- cpustat[c].sample[sample].waittime = atoll(wt);
+ sampledata->runtime[c] = atoll(rt);
+ sampledata->waittime[c] = atoll(wt);
- if (c == cpus)
- cpus = c + 1;
+ if (c == *cpus)
+ *cpus = c + 1;
}
schedstat_next:
m = bufgetline(m);
break;
}
- if (entropy) {
- if (!e_fd) {
- e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY);
+ if (arg_entropy) {
+ if (e_fd < 0) {
+ e_fd = openat(procfd, "sys/kernel/random/entropy_avail", O_RDONLY|O_CLOEXEC);
+ if (e_fd < 0)
+ return log_error_errno(errno, "Failed to open /proc/sys/kernel/random/entropy_avail: %m");
}
- if (e_fd) {
- n = pread(e_fd, buf, sizeof(buf) - 1, 0);
- if (n > 0) {
- buf[n] = '\0';
- entropy_avail[sample] = atoi(buf);
- }
+ n = pread(e_fd, buf, sizeof(buf) - 1, 0);
+ if (n <= 0) {
+ e_fd = safe_close(e_fd);
+ } else {
+ buf[n] = '\0';
+ sampledata->entropy_avail = atoi(buf);
}
}
/* end of our LL? then append a new record */
if (ps->pid != pid) {
- FILE _cleanup_fclose_ *st = NULL;
+ _cleanup_fclose_ FILE *st = NULL;
char t[32];
struct ps_struct *parent;
- ps->next_ps = calloc(1, sizeof(struct ps_struct));
- if (!ps->next_ps) {
- perror("calloc(ps_struct)");
- exit (EXIT_FAILURE);
- }
+ ps->next_ps = new0(struct ps_struct, 1);
+ if (!ps->next_ps)
+ return log_oom();
+
ps = ps->next_ps;
ps->pid = pid;
+ ps->sched = -1;
+ ps->schedstat = -1;
- ps->sample = calloc(samples_len + 1, sizeof(struct ps_sched_struct));
- if (!ps->sample) {
- perror("calloc(ps_struct)");
- exit (EXIT_FAILURE);
- }
+ ps->sample = new0(struct ps_sched_struct, 1);
+ if (!ps->sample)
+ return log_oom();
+
+ ps->sample->sampledata = sampledata;
- pscount++;
+ (*pscount)++;
/* mark our first sample */
- ps->first = sample;
+ ps->first = ps->last = ps->sample;
+ ps->sample->runtime = atoll(rt);
+ ps->sample->waittime = atoll(wt);
/* get name, start time */
- if (!ps->sched) {
+ if (ps->sched < 0) {
sprintf(filename, "%d/sched", pid);
- ps->sched = openat(procfd, filename, O_RDONLY);
- if (ps->sched == -1)
+ ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
+ if (ps->sched < 0)
continue;
}
s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
if (s <= 0) {
- close(ps->sched);
+ ps->sched = safe_close(ps->sched);
continue;
}
buf[s] = '\0';
if (!sscanf(buf, "%s %*s %*s", key))
continue;
- strncpy(ps->name, key, 256);
+ strscpy(ps->name, sizeof(ps->name), key);
/* cmdline */
- if (show_cmdline)
- pid_cmdline_strncpy(ps->name, pid, 256);
+ if (arg_show_cmdline)
+ pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
/* discard line 2 */
m = bufgetline(buf);
if (!sscanf(m, "%*s %*s %s", t))
continue;
- ps->starttime = strtod(t, NULL) / 1000.0;
+ r = safe_atod(t, &ps->starttime);
+ if (r < 0)
+ continue;
+
+ ps->starttime /= 1000.0;
+
+ if (arg_show_cgroup)
+ /* if this fails, that's OK */
+ cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER,
+ ps->pid, &ps->cgroup);
/* ppid */
sprintf(filename, "%d/stat", pid);
- fd = openat(procfd, filename, O_RDONLY);
- st = fdopen(fd, "r");
- if (!st)
+ fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
continue;
- if (!fscanf(st, "%*s %*s %*s %i", &p)) {
+
+ st = fdopen(fd, "re");
+ if (!st) {
+ close(fd);
continue;
}
+
+ if (!fscanf(st, "%*s %*s %*s %i", &p))
+ continue;
+
ps->ppid = p;
/*
while ((parent->next_ps && parent->pid != ps->ppid))
parent = parent->next_ps;
- if ((!parent) || (parent->pid != ps->ppid)) {
+ if (parent->pid != ps->ppid) {
/* orphan */
ps->ppid = 1;
parent = ps_first->next_ps;
children = parent->children;
while (children->next)
children = children->next;
+
children->next = ps;
}
}
* iteration */
/* rt, wt */
- if (!ps->schedstat) {
+ if (ps->schedstat < 0) {
sprintf(filename, "%d/schedstat", pid);
- ps->schedstat = openat(procfd, filename, O_RDONLY);
- if (ps->schedstat == -1)
+ ps->schedstat = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
+ if (ps->schedstat < 0)
continue;
}
+
s = pread(ps->schedstat, buf, sizeof(buf) - 1, 0);
if (s <= 0) {
/* clean up our file descriptors - assume that the process exited */
close(ps->schedstat);
- if (ps->sched)
- close(ps->sched);
- //if (ps->smaps)
- // fclose(ps->smaps);
+ ps->schedstat = -1;
+ ps->sched = safe_close(ps->sched);
continue;
}
+
buf[s] = '\0';
if (!sscanf(buf, "%s %s %*s", rt, wt))
continue;
- ps->last = sample;
- ps->sample[sample].runtime = atoll(rt);
- ps->sample[sample].waittime = atoll(wt);
+ ps->sample->next = new0(struct ps_sched_struct, 1);
+ if (!ps->sample->next)
+ return log_oom();
+
+ ps->sample->next->prev = ps->sample;
+ ps->sample = ps->sample->next;
+ ps->last = ps->sample;
+ ps->sample->runtime = atoll(rt);
+ ps->sample->waittime = atoll(wt);
+ ps->sample->sampledata = sampledata;
+ ps->sample->ps_new = ps;
+ if (ps_prev)
+ ps_prev->cross = ps->sample;
+
+ ps_prev = ps->sample;
+ ps->total = (ps->last->runtime - ps->first->runtime)
+ / 1000000000.0;
+
+ /* Take into account CPU runtime/waittime spent in non-main threads of the process
+ * by parsing "/proc/[pid]/task/[tid]/schedstat" for all [tid] != [pid]
+ * See https://github.com/systemd/systemd/issues/139
+ */
+
+ /* Browse directory "/proc/[pid]/task" to know the thread ids of process [pid] */
+ snprintf(filename, sizeof(filename), PID_FMT "/task", pid);
+ taskfd = openat(procfd, filename, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (taskfd >= 0) {
+ _cleanup_closedir_ DIR *taskdir = NULL;
+
+ taskdir = fdopendir(taskfd);
+ if (!taskdir) {
+ safe_close(taskfd);
+ return -errno;
+ }
+ FOREACH_DIRENT(ent, taskdir, break) {
+ int tid = -1;
+ _cleanup_close_ int tid_schedstat = -1;
+ long long delta_rt;
+ long long delta_wt;
+
+ if ((ent->d_name[0] < '0') || (ent->d_name[0] > '9'))
+ continue;
- ps->total = (ps->sample[ps->last].runtime
- - ps->sample[ps->first].runtime)
- / 1000000000.0;
+ /* Skip main thread as it was already accounted */
+ r = safe_atoi(ent->d_name, &tid);
+ if (r < 0 || tid == pid)
+ continue;
+
+ /* Parse "/proc/[pid]/task/[tid]/schedstat" */
+ snprintf(filename, sizeof(filename), PID_FMT "/schedstat", tid);
+ tid_schedstat = openat(taskfd, filename, O_RDONLY|O_CLOEXEC);
+
+ if (tid_schedstat == -1)
+ continue;
+
+ s = pread(tid_schedstat, buf, sizeof(buf) - 1, 0);
+ if (s <= 0)
+ continue;
+ buf[s] = '\0';
+
+ if (!sscanf(buf, "%s %s %*s", rt, wt))
+ continue;
+
+ r = safe_atolli(rt, &delta_rt);
+ if (r < 0)
+ continue;
+ r = safe_atolli(rt, &delta_wt);
+ if (r < 0)
+ continue;
+ ps->sample->runtime += delta_rt;
+ ps->sample->waittime += delta_wt;
+ }
+ }
- if (!pss)
+ if (!arg_pss)
goto catch_rename;
+
/* Pss */
if (!ps->smaps) {
sprintf(filename, "%d/smaps", pid);
- fd = openat(procfd, filename, O_RDONLY);
- ps->smaps = fdopen(fd, "r");
- if (!ps->smaps)
+ fd = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
+ if (fd < 0)
+ continue;
+ ps->smaps = fdopen(fd, "re");
+ if (!ps->smaps) {
+ close(fd);
continue;
+ }
setvbuf(ps->smaps, smaps_buf, _IOFBF, sizeof(smaps_buf));
} else {
rewind(ps->smaps);
}
+ /* test to see if we need to skip another field */
+ if (skip == 0) {
+ if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
+ continue;
+ }
+ if (fread(buf, 1, 28 * 15, ps->smaps) != (28 * 15)) {
+ continue;
+ }
+ if (buf[392] == 'V') {
+ skip = 2;
+ }
+ else {
+ skip = 1;
+ }
+ rewind(ps->smaps);
+ }
+
while (1) {
int pss_kb;
- /* skip one line, this contains the object mapped */
- if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
+ /* skip one line, this contains the object mapped. */
+ if (fgets(buf, sizeof(buf), ps->smaps) == NULL) {
break;
+ }
/* then there's a 28 char 14 line block */
- if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14)
+ if (fread(buf, 1, 28 * 14, ps->smaps) != 28 * 14) {
break;
-
+ }
pss_kb = atoi(&buf[61]);
- ps->sample[sample].pss += pss_kb;
+ ps->sample->pss += pss_kb;
+
+ /* skip one more line if this is a newer kernel */
+ if (skip == 2) {
+ if (fgets(buf, sizeof(buf), ps->smaps) == NULL)
+ break;
+ }
}
- if (ps->sample[sample].pss > ps->pss_max)
- ps->pss_max = ps->sample[sample].pss;
+ if (ps->sample->pss > ps->pss_max)
+ ps->pss_max = ps->sample->pss;
catch_rename:
/* catch process rename, try to randomize time */
- mod = (hz < 4.0) ? 4.0 : (hz / 4.0);
- if (((samples - ps->first) + pid) % (int)(mod) == 0) {
+ mod = (arg_hz < 4.0) ? 4.0 : (arg_hz / 4.0);
+ if (((sample - ps->pid) + pid) % (int)(mod) == 0) {
/* re-fetch name */
/* get name, start time */
- if (!ps->sched) {
+ if (ps->sched < 0) {
sprintf(filename, "%d/sched", pid);
- ps->sched = openat(procfd, filename, O_RDONLY);
- if (ps->sched == -1)
+ ps->sched = openat(procfd, filename, O_RDONLY|O_CLOEXEC);
+ if (ps->sched < 0)
continue;
}
+
s = pread(ps->sched, buf, sizeof(buf) - 1, 0);
if (s <= 0) {
/* clean up file descriptors */
- close(ps->sched);
- if (ps->schedstat)
- close(ps->schedstat);
- //if (ps->smaps)
- // fclose(ps->smaps);
+ ps->sched = safe_close(ps->sched);
+ ps->schedstat = safe_close(ps->schedstat);
continue;
}
+
buf[s] = '\0';
if (!sscanf(buf, "%s %*s %*s", key))
continue;
- strncpy(ps->name, key, 256);
+ strscpy(ps->name, sizeof(ps->name), key);
/* cmdline */
- if (show_cmdline)
- pid_cmdline_strncpy(ps->name, pid, 256);
+ if (arg_show_cmdline)
+ pid_cmdline_strscpy(procfd, ps->name, sizeof(ps->name), pid);
}
}
+
+ return 0;
}