]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-kmsg.c
core: still make progress when throttling the manager loop
[thirdparty/systemd.git] / src / journal / journald-kmsg.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <fcntl.h>
23 #include <sys/epoll.h>
24 #include <sys/mman.h>
25 #include <sys/socket.h>
26 #include <unistd.h>
27
28 #include "libudev.h"
29 #include "sd-messages.h"
30
31 #include "escape.h"
32 #include "fd-util.h"
33 #include "formats-util.h"
34 #include "io-util.h"
35 #include "journald-kmsg.h"
36 #include "journald-server.h"
37 #include "journald-syslog.h"
38 #include "parse-util.h"
39 #include "process-util.h"
40 #include "stdio-util.h"
41 #include "string-util.h"
42
43 void server_forward_kmsg(
44 Server *s,
45 int priority,
46 const char *identifier,
47 const char *message,
48 const struct ucred *ucred) {
49
50 struct iovec iovec[5];
51 char header_priority[DECIMAL_STR_MAX(priority) + 3],
52 header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
53 int n = 0;
54 char *ident_buf = NULL;
55
56 assert(s);
57 assert(priority >= 0);
58 assert(priority <= 999);
59 assert(message);
60
61 if (_unlikely_(LOG_PRI(priority) > s->max_level_kmsg))
62 return;
63
64 if (_unlikely_(s->dev_kmsg_fd < 0))
65 return;
66
67 /* Never allow messages with kernel facility to be written to
68 * kmsg, regardless where the data comes from. */
69 priority = syslog_fixup_facility(priority);
70
71 /* First: priority field */
72 xsprintf(header_priority, "<%i>", priority);
73 IOVEC_SET_STRING(iovec[n++], header_priority);
74
75 /* Second: identifier and PID */
76 if (ucred) {
77 if (!identifier) {
78 get_process_comm(ucred->pid, &ident_buf);
79 identifier = ident_buf;
80 }
81
82 xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
83
84 if (identifier)
85 IOVEC_SET_STRING(iovec[n++], identifier);
86
87 IOVEC_SET_STRING(iovec[n++], header_pid);
88 } else if (identifier) {
89 IOVEC_SET_STRING(iovec[n++], identifier);
90 IOVEC_SET_STRING(iovec[n++], ": ");
91 }
92
93 /* Fourth: message */
94 IOVEC_SET_STRING(iovec[n++], message);
95 IOVEC_SET_STRING(iovec[n++], "\n");
96
97 if (writev(s->dev_kmsg_fd, iovec, n) < 0)
98 log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
99
100 free(ident_buf);
101 }
102
103 static bool is_us(const char *pid) {
104 pid_t t;
105
106 assert(pid);
107
108 if (parse_pid(pid, &t) < 0)
109 return false;
110
111 return t == getpid();
112 }
113
114 static void dev_kmsg_record(Server *s, const char *p, size_t l) {
115 struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
116 char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
117 int priority, r;
118 unsigned n = 0, z = 0, j;
119 unsigned long long usec;
120 char *identifier = NULL, *pid = NULL, *e, *f, *k;
121 uint64_t serial;
122 size_t pl;
123 char *kernel_device = NULL;
124
125 assert(s);
126 assert(p);
127
128 if (l <= 0)
129 return;
130
131 e = memchr(p, ',', l);
132 if (!e)
133 return;
134 *e = 0;
135
136 r = safe_atoi(p, &priority);
137 if (r < 0 || priority < 0 || priority > 999)
138 return;
139
140 if (s->forward_to_kmsg && (priority & LOG_FACMASK) != LOG_KERN)
141 return;
142
143 l -= (e - p) + 1;
144 p = e + 1;
145 e = memchr(p, ',', l);
146 if (!e)
147 return;
148 *e = 0;
149
150 r = safe_atou64(p, &serial);
151 if (r < 0)
152 return;
153
154 if (s->kernel_seqnum) {
155 /* We already read this one? */
156 if (serial < *s->kernel_seqnum)
157 return;
158
159 /* Did we lose any? */
160 if (serial > *s->kernel_seqnum)
161 server_driver_message(s, SD_MESSAGE_JOURNAL_MISSED, "Missed %"PRIu64" kernel messages",
162 serial - *s->kernel_seqnum);
163
164 /* Make sure we never read this one again. Note that
165 * we always store the next message serial we expect
166 * here, simply because this makes handling the first
167 * message with serial 0 easy. */
168 *s->kernel_seqnum = serial + 1;
169 }
170
171 l -= (e - p) + 1;
172 p = e + 1;
173 f = memchr(p, ';', l);
174 if (!f)
175 return;
176 /* Kernel 3.6 has the flags field, kernel 3.5 lacks that */
177 e = memchr(p, ',', l);
178 if (!e || f < e)
179 e = f;
180 *e = 0;
181
182 r = safe_atollu(p, &usec);
183 if (r < 0)
184 return;
185
186 l -= (f - p) + 1;
187 p = f + 1;
188 e = memchr(p, '\n', l);
189 if (!e)
190 return;
191 *e = 0;
192
193 pl = e - p;
194 l -= (e - p) + 1;
195 k = e + 1;
196
197 for (j = 0; l > 0 && j < N_IOVEC_KERNEL_FIELDS; j++) {
198 char *m;
199 /* Metadata fields attached */
200
201 if (*k != ' ')
202 break;
203
204 k ++, l --;
205
206 e = memchr(k, '\n', l);
207 if (!e)
208 return;
209
210 *e = 0;
211
212 if (cunescape_length_with_prefix(k, e - k, "_KERNEL_", UNESCAPE_RELAX, &m) < 0)
213 break;
214
215 if (startswith(m, "_KERNEL_DEVICE="))
216 kernel_device = m + 15;
217
218 IOVEC_SET_STRING(iovec[n++], m);
219 z++;
220
221 l -= (e - k) + 1;
222 k = e + 1;
223 }
224
225 if (kernel_device) {
226 struct udev_device *ud;
227
228 ud = udev_device_new_from_device_id(s->udev, kernel_device);
229 if (ud) {
230 const char *g;
231 struct udev_list_entry *ll;
232 char *b;
233
234 g = udev_device_get_devnode(ud);
235 if (g) {
236 b = strappend("_UDEV_DEVNODE=", g);
237 if (b) {
238 IOVEC_SET_STRING(iovec[n++], b);
239 z++;
240 }
241 }
242
243 g = udev_device_get_sysname(ud);
244 if (g) {
245 b = strappend("_UDEV_SYSNAME=", g);
246 if (b) {
247 IOVEC_SET_STRING(iovec[n++], b);
248 z++;
249 }
250 }
251
252 j = 0;
253 ll = udev_device_get_devlinks_list_entry(ud);
254 udev_list_entry_foreach(ll, ll) {
255
256 if (j > N_IOVEC_UDEV_FIELDS)
257 break;
258
259 g = udev_list_entry_get_name(ll);
260 if (g) {
261 b = strappend("_UDEV_DEVLINK=", g);
262 if (b) {
263 IOVEC_SET_STRING(iovec[n++], b);
264 z++;
265 }
266 }
267
268 j++;
269 }
270
271 udev_device_unref(ud);
272 }
273 }
274
275 if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
276 IOVEC_SET_STRING(iovec[n++], source_time);
277
278 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
279
280 if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
281 IOVEC_SET_STRING(iovec[n++], syslog_priority);
282
283 if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
284 IOVEC_SET_STRING(iovec[n++], syslog_facility);
285
286 if ((priority & LOG_FACMASK) == LOG_KERN)
287 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
288 else {
289 pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
290
291 /* Avoid any messages we generated ourselves via
292 * log_info() and friends. */
293 if (pid && is_us(pid))
294 goto finish;
295
296 if (identifier) {
297 syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
298 if (syslog_identifier)
299 IOVEC_SET_STRING(iovec[n++], syslog_identifier);
300 }
301
302 if (pid) {
303 syslog_pid = strappend("SYSLOG_PID=", pid);
304 if (syslog_pid)
305 IOVEC_SET_STRING(iovec[n++], syslog_pid);
306 }
307 }
308
309 if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
310 IOVEC_SET_STRING(iovec[n++], message);
311
312 server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, NULL, 0, NULL, priority, 0);
313
314 finish:
315 for (j = 0; j < z; j++)
316 free(iovec[j].iov_base);
317
318 free(message);
319 free(syslog_priority);
320 free(syslog_identifier);
321 free(syslog_pid);
322 free(syslog_facility);
323 free(source_time);
324 free(identifier);
325 free(pid);
326 }
327
328 static int server_read_dev_kmsg(Server *s) {
329 char buffer[8192+1]; /* the kernel-side limit per record is 8K currently */
330 ssize_t l;
331
332 assert(s);
333 assert(s->dev_kmsg_fd >= 0);
334
335 l = read(s->dev_kmsg_fd, buffer, sizeof(buffer) - 1);
336 if (l == 0)
337 return 0;
338 if (l < 0) {
339 /* Old kernels who don't allow reading from /dev/kmsg
340 * return EINVAL when we try. So handle this cleanly,
341 * but don' try to ever read from it again. */
342 if (errno == EINVAL) {
343 s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
344 return 0;
345 }
346
347 if (errno == EAGAIN || errno == EINTR || errno == EPIPE)
348 return 0;
349
350 log_error_errno(errno, "Failed to read from kernel: %m");
351 return -errno;
352 }
353
354 dev_kmsg_record(s, buffer, l);
355 return 1;
356 }
357
358 int server_flush_dev_kmsg(Server *s) {
359 int r;
360
361 assert(s);
362
363 if (s->dev_kmsg_fd < 0)
364 return 0;
365
366 if (!s->dev_kmsg_readable)
367 return 0;
368
369 log_debug("Flushing /dev/kmsg...");
370
371 for (;;) {
372 r = server_read_dev_kmsg(s);
373 if (r < 0)
374 return r;
375
376 if (r == 0)
377 break;
378 }
379
380 return 0;
381 }
382
383 static int dispatch_dev_kmsg(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
384 Server *s = userdata;
385
386 assert(es);
387 assert(fd == s->dev_kmsg_fd);
388 assert(s);
389
390 if (revents & EPOLLERR)
391 log_warning("/dev/kmsg buffer overrun, some messages lost.");
392
393 if (!(revents & EPOLLIN))
394 log_error("Got invalid event from epoll for /dev/kmsg: %"PRIx32, revents);
395
396 return server_read_dev_kmsg(s);
397 }
398
399 int server_open_dev_kmsg(Server *s) {
400 int r;
401
402 assert(s);
403
404 s->dev_kmsg_fd = open("/dev/kmsg", O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
405 if (s->dev_kmsg_fd < 0) {
406 log_full(errno == ENOENT ? LOG_DEBUG : LOG_WARNING,
407 "Failed to open /dev/kmsg, ignoring: %m");
408 return 0;
409 }
410
411 r = sd_event_add_io(s->event, &s->dev_kmsg_event_source, s->dev_kmsg_fd, EPOLLIN, dispatch_dev_kmsg, s);
412 if (r < 0) {
413
414 /* This will fail with EPERM on older kernels where
415 * /dev/kmsg is not readable. */
416 if (r == -EPERM) {
417 r = 0;
418 goto fail;
419 }
420
421 log_error_errno(r, "Failed to add /dev/kmsg fd to event loop: %m");
422 goto fail;
423 }
424
425 r = sd_event_source_set_priority(s->dev_kmsg_event_source, SD_EVENT_PRIORITY_IMPORTANT+10);
426 if (r < 0) {
427 log_error_errno(r, "Failed to adjust priority of kmsg event source: %m");
428 goto fail;
429 }
430
431 s->dev_kmsg_readable = true;
432
433 return 0;
434
435 fail:
436 s->dev_kmsg_event_source = sd_event_source_unref(s->dev_kmsg_event_source);
437 s->dev_kmsg_fd = safe_close(s->dev_kmsg_fd);
438
439 return r;
440 }
441
442 int server_open_kernel_seqnum(Server *s) {
443 _cleanup_close_ int fd;
444 uint64_t *p;
445
446 assert(s);
447
448 /* We store the seqnum we last read in an mmaped file. That
449 * way we can just use it like a variable, but it is
450 * persistent and automatically flushed at reboot. */
451
452 fd = open("/run/systemd/journal/kernel-seqnum", O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
453 if (fd < 0) {
454 log_error_errno(errno, "Failed to open /run/systemd/journal/kernel-seqnum, ignoring: %m");
455 return 0;
456 }
457
458 if (posix_fallocate(fd, 0, sizeof(uint64_t)) < 0) {
459 log_error_errno(errno, "Failed to allocate sequential number file, ignoring: %m");
460 return 0;
461 }
462
463 p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
464 if (p == MAP_FAILED) {
465 log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
466 return 0;
467 }
468
469 s->kernel_seqnum = p;
470
471 return 0;
472 }