]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/sd-journal.c
Merge pull request #8417 from brauner/2018-03-09/add_bind_mount_fallback_to_private_d...
[thirdparty/systemd.git] / src / journal / sd-journal.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2011 Lennart Poettering
6 ***/
7
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/magic.h>
#include <poll.h>
#include <stddef.h>
#include <sys/inotify.h>
#include <sys/vfs.h>
#include <unistd.h>
17
18 #include "sd-journal.h"
19
20 #include "alloc-util.h"
21 #include "catalog.h"
22 #include "compress.h"
23 #include "dirent-util.h"
24 #include "fd-util.h"
25 #include "fileio.h"
26 #include "format-util.h"
27 #include "fs-util.h"
28 #include "hashmap.h"
29 #include "hostname-util.h"
30 #include "id128-util.h"
31 #include "io-util.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "journal-internal.h"
35 #include "list.h"
36 #include "lookup3.h"
37 #include "missing.h"
38 #include "path-util.h"
39 #include "process-util.h"
40 #include "replace-var.h"
41 #include "stat-util.h"
42 #include "stat-util.h"
43 #include "stdio-util.h"
44 #include "string-util.h"
45 #include "strv.h"
46
47 #define JOURNAL_FILES_MAX 7168
48
49 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
50
51 #define REPLACE_VAR_MAX 256
52
53 #define DEFAULT_DATA_THRESHOLD (64*1024)
54
55 static void remove_file_real(sd_journal *j, JournalFile *f);
56
57 static bool journal_pid_changed(sd_journal *j) {
58 assert(j);
59
60 /* We don't support people creating a journal object and
61 * keeping it around over a fork(). Let's complain. */
62
63 return j->original_pid != getpid_cached();
64 }
65
66 static int journal_put_error(sd_journal *j, int r, const char *path) {
67 char *copy;
68 int k;
69
70 /* Memorize an error we encountered, and store which
71 * file/directory it was generated from. Note that we store
72 * only *one* path per error code, as the error code is the
73 * key into the hashmap, and the path is the value. This means
74 * we keep track only of all error kinds, but not of all error
75 * locations. This has the benefit that the hashmap cannot
76 * grow beyond bounds.
77 *
78 * We return an error here only if we didn't manage to
79 * memorize the real error. */
80
81 if (r >= 0)
82 return r;
83
84 k = hashmap_ensure_allocated(&j->errors, NULL);
85 if (k < 0)
86 return k;
87
88 if (path) {
89 copy = strdup(path);
90 if (!copy)
91 return -ENOMEM;
92 } else
93 copy = NULL;
94
95 k = hashmap_put(j->errors, INT_TO_PTR(r), copy);
96 if (k < 0) {
97 free(copy);
98
99 if (k == -EEXIST)
100 return 0;
101
102 return k;
103 }
104
105 return 0;
106 }
107
108 static void detach_location(sd_journal *j) {
109 Iterator i;
110 JournalFile *f;
111
112 assert(j);
113
114 j->current_file = NULL;
115 j->current_field = 0;
116
117 ORDERED_HASHMAP_FOREACH(f, j->files, i)
118 journal_file_reset_location(f);
119 }
120
121 static void reset_location(sd_journal *j) {
122 assert(j);
123
124 detach_location(j);
125 zero(j->current_location);
126 }
127
128 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
129 assert(l);
130 assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
131 assert(f);
132 assert(o->object.type == OBJECT_ENTRY);
133
134 l->type = type;
135 l->seqnum = le64toh(o->entry.seqnum);
136 l->seqnum_id = f->header->seqnum_id;
137 l->realtime = le64toh(o->entry.realtime);
138 l->monotonic = le64toh(o->entry.monotonic);
139 l->boot_id = o->entry.boot_id;
140 l->xor_hash = le64toh(o->entry.xor_hash);
141
142 l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
143 }
144
145 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
146 assert(j);
147 assert(f);
148 assert(o);
149
150 init_location(&j->current_location, LOCATION_DISCRETE, f, o);
151
152 j->current_file = f;
153 j->current_field = 0;
154
155 /* Let f know its candidate entry was picked. */
156 assert(f->location_type == LOCATION_SEEK);
157 f->location_type = LOCATION_DISCRETE;
158 }
159
160 static int match_is_valid(const void *data, size_t size) {
161 const char *b, *p;
162
163 assert(data);
164
165 if (size < 2)
166 return false;
167
168 if (startswith(data, "__"))
169 return false;
170
171 b = data;
172 for (p = b; p < b + size; p++) {
173
174 if (*p == '=')
175 return p > b;
176
177 if (*p == '_')
178 continue;
179
180 if (*p >= 'A' && *p <= 'Z')
181 continue;
182
183 if (*p >= '0' && *p <= '9')
184 continue;
185
186 return false;
187 }
188
189 return false;
190 }
191
/* Compares the field names of two match expressions (s bytes at _a, t bytes
 * at _b). Returns true if both carry the same name up to and including the
 * '=', false on the first differing byte. Running out of input before an
 * '=' is seen is treated as a programming error. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t n = 0;

        while (n < s && n < t) {

                if (lhs[n] != rhs[n])
                        return false;

                if (lhs[n] == '=')
                        return true;

                n++;
        }

        assert_not_reached("\"=\" not found");
}
207
208 static Match *match_new(Match *p, MatchType t) {
209 Match *m;
210
211 m = new0(Match, 1);
212 if (!m)
213 return NULL;
214
215 m->type = t;
216
217 if (p) {
218 m->parent = p;
219 LIST_PREPEND(matches, p->matches, m);
220 }
221
222 return m;
223 }
224
225 static void match_free(Match *m) {
226 assert(m);
227
228 while (m->matches)
229 match_free(m->matches);
230
231 if (m->parent)
232 LIST_REMOVE(matches, m->parent->matches, m);
233
234 free(m->data);
235 free(m);
236 }
237
238 static void match_free_if_empty(Match *m) {
239 if (!m || m->matches)
240 return;
241
242 match_free(m);
243 }
244
245 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
246 Match *l3, *l4, *add_here = NULL, *m;
247 le64_t le_hash;
248
249 assert_return(j, -EINVAL);
250 assert_return(!journal_pid_changed(j), -ECHILD);
251 assert_return(data, -EINVAL);
252
253 if (size == 0)
254 size = strlen(data);
255
256 assert_return(match_is_valid(data, size), -EINVAL);
257
258 /* level 0: AND term
259 * level 1: OR terms
260 * level 2: AND terms
261 * level 3: OR terms
262 * level 4: concrete matches */
263
264 if (!j->level0) {
265 j->level0 = match_new(NULL, MATCH_AND_TERM);
266 if (!j->level0)
267 return -ENOMEM;
268 }
269
270 if (!j->level1) {
271 j->level1 = match_new(j->level0, MATCH_OR_TERM);
272 if (!j->level1)
273 return -ENOMEM;
274 }
275
276 if (!j->level2) {
277 j->level2 = match_new(j->level1, MATCH_AND_TERM);
278 if (!j->level2)
279 return -ENOMEM;
280 }
281
282 assert(j->level0->type == MATCH_AND_TERM);
283 assert(j->level1->type == MATCH_OR_TERM);
284 assert(j->level2->type == MATCH_AND_TERM);
285
286 le_hash = htole64(hash64(data, size));
287
288 LIST_FOREACH(matches, l3, j->level2->matches) {
289 assert(l3->type == MATCH_OR_TERM);
290
291 LIST_FOREACH(matches, l4, l3->matches) {
292 assert(l4->type == MATCH_DISCRETE);
293
294 /* Exactly the same match already? Then ignore
295 * this addition */
296 if (l4->le_hash == le_hash &&
297 l4->size == size &&
298 memcmp(l4->data, data, size) == 0)
299 return 0;
300
301 /* Same field? Then let's add this to this OR term */
302 if (same_field(data, size, l4->data, l4->size)) {
303 add_here = l3;
304 break;
305 }
306 }
307
308 if (add_here)
309 break;
310 }
311
312 if (!add_here) {
313 add_here = match_new(j->level2, MATCH_OR_TERM);
314 if (!add_here)
315 goto fail;
316 }
317
318 m = match_new(add_here, MATCH_DISCRETE);
319 if (!m)
320 goto fail;
321
322 m->le_hash = le_hash;
323 m->size = size;
324 m->data = memdup(data, size);
325 if (!m->data)
326 goto fail;
327
328 detach_location(j);
329
330 return 0;
331
332 fail:
333 match_free_if_empty(add_here);
334 match_free_if_empty(j->level2);
335 match_free_if_empty(j->level1);
336 match_free_if_empty(j->level0);
337
338 return -ENOMEM;
339 }
340
341 _public_ int sd_journal_add_conjunction(sd_journal *j) {
342 assert_return(j, -EINVAL);
343 assert_return(!journal_pid_changed(j), -ECHILD);
344
345 if (!j->level0)
346 return 0;
347
348 if (!j->level1)
349 return 0;
350
351 if (!j->level1->matches)
352 return 0;
353
354 j->level1 = NULL;
355 j->level2 = NULL;
356
357 return 0;
358 }
359
360 _public_ int sd_journal_add_disjunction(sd_journal *j) {
361 assert_return(j, -EINVAL);
362 assert_return(!journal_pid_changed(j), -ECHILD);
363
364 if (!j->level0)
365 return 0;
366
367 if (!j->level1)
368 return 0;
369
370 if (!j->level2)
371 return 0;
372
373 if (!j->level2->matches)
374 return 0;
375
376 j->level2 = NULL;
377 return 0;
378 }
379
380 static char *match_make_string(Match *m) {
381 char *p = NULL, *r;
382 Match *i;
383 bool enclose = false;
384
385 if (!m)
386 return strdup("none");
387
388 if (m->type == MATCH_DISCRETE)
389 return strndup(m->data, m->size);
390
391 LIST_FOREACH(matches, i, m->matches) {
392 char *t, *k;
393
394 t = match_make_string(i);
395 if (!t)
396 return mfree(p);
397
398 if (p) {
399 k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t);
400 free(p);
401 free(t);
402
403 if (!k)
404 return NULL;
405
406 p = k;
407
408 enclose = true;
409 } else
410 p = t;
411 }
412
413 if (enclose) {
414 r = strjoin("(", p, ")");
415 free(p);
416 return r;
417 }
418
419 return p;
420 }
421
422 char *journal_make_match_string(sd_journal *j) {
423 assert(j);
424
425 return match_make_string(j->level0);
426 }
427
428 _public_ void sd_journal_flush_matches(sd_journal *j) {
429 if (!j)
430 return;
431
432 if (j->level0)
433 match_free(j->level0);
434
435 j->level0 = j->level1 = j->level2 = NULL;
436
437 detach_location(j);
438 }
439
440 _pure_ static int compare_with_location(JournalFile *f, Location *l) {
441 assert(f);
442 assert(l);
443 assert(f->location_type == LOCATION_SEEK);
444 assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
445
446 if (l->monotonic_set &&
447 sd_id128_equal(f->current_boot_id, l->boot_id) &&
448 l->realtime_set &&
449 f->current_realtime == l->realtime &&
450 l->xor_hash_set &&
451 f->current_xor_hash == l->xor_hash)
452 return 0;
453
454 if (l->seqnum_set &&
455 sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
456
457 if (f->current_seqnum < l->seqnum)
458 return -1;
459 if (f->current_seqnum > l->seqnum)
460 return 1;
461 }
462
463 if (l->monotonic_set &&
464 sd_id128_equal(f->current_boot_id, l->boot_id)) {
465
466 if (f->current_monotonic < l->monotonic)
467 return -1;
468 if (f->current_monotonic > l->monotonic)
469 return 1;
470 }
471
472 if (l->realtime_set) {
473
474 if (f->current_realtime < l->realtime)
475 return -1;
476 if (f->current_realtime > l->realtime)
477 return 1;
478 }
479
480 if (l->xor_hash_set) {
481
482 if (f->current_xor_hash < l->xor_hash)
483 return -1;
484 if (f->current_xor_hash > l->xor_hash)
485 return 1;
486 }
487
488 return 0;
489 }
490
491 static int next_for_match(
492 sd_journal *j,
493 Match *m,
494 JournalFile *f,
495 uint64_t after_offset,
496 direction_t direction,
497 Object **ret,
498 uint64_t *offset) {
499
500 int r;
501 uint64_t np = 0;
502 Object *n;
503
504 assert(j);
505 assert(m);
506 assert(f);
507
508 if (m->type == MATCH_DISCRETE) {
509 uint64_t dp;
510
511 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
512 if (r <= 0)
513 return r;
514
515 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
516
517 } else if (m->type == MATCH_OR_TERM) {
518 Match *i;
519
520 /* Find the earliest match beyond after_offset */
521
522 LIST_FOREACH(matches, i, m->matches) {
523 uint64_t cp;
524
525 r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
526 if (r < 0)
527 return r;
528 else if (r > 0) {
529 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
530 np = cp;
531 }
532 }
533
534 if (np == 0)
535 return 0;
536
537 } else if (m->type == MATCH_AND_TERM) {
538 Match *i, *last_moved;
539
540 /* Always jump to the next matching entry and repeat
541 * this until we find an offset that matches for all
542 * matches. */
543
544 if (!m->matches)
545 return 0;
546
547 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
548 if (r <= 0)
549 return r;
550
551 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
552 last_moved = m->matches;
553
554 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
555 uint64_t cp;
556
557 r = next_for_match(j, i, f, np, direction, NULL, &cp);
558 if (r <= 0)
559 return r;
560
561 assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
562 if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
563 np = cp;
564 last_moved = i;
565 }
566 }
567 }
568
569 assert(np > 0);
570
571 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
572 if (r < 0)
573 return r;
574
575 if (ret)
576 *ret = n;
577 if (offset)
578 *offset = np;
579
580 return 1;
581 }
582
583 static int find_location_for_match(
584 sd_journal *j,
585 Match *m,
586 JournalFile *f,
587 direction_t direction,
588 Object **ret,
589 uint64_t *offset) {
590
591 int r;
592
593 assert(j);
594 assert(m);
595 assert(f);
596
597 if (m->type == MATCH_DISCRETE) {
598 uint64_t dp;
599
600 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
601 if (r <= 0)
602 return r;
603
604 /* FIXME: missing: find by monotonic */
605
606 if (j->current_location.type == LOCATION_HEAD)
607 return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
608 if (j->current_location.type == LOCATION_TAIL)
609 return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
610 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
611 return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
612 if (j->current_location.monotonic_set) {
613 r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
614 if (r != -ENOENT)
615 return r;
616 }
617 if (j->current_location.realtime_set)
618 return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
619
620 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
621
622 } else if (m->type == MATCH_OR_TERM) {
623 uint64_t np = 0;
624 Object *n;
625 Match *i;
626
627 /* Find the earliest match */
628
629 LIST_FOREACH(matches, i, m->matches) {
630 uint64_t cp;
631
632 r = find_location_for_match(j, i, f, direction, NULL, &cp);
633 if (r < 0)
634 return r;
635 else if (r > 0) {
636 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
637 np = cp;
638 }
639 }
640
641 if (np == 0)
642 return 0;
643
644 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
645 if (r < 0)
646 return r;
647
648 if (ret)
649 *ret = n;
650 if (offset)
651 *offset = np;
652
653 return 1;
654
655 } else {
656 Match *i;
657 uint64_t np = 0;
658
659 assert(m->type == MATCH_AND_TERM);
660
661 /* First jump to the last match, and then find the
662 * next one where all matches match */
663
664 if (!m->matches)
665 return 0;
666
667 LIST_FOREACH(matches, i, m->matches) {
668 uint64_t cp;
669
670 r = find_location_for_match(j, i, f, direction, NULL, &cp);
671 if (r <= 0)
672 return r;
673
674 if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
675 np = cp;
676 }
677
678 return next_for_match(j, m, f, np, direction, ret, offset);
679 }
680 }
681
682 static int find_location_with_matches(
683 sd_journal *j,
684 JournalFile *f,
685 direction_t direction,
686 Object **ret,
687 uint64_t *offset) {
688
689 int r;
690
691 assert(j);
692 assert(f);
693 assert(ret);
694 assert(offset);
695
696 if (!j->level0) {
697 /* No matches is simple */
698
699 if (j->current_location.type == LOCATION_HEAD)
700 return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
701 if (j->current_location.type == LOCATION_TAIL)
702 return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
703 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
704 return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
705 if (j->current_location.monotonic_set) {
706 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
707 if (r != -ENOENT)
708 return r;
709 }
710 if (j->current_location.realtime_set)
711 return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
712
713 return journal_file_next_entry(f, 0, direction, ret, offset);
714 } else
715 return find_location_for_match(j, j->level0, f, direction, ret, offset);
716 }
717
718 static int next_with_matches(
719 sd_journal *j,
720 JournalFile *f,
721 direction_t direction,
722 Object **ret,
723 uint64_t *offset) {
724
725 assert(j);
726 assert(f);
727 assert(ret);
728 assert(offset);
729
730 /* No matches is easy. We simple advance the file
731 * pointer by one. */
732 if (!j->level0)
733 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
734
735 /* If we have a match then we look for the next matching entry
736 * with an offset at least one step larger */
737 return next_for_match(j, j->level0, f,
738 direction == DIRECTION_DOWN ? f->current_offset + 1
739 : f->current_offset - 1,
740 direction, ret, offset);
741 }
742
743 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
744 Object *c;
745 uint64_t cp, n_entries;
746 int r;
747
748 assert(j);
749 assert(f);
750
751 n_entries = le64toh(f->header->n_entries);
752
753 /* If we hit EOF before, we don't need to look into this file again
754 * unless direction changed or new entries appeared. */
755 if (f->last_direction == direction && f->location_type == LOCATION_TAIL &&
756 n_entries == f->last_n_entries)
757 return 0;
758
759 f->last_n_entries = n_entries;
760
761 if (f->last_direction == direction && f->current_offset > 0) {
762 /* LOCATION_SEEK here means we did the work in a previous
763 * iteration and the current location already points to a
764 * candidate entry. */
765 if (f->location_type != LOCATION_SEEK) {
766 r = next_with_matches(j, f, direction, &c, &cp);
767 if (r <= 0)
768 return r;
769
770 journal_file_save_location(f, c, cp);
771 }
772 } else {
773 f->last_direction = direction;
774
775 r = find_location_with_matches(j, f, direction, &c, &cp);
776 if (r <= 0)
777 return r;
778
779 journal_file_save_location(f, c, cp);
780 }
781
782 /* OK, we found the spot, now let's advance until an entry
783 * that is actually different from what we were previously
784 * looking at. This is necessary to handle entries which exist
785 * in two (or more) journal files, and which shall all be
786 * suppressed but one. */
787
788 for (;;) {
789 bool found;
790
791 if (j->current_location.type == LOCATION_DISCRETE) {
792 int k;
793
794 k = compare_with_location(f, &j->current_location);
795
796 found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
797 } else
798 found = true;
799
800 if (found)
801 return 1;
802
803 r = next_with_matches(j, f, direction, &c, &cp);
804 if (r <= 0)
805 return r;
806
807 journal_file_save_location(f, c, cp);
808 }
809 }
810
811 static int real_journal_next(sd_journal *j, direction_t direction) {
812 JournalFile *new_file = NULL;
813 unsigned i, n_files;
814 const void **files;
815 Object *o;
816 int r;
817
818 assert_return(j, -EINVAL);
819 assert_return(!journal_pid_changed(j), -ECHILD);
820
821 r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
822 if (r < 0)
823 return r;
824
825 for (i = 0; i < n_files; i++) {
826 JournalFile *f = (JournalFile *)files[i];
827 bool found;
828
829 r = next_beyond_location(j, f, direction);
830 if (r < 0) {
831 log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
832 remove_file_real(j, f);
833 continue;
834 } else if (r == 0) {
835 f->location_type = LOCATION_TAIL;
836 continue;
837 }
838
839 if (!new_file)
840 found = true;
841 else {
842 int k;
843
844 k = journal_file_compare_locations(f, new_file);
845
846 found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
847 }
848
849 if (found)
850 new_file = f;
851 }
852
853 if (!new_file)
854 return 0;
855
856 r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
857 if (r < 0)
858 return r;
859
860 set_location(j, new_file, o);
861
862 return 1;
863 }
864
865 _public_ int sd_journal_next(sd_journal *j) {
866 return real_journal_next(j, DIRECTION_DOWN);
867 }
868
869 _public_ int sd_journal_previous(sd_journal *j) {
870 return real_journal_next(j, DIRECTION_UP);
871 }
872
873 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
874 int c = 0, r;
875
876 assert_return(j, -EINVAL);
877 assert_return(!journal_pid_changed(j), -ECHILD);
878
879 if (skip == 0) {
880 /* If this is not a discrete skip, then at least
881 * resolve the current location */
882 if (j->current_location.type != LOCATION_DISCRETE) {
883 r = real_journal_next(j, direction);
884 if (r < 0)
885 return r;
886 }
887
888 return 0;
889 }
890
891 do {
892 r = real_journal_next(j, direction);
893 if (r < 0)
894 return r;
895
896 if (r == 0)
897 return c;
898
899 skip--;
900 c++;
901 } while (skip > 0);
902
903 return c;
904 }
905
906 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
907 return real_journal_next_skip(j, DIRECTION_DOWN, skip);
908 }
909
910 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
911 return real_journal_next_skip(j, DIRECTION_UP, skip);
912 }
913
914 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
915 Object *o;
916 int r;
917 char bid[33], sid[33];
918
919 assert_return(j, -EINVAL);
920 assert_return(!journal_pid_changed(j), -ECHILD);
921 assert_return(cursor, -EINVAL);
922
923 if (!j->current_file || j->current_file->current_offset <= 0)
924 return -EADDRNOTAVAIL;
925
926 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
927 if (r < 0)
928 return r;
929
930 sd_id128_to_string(j->current_file->header->seqnum_id, sid);
931 sd_id128_to_string(o->entry.boot_id, bid);
932
933 if (asprintf(cursor,
934 "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
935 sid, le64toh(o->entry.seqnum),
936 bid, le64toh(o->entry.monotonic),
937 le64toh(o->entry.realtime),
938 le64toh(o->entry.xor_hash)) < 0)
939 return -ENOMEM;
940
941 return 0;
942 }
943
944 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
945 const char *word, *state;
946 size_t l;
947 unsigned long long seqnum, monotonic, realtime, xor_hash;
948 bool
949 seqnum_id_set = false,
950 seqnum_set = false,
951 boot_id_set = false,
952 monotonic_set = false,
953 realtime_set = false,
954 xor_hash_set = false;
955 sd_id128_t seqnum_id, boot_id;
956
957 assert_return(j, -EINVAL);
958 assert_return(!journal_pid_changed(j), -ECHILD);
959 assert_return(!isempty(cursor), -EINVAL);
960
961 FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
962 char *item;
963 int k = 0;
964
965 if (l < 2 || word[1] != '=')
966 return -EINVAL;
967
968 item = strndup(word, l);
969 if (!item)
970 return -ENOMEM;
971
972 switch (word[0]) {
973
974 case 's':
975 seqnum_id_set = true;
976 k = sd_id128_from_string(item+2, &seqnum_id);
977 break;
978
979 case 'i':
980 seqnum_set = true;
981 if (sscanf(item+2, "%llx", &seqnum) != 1)
982 k = -EINVAL;
983 break;
984
985 case 'b':
986 boot_id_set = true;
987 k = sd_id128_from_string(item+2, &boot_id);
988 break;
989
990 case 'm':
991 monotonic_set = true;
992 if (sscanf(item+2, "%llx", &monotonic) != 1)
993 k = -EINVAL;
994 break;
995
996 case 't':
997 realtime_set = true;
998 if (sscanf(item+2, "%llx", &realtime) != 1)
999 k = -EINVAL;
1000 break;
1001
1002 case 'x':
1003 xor_hash_set = true;
1004 if (sscanf(item+2, "%llx", &xor_hash) != 1)
1005 k = -EINVAL;
1006 break;
1007 }
1008
1009 free(item);
1010
1011 if (k < 0)
1012 return k;
1013 }
1014
1015 if ((!seqnum_set || !seqnum_id_set) &&
1016 (!monotonic_set || !boot_id_set) &&
1017 !realtime_set)
1018 return -EINVAL;
1019
1020 reset_location(j);
1021
1022 j->current_location.type = LOCATION_SEEK;
1023
1024 if (realtime_set) {
1025 j->current_location.realtime = (uint64_t) realtime;
1026 j->current_location.realtime_set = true;
1027 }
1028
1029 if (seqnum_set && seqnum_id_set) {
1030 j->current_location.seqnum = (uint64_t) seqnum;
1031 j->current_location.seqnum_id = seqnum_id;
1032 j->current_location.seqnum_set = true;
1033 }
1034
1035 if (monotonic_set && boot_id_set) {
1036 j->current_location.monotonic = (uint64_t) monotonic;
1037 j->current_location.boot_id = boot_id;
1038 j->current_location.monotonic_set = true;
1039 }
1040
1041 if (xor_hash_set) {
1042 j->current_location.xor_hash = (uint64_t) xor_hash;
1043 j->current_location.xor_hash_set = true;
1044 }
1045
1046 return 0;
1047 }
1048
1049 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1050 int r;
1051 Object *o;
1052
1053 assert_return(j, -EINVAL);
1054 assert_return(!journal_pid_changed(j), -ECHILD);
1055 assert_return(!isempty(cursor), -EINVAL);
1056
1057 if (!j->current_file || j->current_file->current_offset <= 0)
1058 return -EADDRNOTAVAIL;
1059
1060 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1061 if (r < 0)
1062 return r;
1063
1064 for (;;) {
1065 _cleanup_free_ char *item = NULL;
1066 unsigned long long ll;
1067 sd_id128_t id;
1068 int k = 0;
1069
1070 r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1071 if (r < 0)
1072 return r;
1073
1074 if (r == 0)
1075 break;
1076
1077 if (strlen(item) < 2 || item[1] != '=')
1078 return -EINVAL;
1079
1080 switch (item[0]) {
1081
1082 case 's':
1083 k = sd_id128_from_string(item+2, &id);
1084 if (k < 0)
1085 return k;
1086 if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1087 return 0;
1088 break;
1089
1090 case 'i':
1091 if (sscanf(item+2, "%llx", &ll) != 1)
1092 return -EINVAL;
1093 if (ll != le64toh(o->entry.seqnum))
1094 return 0;
1095 break;
1096
1097 case 'b':
1098 k = sd_id128_from_string(item+2, &id);
1099 if (k < 0)
1100 return k;
1101 if (!sd_id128_equal(id, o->entry.boot_id))
1102 return 0;
1103 break;
1104
1105 case 'm':
1106 if (sscanf(item+2, "%llx", &ll) != 1)
1107 return -EINVAL;
1108 if (ll != le64toh(o->entry.monotonic))
1109 return 0;
1110 break;
1111
1112 case 't':
1113 if (sscanf(item+2, "%llx", &ll) != 1)
1114 return -EINVAL;
1115 if (ll != le64toh(o->entry.realtime))
1116 return 0;
1117 break;
1118
1119 case 'x':
1120 if (sscanf(item+2, "%llx", &ll) != 1)
1121 return -EINVAL;
1122 if (ll != le64toh(o->entry.xor_hash))
1123 return 0;
1124 break;
1125 }
1126 }
1127
1128 return 1;
1129 }
1130
1131 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1132 assert_return(j, -EINVAL);
1133 assert_return(!journal_pid_changed(j), -ECHILD);
1134
1135 reset_location(j);
1136 j->current_location.type = LOCATION_SEEK;
1137 j->current_location.boot_id = boot_id;
1138 j->current_location.monotonic = usec;
1139 j->current_location.monotonic_set = true;
1140
1141 return 0;
1142 }
1143
1144 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1145 assert_return(j, -EINVAL);
1146 assert_return(!journal_pid_changed(j), -ECHILD);
1147
1148 reset_location(j);
1149 j->current_location.type = LOCATION_SEEK;
1150 j->current_location.realtime = usec;
1151 j->current_location.realtime_set = true;
1152
1153 return 0;
1154 }
1155
1156 _public_ int sd_journal_seek_head(sd_journal *j) {
1157 assert_return(j, -EINVAL);
1158 assert_return(!journal_pid_changed(j), -ECHILD);
1159
1160 reset_location(j);
1161 j->current_location.type = LOCATION_HEAD;
1162
1163 return 0;
1164 }
1165
1166 _public_ int sd_journal_seek_tail(sd_journal *j) {
1167 assert_return(j, -EINVAL);
1168 assert_return(!journal_pid_changed(j), -ECHILD);
1169
1170 reset_location(j);
1171 j->current_location.type = LOCATION_TAIL;
1172
1173 return 0;
1174 }
1175
1176 static void check_network(sd_journal *j, int fd) {
1177 assert(j);
1178
1179 if (j->on_network)
1180 return;
1181
1182 j->on_network = fd_is_network_fs(fd);
1183 }
1184
/* Tells whether filename belongs to the journal type named by prefix, i.e.
 * whether it is exactly "<prefix>.journal" or "<prefix>.journal~", or
 * starts with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *active, *disposed, *archived_prefix;

        active = strjoina(prefix, ".journal");
        disposed = strjoina(active, "~");
        archived_prefix = strjoina(prefix, "@");

        if (streq(filename, active))
                return true;

        if (streq(filename, disposed))
                return true;

        return !!startswith(filename, archived_prefix);
}
1196
1197 static bool file_type_wanted(int flags, const char *filename) {
1198 assert(filename);
1199
1200 if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1201 return false;
1202
1203 /* no flags set → every type is OK */
1204 if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1205 return true;
1206
1207 if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1208 return true;
1209
1210 if (flags & SD_JOURNAL_CURRENT_USER) {
1211 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1212
1213 xsprintf(prefix, "user-"UID_FMT, getuid());
1214
1215 if (file_has_type_prefix(prefix, filename))
1216 return true;
1217 }
1218
1219 return false;
1220 }
1221
1222 static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
1223 assert(j);
1224 assert(path);
1225 assert(prefix);
1226
1227 if (j->toplevel_fd >= 0)
1228 return false;
1229
1230 return path_startswith(path, prefix);
1231 }
1232
1233 static void track_file_disposition(sd_journal *j, JournalFile *f) {
1234 assert(j);
1235 assert(f);
1236
1237 if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
1238 j->has_runtime_files = true;
1239 else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
1240 j->has_persistent_files = true;
1241 }
1242
/* Returns a pointer to the first character of p that is not a '/', i.e.
 * strips all leading slashes. NULL is passed through as NULL. */
static const char *skip_slash(const char *p) {

        if (!p)
                return NULL;

        for (; *p == '/'; p++)
                ;

        return p;
}
1253
1254 static int add_any_file(
1255 sd_journal *j,
1256 int fd,
1257 const char *path) {
1258
1259 bool close_fd = false;
1260 JournalFile *f;
1261 struct stat st;
1262 int r, k;
1263
1264 assert(j);
1265 assert(fd >= 0 || path);
1266
1267 if (fd < 0) {
1268 if (j->toplevel_fd >= 0)
1269 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1270 * openat() ignores the first argument. */
1271
1272 fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1273 else
1274 fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1275 if (fd < 0) {
1276 r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
1277 goto finish;
1278 }
1279
1280 close_fd = true;
1281
1282 r = fd_nonblock(fd, false);
1283 if (r < 0) {
1284 r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
1285 goto finish;
1286 }
1287 }
1288
1289 if (fstat(fd, &st) < 0) {
1290 r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path);
1291 goto finish;
1292 }
1293
1294 r = stat_verify_regular(&st);
1295 if (r < 0) {
1296 log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path);
1297 goto finish;
1298 }
1299
1300 f = ordered_hashmap_get(j->files, path);
1301 if (f) {
1302 if (f->last_stat.st_dev == st.st_dev &&
1303 f->last_stat.st_ino == st.st_ino) {
1304
1305 /* We already track this file, under the same path and with the same device/inode numbers, it's
1306 * hence really the same. Mark this file as seen in this generation. This is used to GC old
1307 * files in process_q_overflow() to detect journal files that are still there and discern them
1308 * from those which are gone. */
1309
1310 f->last_seen_generation = j->generation;
1311 r = 0;
1312 goto finish;
1313 }
1314
1315 /* So we tracked a file under this name, but it has a different inode/device. In that case, it got
1316 * replaced (probably due to rotation?), let's drop it hence from our list. */
1317 remove_file_real(j, f);
1318 f = NULL;
1319 }
1320
1321 if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1322 log_debug("Too many open journal files, not adding %s.", path);
1323 r = -ETOOMANYREFS;
1324 goto finish;
1325 }
1326
1327 r = journal_file_open(fd, path, O_RDONLY, 0, false, 0, false, NULL, j->mmap, NULL, NULL, &f);
1328 if (r < 0) {
1329 log_debug_errno(r, "Failed to open journal file %s: %m", path);
1330 goto finish;
1331 }
1332
1333 /* journal_file_dump(f); */
1334
1335 r = ordered_hashmap_put(j->files, f->path, f);
1336 if (r < 0) {
1337 f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */
1338 (void) journal_file_close(f);
1339 goto finish;
1340 }
1341
1342 close_fd = false; /* the fd is now owned by the JournalFile object */
1343
1344 f->last_seen_generation = j->generation;
1345
1346 track_file_disposition(j, f);
1347 check_network(j, f->fd);
1348
1349 j->current_invalidate_counter++;
1350
1351 log_debug("File %s added.", f->path);
1352
1353 r = 0;
1354
1355 finish:
1356 if (close_fd)
1357 safe_close(fd);
1358
1359 if (r < 0) {
1360 k = journal_put_error(j, r, path);
1361 if (k < 0)
1362 return k;
1363 }
1364
1365 return r;
1366 }
1367
1368 static int add_file_by_name(
1369 sd_journal *j,
1370 const char *prefix,
1371 const char *filename) {
1372
1373 const char *path;
1374
1375 assert(j);
1376 assert(prefix);
1377 assert(filename);
1378
1379 if (j->no_new_files)
1380 return 0;
1381
1382 if (!file_type_wanted(j->flags, filename))
1383 return 0;
1384
1385 path = strjoina(prefix, "/", filename);
1386 return add_any_file(j, -1, path);
1387 }
1388
1389 static void remove_file_by_name(
1390 sd_journal *j,
1391 const char *prefix,
1392 const char *filename) {
1393
1394 const char *path;
1395 JournalFile *f;
1396
1397 assert(j);
1398 assert(prefix);
1399 assert(filename);
1400
1401 path = strjoina(prefix, "/", filename);
1402 f = ordered_hashmap_get(j->files, path);
1403 if (!f)
1404 return;
1405
1406 remove_file_real(j, f);
1407 }
1408
1409 static void remove_file_real(sd_journal *j, JournalFile *f) {
1410 assert(j);
1411 assert(f);
1412
1413 (void) ordered_hashmap_remove(j->files, f->path);
1414
1415 log_debug("File %s removed.", f->path);
1416
1417 if (j->current_file == f) {
1418 j->current_file = NULL;
1419 j->current_field = 0;
1420 }
1421
1422 if (j->unique_file == f) {
1423 /* Jump to the next unique_file or NULL if that one was last */
1424 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1425 j->unique_offset = 0;
1426 if (!j->unique_file)
1427 j->unique_file_lost = true;
1428 }
1429
1430 if (j->fields_file == f) {
1431 j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
1432 j->fields_offset = 0;
1433 if (!j->fields_file)
1434 j->fields_file_lost = true;
1435 }
1436
1437 (void) journal_file_close(f);
1438
1439 j->current_invalidate_counter++;
1440 }
1441
1442 static int dirname_is_machine_id(const char *fn) {
1443 sd_id128_t id, machine;
1444 int r;
1445
1446 r = sd_id128_get_machine(&machine);
1447 if (r < 0)
1448 return r;
1449
1450 r = sd_id128_from_string(fn, &id);
1451 if (r < 0)
1452 return r;
1453
1454 return sd_id128_equal(id, machine);
1455 }
1456
1457 static bool dirent_is_journal_file(const struct dirent *de) {
1458 assert(de);
1459
1460 if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
1461 return false;
1462
1463 return endswith(de->d_name, ".journal") ||
1464 endswith(de->d_name, ".journal~");
1465 }
1466
1467 static bool dirent_is_id128_subdir(const struct dirent *de) {
1468 assert(de);
1469
1470 if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
1471 return false;
1472
1473 return id128_is_valid(de->d_name);
1474 }
1475
1476 static int directory_open(sd_journal *j, const char *path, DIR **ret) {
1477 DIR *d;
1478
1479 assert(j);
1480 assert(path);
1481 assert(ret);
1482
1483 if (j->toplevel_fd < 0)
1484 d = opendir(path);
1485 else
1486 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1487 * relative, by dropping the initial slash */
1488 d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
1489 if (!d)
1490 return -errno;
1491
1492 *ret = d;
1493 return 0;
1494 }
1495
1496 static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
1497
1498 static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
1499 struct dirent *de;
1500
1501 assert(j);
1502 assert(m);
1503 assert(d);
1504
1505 FOREACH_DIRENT_ALL(de, d, goto fail) {
1506
1507 if (dirent_is_journal_file(de))
1508 (void) add_file_by_name(j, m->path, de->d_name);
1509
1510 if (m->is_root && dirent_is_id128_subdir(de))
1511 (void) add_directory(j, m->path, de->d_name);
1512 }
1513
1514 return;
1515
1516 fail:
1517 log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
1518 }
1519
1520 static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
1521 int r;
1522
1523 assert(j);
1524 assert(m);
1525 assert(fd >= 0);
1526
1527 /* Watch this directory if that's enabled and if it not being watched yet. */
1528
1529 if (m->wd > 0) /* Already have a watch? */
1530 return;
1531 if (j->inotify_fd < 0) /* Not watching at all? */
1532 return;
1533
1534 m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
1535 if (m->wd < 0) {
1536 log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
1537 return;
1538 }
1539
1540 r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
1541 if (r == -EEXIST)
1542 log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
1543 if (r < 0) {
1544 log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
1545 (void) inotify_rm_watch(j->inotify_fd, m->wd);
1546 m->wd = -1;
1547 }
1548 }
1549
1550 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1551 _cleanup_free_ char *path = NULL;
1552 _cleanup_closedir_ DIR *d = NULL;
1553 Directory *m;
1554 int r, k;
1555
1556 assert(j);
1557 assert(prefix);
1558
1559 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1560 * and reenumerates directory contents */
1561
1562 if (dirname)
1563 path = strjoin(prefix, "/", dirname);
1564 else
1565 path = strdup(prefix);
1566 if (!path) {
1567 r = -ENOMEM;
1568 goto fail;
1569 }
1570
1571 log_debug("Considering directory '%s'.", path);
1572
1573 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1574 if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1575 !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
1576 return 0;
1577
1578 r = directory_open(j, path, &d);
1579 if (r < 0) {
1580 log_debug_errno(r, "Failed to open directory '%s': %m", path);
1581 goto fail;
1582 }
1583
1584 m = hashmap_get(j->directories_by_path, path);
1585 if (!m) {
1586 m = new0(Directory, 1);
1587 if (!m) {
1588 r = -ENOMEM;
1589 goto fail;
1590 }
1591
1592 m->is_root = false;
1593 m->path = path;
1594
1595 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1596 free(m);
1597 r = -ENOMEM;
1598 goto fail;
1599 }
1600
1601 path = NULL; /* avoid freeing in cleanup */
1602 j->current_invalidate_counter++;
1603
1604 log_debug("Directory %s added.", m->path);
1605
1606 } else if (m->is_root)
1607 return 0; /* Don't 'downgrade' from root directory */
1608
1609 m->last_seen_generation = j->generation;
1610
1611 directory_watch(j, m, dirfd(d),
1612 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1613 IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1614 IN_ONLYDIR);
1615
1616 if (!j->no_new_files)
1617 directory_enumerate(j, m, d);
1618
1619 check_network(j, dirfd(d));
1620
1621 return 0;
1622
1623 fail:
1624 k = journal_put_error(j, r, path ?: prefix);
1625 if (k < 0)
1626 return k;
1627
1628 return r;
1629 }
1630
1631 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
1632
1633 _cleanup_closedir_ DIR *d = NULL;
1634 Directory *m;
1635 int r, k;
1636
1637 assert(j);
1638
1639 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
1640 * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
1641 * populate the set, as well as to update it later. */
1642
1643 if (p) {
1644 /* If there's a path specified, use it. */
1645
1646 log_debug("Considering root directory '%s'.", p);
1647
1648 if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1649 !path_has_prefix(j, p, "/run"))
1650 return -EINVAL;
1651
1652 if (j->prefix)
1653 p = strjoina(j->prefix, p);
1654
1655 r = directory_open(j, p, &d);
1656 if (r == -ENOENT && missing_ok)
1657 return 0;
1658 if (r < 0) {
1659 log_debug_errno(r, "Failed to open root directory %s: %m", p);
1660 goto fail;
1661 }
1662 } else {
1663 int dfd;
1664
1665 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
1666 * opendir() will take possession of the fd, and close it, which we don't want. */
1667
1668 p = "."; /* store this as "." in the directories hashmap */
1669
1670 dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
1671 if (dfd < 0) {
1672 r = -errno;
1673 goto fail;
1674 }
1675
1676 d = fdopendir(dfd);
1677 if (!d) {
1678 r = -errno;
1679 safe_close(dfd);
1680 goto fail;
1681 }
1682
1683 rewinddir(d);
1684 }
1685
1686 m = hashmap_get(j->directories_by_path, p);
1687 if (!m) {
1688 m = new0(Directory, 1);
1689 if (!m) {
1690 r = -ENOMEM;
1691 goto fail;
1692 }
1693
1694 m->is_root = true;
1695
1696 m->path = strdup(p);
1697 if (!m->path) {
1698 free(m);
1699 r = -ENOMEM;
1700 goto fail;
1701 }
1702
1703 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1704 free(m->path);
1705 free(m);
1706 r = -ENOMEM;
1707 goto fail;
1708 }
1709
1710 j->current_invalidate_counter++;
1711
1712 log_debug("Root directory %s added.", m->path);
1713
1714 } else if (!m->is_root)
1715 return 0;
1716
1717 directory_watch(j, m, dirfd(d),
1718 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1719 IN_ONLYDIR);
1720
1721 if (!j->no_new_files)
1722 directory_enumerate(j, m, d);
1723
1724 check_network(j, dirfd(d));
1725
1726 return 0;
1727
1728 fail:
1729 k = journal_put_error(j, r, p);
1730 if (k < 0)
1731 return k;
1732
1733 return r;
1734 }
1735
1736 static void remove_directory(sd_journal *j, Directory *d) {
1737 assert(j);
1738
1739 if (d->wd > 0) {
1740 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1741
1742 if (j->inotify_fd >= 0)
1743 inotify_rm_watch(j->inotify_fd, d->wd);
1744 }
1745
1746 hashmap_remove(j->directories_by_path, d->path);
1747
1748 if (d->is_root)
1749 log_debug("Root directory %s removed.", d->path);
1750 else
1751 log_debug("Directory %s removed.", d->path);
1752
1753 free(d->path);
1754 free(d);
1755 }
1756
1757 static int add_search_paths(sd_journal *j) {
1758
1759 static const char search_paths[] =
1760 "/run/log/journal\0"
1761 "/var/log/journal\0";
1762 const char *p;
1763
1764 assert(j);
1765
1766 /* We ignore most errors here, since the idea is to only open
1767 * what's actually accessible, and ignore the rest. */
1768
1769 NULSTR_FOREACH(p, search_paths)
1770 (void) add_root_directory(j, p, true);
1771
1772 if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
1773 (void) add_root_directory(j, "/var/log/journal/remote", true);
1774
1775 return 0;
1776 }
1777
1778 static int add_current_paths(sd_journal *j) {
1779 Iterator i;
1780 JournalFile *f;
1781
1782 assert(j);
1783 assert(j->no_new_files);
1784
1785 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
1786 * treat them as fatal. */
1787
1788 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1789 _cleanup_free_ char *dir;
1790 int r;
1791
1792 dir = dirname_malloc(f->path);
1793 if (!dir)
1794 return -ENOMEM;
1795
1796 r = add_directory(j, dir, NULL);
1797 if (r < 0)
1798 return r;
1799 }
1800
1801 return 0;
1802 }
1803
1804 static int allocate_inotify(sd_journal *j) {
1805 assert(j);
1806
1807 if (j->inotify_fd < 0) {
1808 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1809 if (j->inotify_fd < 0)
1810 return -errno;
1811 }
1812
1813 return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
1814 }
1815
1816 static sd_journal *journal_new(int flags, const char *path) {
1817 sd_journal *j;
1818
1819 j = new0(sd_journal, 1);
1820 if (!j)
1821 return NULL;
1822
1823 j->original_pid = getpid_cached();
1824 j->toplevel_fd = -1;
1825 j->inotify_fd = -1;
1826 j->flags = flags;
1827 j->data_threshold = DEFAULT_DATA_THRESHOLD;
1828
1829 if (path) {
1830 char *t;
1831
1832 t = strdup(path);
1833 if (!t)
1834 goto fail;
1835
1836 if (flags & SD_JOURNAL_OS_ROOT)
1837 j->prefix = t;
1838 else
1839 j->path = t;
1840 }
1841
1842 j->files = ordered_hashmap_new(&path_hash_ops);
1843 if (!j->files)
1844 goto fail;
1845
1846 j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
1847 j->directories_by_path = hashmap_new(&path_hash_ops);
1848 j->mmap = mmap_cache_new();
1849 if (!j->files_cache || !j->directories_by_path || !j->mmap)
1850 goto fail;
1851
1852 return j;
1853
1854 fail:
1855 sd_journal_close(j);
1856 return NULL;
1857 }
1858
1859 #define OPEN_ALLOWED_FLAGS \
1860 (SD_JOURNAL_LOCAL_ONLY | \
1861 SD_JOURNAL_RUNTIME_ONLY | \
1862 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
1863
1864 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1865 sd_journal *j;
1866 int r;
1867
1868 assert_return(ret, -EINVAL);
1869 assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
1870
1871 j = journal_new(flags, NULL);
1872 if (!j)
1873 return -ENOMEM;
1874
1875 r = add_search_paths(j);
1876 if (r < 0)
1877 goto fail;
1878
1879 *ret = j;
1880 return 0;
1881
1882 fail:
1883 sd_journal_close(j);
1884
1885 return r;
1886 }
1887
1888 #define OPEN_CONTAINER_ALLOWED_FLAGS \
1889 (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
1890
1891 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1892 _cleanup_free_ char *root = NULL, *class = NULL;
1893 sd_journal *j;
1894 char *p;
1895 int r;
1896
1897 /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
1898 * combination with sd_journal_open_directory_fd(). */
1899
1900 assert_return(machine, -EINVAL);
1901 assert_return(ret, -EINVAL);
1902 assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
1903 assert_return(machine_name_is_valid(machine), -EINVAL);
1904
1905 p = strjoina("/run/systemd/machines/", machine);
1906 r = parse_env_file(p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1907 if (r == -ENOENT)
1908 return -EHOSTDOWN;
1909 if (r < 0)
1910 return r;
1911 if (!root)
1912 return -ENODATA;
1913
1914 if (!streq_ptr(class, "container"))
1915 return -EIO;
1916
1917 j = journal_new(flags, root);
1918 if (!j)
1919 return -ENOMEM;
1920
1921 r = add_search_paths(j);
1922 if (r < 0)
1923 goto fail;
1924
1925 *ret = j;
1926 return 0;
1927
1928 fail:
1929 sd_journal_close(j);
1930 return r;
1931 }
1932
1933 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
1934 (SD_JOURNAL_OS_ROOT | \
1935 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1936
1937 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1938 sd_journal *j;
1939 int r;
1940
1941 assert_return(ret, -EINVAL);
1942 assert_return(path, -EINVAL);
1943 assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
1944
1945 j = journal_new(flags, path);
1946 if (!j)
1947 return -ENOMEM;
1948
1949 if (flags & SD_JOURNAL_OS_ROOT)
1950 r = add_search_paths(j);
1951 else
1952 r = add_root_directory(j, path, false);
1953 if (r < 0)
1954 goto fail;
1955
1956 *ret = j;
1957 return 0;
1958
1959 fail:
1960 sd_journal_close(j);
1961 return r;
1962 }
1963
1964 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1965 sd_journal *j;
1966 const char **path;
1967 int r;
1968
1969 assert_return(ret, -EINVAL);
1970 assert_return(flags == 0, -EINVAL);
1971
1972 j = journal_new(flags, NULL);
1973 if (!j)
1974 return -ENOMEM;
1975
1976 STRV_FOREACH(path, paths) {
1977 r = add_any_file(j, -1, *path);
1978 if (r < 0)
1979 goto fail;
1980 }
1981
1982 j->no_new_files = true;
1983
1984 *ret = j;
1985 return 0;
1986
1987 fail:
1988 sd_journal_close(j);
1989 return r;
1990 }
1991
1992 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
1993 (SD_JOURNAL_OS_ROOT | \
1994 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1995
1996 _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
1997 sd_journal *j;
1998 struct stat st;
1999 int r;
2000
2001 assert_return(ret, -EINVAL);
2002 assert_return(fd >= 0, -EBADF);
2003 assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
2004
2005 if (fstat(fd, &st) < 0)
2006 return -errno;
2007
2008 if (!S_ISDIR(st.st_mode))
2009 return -EBADFD;
2010
2011 j = journal_new(flags, NULL);
2012 if (!j)
2013 return -ENOMEM;
2014
2015 j->toplevel_fd = fd;
2016
2017 if (flags & SD_JOURNAL_OS_ROOT)
2018 r = add_search_paths(j);
2019 else
2020 r = add_root_directory(j, NULL, false);
2021 if (r < 0)
2022 goto fail;
2023
2024 *ret = j;
2025 return 0;
2026
2027 fail:
2028 sd_journal_close(j);
2029 return r;
2030 }
2031
2032 _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
2033 Iterator iterator;
2034 JournalFile *f;
2035 sd_journal *j;
2036 unsigned i;
2037 int r;
2038
2039 assert_return(ret, -EINVAL);
2040 assert_return(n_fds > 0, -EBADF);
2041 assert_return(flags == 0, -EINVAL);
2042
2043 j = journal_new(flags, NULL);
2044 if (!j)
2045 return -ENOMEM;
2046
2047 for (i = 0; i < n_fds; i++) {
2048 struct stat st;
2049
2050 if (fds[i] < 0) {
2051 r = -EBADF;
2052 goto fail;
2053 }
2054
2055 if (fstat(fds[i], &st) < 0) {
2056 r = -errno;
2057 goto fail;
2058 }
2059
2060 r = stat_verify_regular(&st);
2061 if (r < 0)
2062 goto fail;
2063
2064 r = add_any_file(j, fds[i], NULL);
2065 if (r < 0)
2066 goto fail;
2067 }
2068
2069 j->no_new_files = true;
2070 j->no_inotify = true;
2071
2072 *ret = j;
2073 return 0;
2074
2075 fail:
2076 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2077 * remain open */
2078 ORDERED_HASHMAP_FOREACH(f, j->files, iterator)
2079 f->close_fd = false;
2080
2081 sd_journal_close(j);
2082 return r;
2083 }
2084
2085 _public_ void sd_journal_close(sd_journal *j) {
2086 Directory *d;
2087
2088 if (!j)
2089 return;
2090
2091 sd_journal_flush_matches(j);
2092
2093 ordered_hashmap_free_with_destructor(j->files, journal_file_close);
2094 iterated_cache_free(j->files_cache);
2095
2096 while ((d = hashmap_first(j->directories_by_path)))
2097 remove_directory(j, d);
2098
2099 while ((d = hashmap_first(j->directories_by_wd)))
2100 remove_directory(j, d);
2101
2102 hashmap_free(j->directories_by_path);
2103 hashmap_free(j->directories_by_wd);
2104
2105 safe_close(j->inotify_fd);
2106
2107 if (j->mmap) {
2108 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
2109 mmap_cache_unref(j->mmap);
2110 }
2111
2112 hashmap_free_free(j->errors);
2113
2114 free(j->path);
2115 free(j->prefix);
2116 free(j->unique_field);
2117 free(j->fields_buffer);
2118 free(j);
2119 }
2120
2121 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
2122 Object *o;
2123 JournalFile *f;
2124 int r;
2125
2126 assert_return(j, -EINVAL);
2127 assert_return(!journal_pid_changed(j), -ECHILD);
2128 assert_return(ret, -EINVAL);
2129
2130 f = j->current_file;
2131 if (!f)
2132 return -EADDRNOTAVAIL;
2133
2134 if (f->current_offset <= 0)
2135 return -EADDRNOTAVAIL;
2136
2137 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2138 if (r < 0)
2139 return r;
2140
2141 *ret = le64toh(o->entry.realtime);
2142 return 0;
2143 }
2144
2145 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
2146 Object *o;
2147 JournalFile *f;
2148 int r;
2149 sd_id128_t id;
2150
2151 assert_return(j, -EINVAL);
2152 assert_return(!journal_pid_changed(j), -ECHILD);
2153
2154 f = j->current_file;
2155 if (!f)
2156 return -EADDRNOTAVAIL;
2157
2158 if (f->current_offset <= 0)
2159 return -EADDRNOTAVAIL;
2160
2161 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2162 if (r < 0)
2163 return r;
2164
2165 if (ret_boot_id)
2166 *ret_boot_id = o->entry.boot_id;
2167 else {
2168 r = sd_id128_get_boot(&id);
2169 if (r < 0)
2170 return r;
2171
2172 if (!sd_id128_equal(id, o->entry.boot_id))
2173 return -ESTALE;
2174 }
2175
2176 if (ret)
2177 *ret = le64toh(o->entry.monotonic);
2178
2179 return 0;
2180 }
2181
/* Returns true if 'field' is acceptable as a field name to query: it must be non-empty, must not start with
 * "__", and may consist only of '_', 'A'–'Z' and '0'–'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c; c++) {
                bool allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
2209
2210 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
2211 JournalFile *f;
2212 uint64_t i, n;
2213 size_t field_length;
2214 int r;
2215 Object *o;
2216
2217 assert_return(j, -EINVAL);
2218 assert_return(!journal_pid_changed(j), -ECHILD);
2219 assert_return(field, -EINVAL);
2220 assert_return(data, -EINVAL);
2221 assert_return(size, -EINVAL);
2222 assert_return(field_is_valid(field), -EINVAL);
2223
2224 f = j->current_file;
2225 if (!f)
2226 return -EADDRNOTAVAIL;
2227
2228 if (f->current_offset <= 0)
2229 return -EADDRNOTAVAIL;
2230
2231 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2232 if (r < 0)
2233 return r;
2234
2235 field_length = strlen(field);
2236
2237 n = journal_file_entry_n_items(o);
2238 for (i = 0; i < n; i++) {
2239 uint64_t p, l;
2240 le64_t le_hash;
2241 size_t t;
2242 int compression;
2243
2244 p = le64toh(o->entry.items[i].object_offset);
2245 le_hash = o->entry.items[i].hash;
2246 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2247 if (r < 0)
2248 return r;
2249
2250 if (le_hash != o->data.hash)
2251 return -EBADMSG;
2252
2253 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2254
2255 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2256 if (compression) {
2257 #if HAVE_XZ || HAVE_LZ4
2258 r = decompress_startswith(compression,
2259 o->data.payload, l,
2260 &f->compress_buffer, &f->compress_buffer_size,
2261 field, field_length, '=');
2262 if (r < 0)
2263 log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m",
2264 object_compressed_to_string(compression), l, p);
2265 else if (r > 0) {
2266
2267 size_t rsize;
2268
2269 r = decompress_blob(compression,
2270 o->data.payload, l,
2271 &f->compress_buffer, &f->compress_buffer_size, &rsize,
2272 j->data_threshold);
2273 if (r < 0)
2274 return r;
2275
2276 *data = f->compress_buffer;
2277 *size = (size_t) rsize;
2278
2279 return 0;
2280 }
2281 #else
2282 return -EPROTONOSUPPORT;
2283 #endif
2284 } else if (l >= field_length+1 &&
2285 memcmp(o->data.payload, field, field_length) == 0 &&
2286 o->data.payload[field_length] == '=') {
2287
2288 t = (size_t) l;
2289
2290 if ((uint64_t) t != l)
2291 return -E2BIG;
2292
2293 *data = o->data.payload;
2294 *size = t;
2295
2296 return 0;
2297 }
2298
2299 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2300 if (r < 0)
2301 return r;
2302 }
2303
2304 return -ENOENT;
2305 }
2306
2307 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
2308 size_t t;
2309 uint64_t l;
2310 int compression;
2311
2312 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2313 t = (size_t) l;
2314
2315 /* We can't read objects larger than 4G on a 32bit machine */
2316 if ((uint64_t) t != l)
2317 return -E2BIG;
2318
2319 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2320 if (compression) {
2321 #if HAVE_XZ || HAVE_LZ4
2322 size_t rsize;
2323 int r;
2324
2325 r = decompress_blob(compression,
2326 o->data.payload, l, &f->compress_buffer,
2327 &f->compress_buffer_size, &rsize, j->data_threshold);
2328 if (r < 0)
2329 return r;
2330
2331 *data = f->compress_buffer;
2332 *size = (size_t) rsize;
2333 #else
2334 return -EPROTONOSUPPORT;
2335 #endif
2336 } else {
2337 *data = o->data.payload;
2338 *size = t;
2339 }
2340
2341 return 0;
2342 }
2343
2344 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2345 JournalFile *f;
2346 uint64_t p, n;
2347 le64_t le_hash;
2348 int r;
2349 Object *o;
2350
2351 assert_return(j, -EINVAL);
2352 assert_return(!journal_pid_changed(j), -ECHILD);
2353 assert_return(data, -EINVAL);
2354 assert_return(size, -EINVAL);
2355
2356 f = j->current_file;
2357 if (!f)
2358 return -EADDRNOTAVAIL;
2359
2360 if (f->current_offset <= 0)
2361 return -EADDRNOTAVAIL;
2362
2363 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2364 if (r < 0)
2365 return r;
2366
2367 n = journal_file_entry_n_items(o);
2368 if (j->current_field >= n)
2369 return 0;
2370
2371 p = le64toh(o->entry.items[j->current_field].object_offset);
2372 le_hash = o->entry.items[j->current_field].hash;
2373 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2374 if (r < 0)
2375 return r;
2376
2377 if (le_hash != o->data.hash)
2378 return -EBADMSG;
2379
2380 r = return_data(j, f, o, data, size);
2381 if (r < 0)
2382 return r;
2383
2384 j->current_field++;
2385
2386 return 1;
2387 }
2388
2389 _public_ void sd_journal_restart_data(sd_journal *j) {
2390 if (!j)
2391 return;
2392
2393 j->current_field = 0;
2394 }
2395
2396 static int reiterate_all_paths(sd_journal *j) {
2397 assert(j);
2398
2399 if (j->no_new_files)
2400 return add_current_paths(j);
2401
2402 if (j->flags & SD_JOURNAL_OS_ROOT)
2403 return add_search_paths(j);
2404
2405 if (j->toplevel_fd >= 0)
2406 return add_root_directory(j, NULL, false);
2407
2408 if (j->path)
2409 return add_root_directory(j, j->path, true);
2410
2411 return add_search_paths(j);
2412 }
2413
2414 _public_ int sd_journal_get_fd(sd_journal *j) {
2415 int r;
2416
2417 assert_return(j, -EINVAL);
2418 assert_return(!journal_pid_changed(j), -ECHILD);
2419
2420 if (j->no_inotify)
2421 return -EMEDIUMTYPE;
2422
2423 if (j->inotify_fd >= 0)
2424 return j->inotify_fd;
2425
2426 r = allocate_inotify(j);
2427 if (r < 0)
2428 return r;
2429
2430 log_debug("Reiterating files to get inotify watches established.");
2431
2432 /* Iterate through all dirs again, to add them to the inotify */
2433 r = reiterate_all_paths(j);
2434 if (r < 0)
2435 return r;
2436
2437 return j->inotify_fd;
2438 }
2439
2440 _public_ int sd_journal_get_events(sd_journal *j) {
2441 int fd;
2442
2443 assert_return(j, -EINVAL);
2444 assert_return(!journal_pid_changed(j), -ECHILD);
2445
2446 fd = sd_journal_get_fd(j);
2447 if (fd < 0)
2448 return fd;
2449
2450 return POLLIN;
2451 }
2452
2453 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2454 int fd;
2455
2456 assert_return(j, -EINVAL);
2457 assert_return(!journal_pid_changed(j), -ECHILD);
2458 assert_return(timeout_usec, -EINVAL);
2459
2460 fd = sd_journal_get_fd(j);
2461 if (fd < 0)
2462 return fd;
2463
2464 if (!j->on_network) {
2465 *timeout_usec = (uint64_t) -1;
2466 return 0;
2467 }
2468
2469 /* If we are on the network we need to regularly check for
2470 * changes manually */
2471
2472 *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2473 return 1;
2474 }
2475
2476 static void process_q_overflow(sd_journal *j) {
2477 JournalFile *f;
2478 Directory *m;
2479 Iterator i;
2480
2481 assert(j);
2482
2483 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2484 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2485 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2486 * are subject for unloading. */
2487
2488 log_debug("Inotify queue overrun, reiterating everything.");
2489
2490 j->generation++;
2491 (void) reiterate_all_paths(j);
2492
2493 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2494
2495 if (f->last_seen_generation == j->generation)
2496 continue;
2497
2498 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
2499 remove_file_real(j, f);
2500 }
2501
2502 HASHMAP_FOREACH(m, j->directories_by_path, i) {
2503
2504 if (m->last_seen_generation == j->generation)
2505 continue;
2506
2507 if (m->is_root) /* Never GC root directories */
2508 continue;
2509
2510 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
2511 remove_directory(j, m);
2512 }
2513
2514 log_debug("Reiteration complete.");
2515 }
2516
2517 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2518 Directory *d;
2519
2520 assert(j);
2521 assert(e);
2522
2523 if (e->mask & IN_Q_OVERFLOW) {
2524 process_q_overflow(j);
2525 return;
2526 }
2527
2528 /* Is this a subdirectory we watch? */
2529 d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2530 if (d) {
2531 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2532 (endswith(e->name, ".journal") ||
2533 endswith(e->name, ".journal~"))) {
2534
2535 /* Event for a journal file */
2536
2537 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2538 (void) add_file_by_name(j, d->path, e->name);
2539 else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
2540 remove_file_by_name(j, d->path, e->name);
2541
2542 } else if (!d->is_root && e->len == 0) {
2543
2544 /* Event for a subdirectory */
2545
2546 if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
2547 remove_directory(j, d);
2548
2549 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
2550
2551 /* Event for root directory */
2552
2553 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2554 (void) add_directory(j, d->path, e->name);
2555 }
2556
2557 return;
2558 }
2559
2560 if (e->mask & IN_IGNORED)
2561 return;
2562
2563 log_debug("Unexpected inotify event.");
2564 }
2565
2566 static int determine_change(sd_journal *j) {
2567 bool b;
2568
2569 assert(j);
2570
2571 b = j->current_invalidate_counter != j->last_invalidate_counter;
2572 j->last_invalidate_counter = j->current_invalidate_counter;
2573
2574 return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2575 }
2576
2577 _public_ int sd_journal_process(sd_journal *j) {
2578 bool got_something = false;
2579
2580 assert_return(j, -EINVAL);
2581 assert_return(!journal_pid_changed(j), -ECHILD);
2582
2583 if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
2584 return 0;
2585
2586 j->last_process_usec = now(CLOCK_MONOTONIC);
2587 j->last_invalidate_counter = j->current_invalidate_counter;
2588
2589 for (;;) {
2590 union inotify_event_buffer buffer;
2591 struct inotify_event *e;
2592 ssize_t l;
2593
2594 l = read(j->inotify_fd, &buffer, sizeof(buffer));
2595 if (l < 0) {
2596 if (IN_SET(errno, EAGAIN, EINTR))
2597 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2598
2599 return -errno;
2600 }
2601
2602 got_something = true;
2603
2604 FOREACH_INOTIFY_EVENT(e, buffer, l)
2605 process_inotify_event(j, e);
2606 }
2607 }
2608
2609 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2610 int r;
2611 uint64_t t;
2612
2613 assert_return(j, -EINVAL);
2614 assert_return(!journal_pid_changed(j), -ECHILD);
2615
2616 if (j->inotify_fd < 0) {
2617
2618 /* This is the first invocation, hence create the
2619 * inotify watch */
2620 r = sd_journal_get_fd(j);
2621 if (r < 0)
2622 return r;
2623
2624 /* The journal might have changed since the context
2625 * object was created and we weren't watching before,
2626 * hence don't wait for anything, and return
2627 * immediately. */
2628 return determine_change(j);
2629 }
2630
2631 r = sd_journal_get_timeout(j, &t);
2632 if (r < 0)
2633 return r;
2634
2635 if (t != (uint64_t) -1) {
2636 usec_t n;
2637
2638 n = now(CLOCK_MONOTONIC);
2639 t = t > n ? t - n : 0;
2640
2641 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2642 timeout_usec = t;
2643 }
2644
2645 do {
2646 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2647 } while (r == -EINTR);
2648
2649 if (r < 0)
2650 return r;
2651
2652 return sd_journal_process(j);
2653 }
2654
2655 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2656 Iterator i;
2657 JournalFile *f;
2658 bool first = true;
2659 uint64_t fmin = 0, tmax = 0;
2660 int r;
2661
2662 assert_return(j, -EINVAL);
2663 assert_return(!journal_pid_changed(j), -ECHILD);
2664 assert_return(from || to, -EINVAL);
2665 assert_return(from != to, -EINVAL);
2666
2667 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2668 usec_t fr, t;
2669
2670 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2671 if (r == -ENOENT)
2672 continue;
2673 if (r < 0)
2674 return r;
2675 if (r == 0)
2676 continue;
2677
2678 if (first) {
2679 fmin = fr;
2680 tmax = t;
2681 first = false;
2682 } else {
2683 fmin = MIN(fr, fmin);
2684 tmax = MAX(t, tmax);
2685 }
2686 }
2687
2688 if (from)
2689 *from = fmin;
2690 if (to)
2691 *to = tmax;
2692
2693 return first ? 0 : 1;
2694 }
2695
2696 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2697 Iterator i;
2698 JournalFile *f;
2699 bool found = false;
2700 int r;
2701
2702 assert_return(j, -EINVAL);
2703 assert_return(!journal_pid_changed(j), -ECHILD);
2704 assert_return(from || to, -EINVAL);
2705 assert_return(from != to, -EINVAL);
2706
2707 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2708 usec_t fr, t;
2709
2710 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2711 if (r == -ENOENT)
2712 continue;
2713 if (r < 0)
2714 return r;
2715 if (r == 0)
2716 continue;
2717
2718 if (found) {
2719 if (from)
2720 *from = MIN(fr, *from);
2721 if (to)
2722 *to = MAX(t, *to);
2723 } else {
2724 if (from)
2725 *from = fr;
2726 if (to)
2727 *to = t;
2728 found = true;
2729 }
2730 }
2731
2732 return found;
2733 }
2734
2735 void journal_print_header(sd_journal *j) {
2736 Iterator i;
2737 JournalFile *f;
2738 bool newline = false;
2739
2740 assert(j);
2741
2742 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2743 if (newline)
2744 putchar('\n');
2745 else
2746 newline = true;
2747
2748 journal_file_print_header(f);
2749 }
2750 }
2751
2752 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2753 Iterator i;
2754 JournalFile *f;
2755 uint64_t sum = 0;
2756
2757 assert_return(j, -EINVAL);
2758 assert_return(!journal_pid_changed(j), -ECHILD);
2759 assert_return(bytes, -EINVAL);
2760
2761 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2762 struct stat st;
2763
2764 if (fstat(f->fd, &st) < 0)
2765 return -errno;
2766
2767 sum += (uint64_t) st.st_blocks * 512ULL;
2768 }
2769
2770 *bytes = sum;
2771 return 0;
2772 }
2773
2774 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2775 char *f;
2776
2777 assert_return(j, -EINVAL);
2778 assert_return(!journal_pid_changed(j), -ECHILD);
2779 assert_return(!isempty(field), -EINVAL);
2780 assert_return(field_is_valid(field), -EINVAL);
2781
2782 f = strdup(field);
2783 if (!f)
2784 return -ENOMEM;
2785
2786 free(j->unique_field);
2787 j->unique_field = f;
2788 j->unique_file = NULL;
2789 j->unique_offset = 0;
2790 j->unique_file_lost = false;
2791
2792 return 0;
2793 }
2794
2795 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2796 size_t k;
2797
2798 assert_return(j, -EINVAL);
2799 assert_return(!journal_pid_changed(j), -ECHILD);
2800 assert_return(data, -EINVAL);
2801 assert_return(l, -EINVAL);
2802 assert_return(j->unique_field, -EINVAL);
2803
2804 k = strlen(j->unique_field);
2805
2806 if (!j->unique_file) {
2807 if (j->unique_file_lost)
2808 return 0;
2809
2810 j->unique_file = ordered_hashmap_first(j->files);
2811 if (!j->unique_file)
2812 return 0;
2813
2814 j->unique_offset = 0;
2815 }
2816
2817 for (;;) {
2818 JournalFile *of;
2819 Iterator i;
2820 Object *o;
2821 const void *odata;
2822 size_t ol;
2823 bool found;
2824 int r;
2825
2826 /* Proceed to next data object in the field's linked list */
2827 if (j->unique_offset == 0) {
2828 r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2829 if (r < 0)
2830 return r;
2831
2832 j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2833 } else {
2834 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2835 if (r < 0)
2836 return r;
2837
2838 j->unique_offset = le64toh(o->data.next_field_offset);
2839 }
2840
2841 /* We reached the end of the list? Then start again, with the next file */
2842 if (j->unique_offset == 0) {
2843 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
2844 if (!j->unique_file)
2845 return 0;
2846
2847 continue;
2848 }
2849
2850 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2851 * instead, so that we can look at this data object at the same
2852 * time as one on another file */
2853 r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
2854 if (r < 0)
2855 return r;
2856
2857 /* Let's do the type check by hand, since we used 0 context above. */
2858 if (o->object.type != OBJECT_DATA) {
2859 log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
2860 j->unique_file->path, j->unique_offset,
2861 o->object.type, OBJECT_DATA);
2862 return -EBADMSG;
2863 }
2864
2865 r = return_data(j, j->unique_file, o, &odata, &ol);
2866 if (r < 0)
2867 return r;
2868
2869 /* Check if we have at least the field name and "=". */
2870 if (ol <= k) {
2871 log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2872 j->unique_file->path, j->unique_offset,
2873 ol, k + 1);
2874 return -EBADMSG;
2875 }
2876
2877 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
2878 log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
2879 j->unique_file->path, j->unique_offset,
2880 j->unique_field);
2881 return -EBADMSG;
2882 }
2883
2884 /* OK, now let's see if we already returned this data
2885 * object by checking if it exists in the earlier
2886 * traversed files. */
2887 found = false;
2888 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2889 if (of == j->unique_file)
2890 break;
2891
2892 /* Skip this file it didn't have any fields indexed */
2893 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
2894 continue;
2895
2896 r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
2897 if (r < 0)
2898 return r;
2899 if (r > 0) {
2900 found = true;
2901 break;
2902 }
2903 }
2904
2905 if (found)
2906 continue;
2907
2908 r = return_data(j, j->unique_file, o, data, l);
2909 if (r < 0)
2910 return r;
2911
2912 return 1;
2913 }
2914 }
2915
2916 _public_ void sd_journal_restart_unique(sd_journal *j) {
2917 if (!j)
2918 return;
2919
2920 j->unique_file = NULL;
2921 j->unique_offset = 0;
2922 j->unique_file_lost = false;
2923 }
2924
2925 _public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
2926 int r;
2927
2928 assert_return(j, -EINVAL);
2929 assert_return(!journal_pid_changed(j), -ECHILD);
2930 assert_return(field, -EINVAL);
2931
2932 if (!j->fields_file) {
2933 if (j->fields_file_lost)
2934 return 0;
2935
2936 j->fields_file = ordered_hashmap_first(j->files);
2937 if (!j->fields_file)
2938 return 0;
2939
2940 j->fields_hash_table_index = 0;
2941 j->fields_offset = 0;
2942 }
2943
2944 for (;;) {
2945 JournalFile *f, *of;
2946 Iterator i;
2947 uint64_t m;
2948 Object *o;
2949 size_t sz;
2950 bool found;
2951
2952 f = j->fields_file;
2953
2954 if (j->fields_offset == 0) {
2955 bool eof = false;
2956
2957 /* We are not yet positioned at any field. Let's pick the first one */
2958 r = journal_file_map_field_hash_table(f);
2959 if (r < 0)
2960 return r;
2961
2962 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
2963 for (;;) {
2964 if (j->fields_hash_table_index >= m) {
2965 /* Reached the end of the hash table, go to the next file. */
2966 eof = true;
2967 break;
2968 }
2969
2970 j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
2971
2972 if (j->fields_offset != 0)
2973 break;
2974
2975 /* Empty hash table bucket, go to next one */
2976 j->fields_hash_table_index++;
2977 }
2978
2979 if (eof) {
2980 /* Proceed with next file */
2981 j->fields_file = ordered_hashmap_next(j->files, f->path);
2982 if (!j->fields_file) {
2983 *field = NULL;
2984 return 0;
2985 }
2986
2987 j->fields_offset = 0;
2988 j->fields_hash_table_index = 0;
2989 continue;
2990 }
2991
2992 } else {
2993 /* We are already positioned at a field. If so, let's figure out the next field from it */
2994
2995 r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
2996 if (r < 0)
2997 return r;
2998
2999 j->fields_offset = le64toh(o->field.next_hash_offset);
3000 if (j->fields_offset == 0) {
3001 /* Reached the end of the hash table chain */
3002 j->fields_hash_table_index++;
3003 continue;
3004 }
3005 }
3006
3007 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
3008 r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
3009 if (r < 0)
3010 return r;
3011
3012 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
3013 if (o->object.type != OBJECT_FIELD) {
3014 log_debug("%s:offset " OFSfmt ": object has type %i, expected %i", f->path, j->fields_offset, o->object.type, OBJECT_FIELD);
3015 return -EBADMSG;
3016 }
3017
3018 sz = le64toh(o->object.size) - offsetof(Object, field.payload);
3019
3020 /* Let's see if we already returned this field name before. */
3021 found = false;
3022 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
3023 if (of == f)
3024 break;
3025
3026 /* Skip this file it didn't have any fields indexed */
3027 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3028 continue;
3029
3030 r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL);
3031 if (r < 0)
3032 return r;
3033 if (r > 0) {
3034 found = true;
3035 break;
3036 }
3037 }
3038
3039 if (found)
3040 continue;
3041
3042 /* Check if this is really a valid string containing no NUL byte */
3043 if (memchr(o->field.payload, 0, sz))
3044 return -EBADMSG;
3045
3046 if (sz > j->data_threshold)
3047 sz = j->data_threshold;
3048
3049 if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1))
3050 return -ENOMEM;
3051
3052 memcpy(j->fields_buffer, o->field.payload, sz);
3053 j->fields_buffer[sz] = 0;
3054
3055 if (!field_is_valid(j->fields_buffer))
3056 return -EBADMSG;
3057
3058 *field = j->fields_buffer;
3059 return 1;
3060 }
3061 }
3062
3063 _public_ void sd_journal_restart_fields(sd_journal *j) {
3064 if (!j)
3065 return;
3066
3067 j->fields_file = NULL;
3068 j->fields_hash_table_index = 0;
3069 j->fields_offset = 0;
3070 j->fields_file_lost = false;
3071 }
3072
3073 _public_ int sd_journal_reliable_fd(sd_journal *j) {
3074 assert_return(j, -EINVAL);
3075 assert_return(!journal_pid_changed(j), -ECHILD);
3076
3077 return !j->on_network;
3078 }
3079
3080 static char *lookup_field(const char *field, void *userdata) {
3081 sd_journal *j = userdata;
3082 const void *data;
3083 size_t size, d;
3084 int r;
3085
3086 assert(field);
3087 assert(j);
3088
3089 r = sd_journal_get_data(j, field, &data, &size);
3090 if (r < 0 ||
3091 size > REPLACE_VAR_MAX)
3092 return strdup(field);
3093
3094 d = strlen(field) + 1;
3095
3096 return strndup((const char*) data + d, size - d);
3097 }
3098
3099 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
3100 const void *data;
3101 size_t size;
3102 sd_id128_t id;
3103 _cleanup_free_ char *text = NULL, *cid = NULL;
3104 char *t;
3105 int r;
3106
3107 assert_return(j, -EINVAL);
3108 assert_return(!journal_pid_changed(j), -ECHILD);
3109 assert_return(ret, -EINVAL);
3110
3111 r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
3112 if (r < 0)
3113 return r;
3114
3115 cid = strndup((const char*) data + 11, size - 11);
3116 if (!cid)
3117 return -ENOMEM;
3118
3119 r = sd_id128_from_string(cid, &id);
3120 if (r < 0)
3121 return r;
3122
3123 r = catalog_get(CATALOG_DATABASE, id, &text);
3124 if (r < 0)
3125 return r;
3126
3127 t = replace_var(text, lookup_field, j);
3128 if (!t)
3129 return -ENOMEM;
3130
3131 *ret = t;
3132 return 0;
3133 }
3134
3135 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
3136 assert_return(ret, -EINVAL);
3137
3138 return catalog_get(CATALOG_DATABASE, id, ret);
3139 }
3140
3141 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
3142 assert_return(j, -EINVAL);
3143 assert_return(!journal_pid_changed(j), -ECHILD);
3144
3145 j->data_threshold = sz;
3146 return 0;
3147 }
3148
3149 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
3150 assert_return(j, -EINVAL);
3151 assert_return(!journal_pid_changed(j), -ECHILD);
3152 assert_return(sz, -EINVAL);
3153
3154 *sz = j->data_threshold;
3155 return 0;
3156 }
3157
3158 _public_ int sd_journal_has_runtime_files(sd_journal *j) {
3159 assert_return(j, -EINVAL);
3160
3161 return j->has_runtime_files;
3162 }
3163
3164 _public_ int sd_journal_has_persistent_files(sd_journal *j) {
3165 assert_return(j, -EINVAL);
3166
3167 return j->has_persistent_files;
3168 }