]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/sd-journal.c
Merge pull request #8947 from yuwata/meson-0.44
[thirdparty/systemd.git] / src / journal / sd-journal.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2011 Lennart Poettering
6 ***/
7
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <linux/magic.h>
#include <poll.h>
#include <stddef.h>
#include <sys/inotify.h>
#include <sys/vfs.h>
#include <unistd.h>
17
18 #include "sd-journal.h"
19
20 #include "alloc-util.h"
21 #include "catalog.h"
22 #include "compress.h"
23 #include "dirent-util.h"
24 #include "fd-util.h"
25 #include "fileio.h"
26 #include "format-util.h"
27 #include "fs-util.h"
28 #include "hashmap.h"
29 #include "hostname-util.h"
30 #include "id128-util.h"
31 #include "io-util.h"
32 #include "journal-def.h"
33 #include "journal-file.h"
34 #include "journal-internal.h"
35 #include "list.h"
36 #include "lookup3.h"
37 #include "missing.h"
38 #include "path-util.h"
39 #include "process-util.h"
40 #include "replace-var.h"
41 #include "stat-util.h"
42 #include "stat-util.h"
43 #include "stdio-util.h"
44 #include "string-util.h"
45 #include "strv.h"
46
/* Upper bound on the number of journal files tracked at the same time:
 * add_any_file() refuses further files with -ETOOMANYREFS once j->files
 * has reached this size. */
47 #define JOURNAL_FILES_MAX 7168
48
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the interval after which the set of journal files is re-checked — confirm
 * at the point of use. */
49 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
50
/* NOTE(review): not referenced in the visible part of this file; presumably a
 * size limit used when expanding variables via replace-var.h — confirm. */
51 #define REPLACE_VAR_MAX 256
52
/* NOTE(review): presumably the initial value of the journal's data threshold
 * (64 KiB) — confirm at the point of use. */
53 #define DEFAULT_DATA_THRESHOLD (64*1024)
54
/* Forward declaration, needed because real_journal_next() and add_any_file()
 * call this before its definition (the definition is not in view here). */
55 static void remove_file_real(sd_journal *j, JournalFile *f);
56
57 static bool journal_pid_changed(sd_journal *j) {
58 assert(j);
59
60 /* We don't support people creating a journal object and
61 * keeping it around over a fork(). Let's complain. */
62
63 return j->original_pid != getpid_cached();
64 }
65
66 static int journal_put_error(sd_journal *j, int r, const char *path) {
67 char *copy;
68 int k;
69
70 /* Memorize an error we encountered, and store which
71 * file/directory it was generated from. Note that we store
72 * only *one* path per error code, as the error code is the
73 * key into the hashmap, and the path is the value. This means
74 * we keep track only of all error kinds, but not of all error
75 * locations. This has the benefit that the hashmap cannot
76 * grow beyond bounds.
77 *
78 * We return an error here only if we didn't manage to
79 * memorize the real error. */
80
81 if (r >= 0)
82 return r;
83
84 k = hashmap_ensure_allocated(&j->errors, NULL);
85 if (k < 0)
86 return k;
87
88 if (path) {
89 copy = strdup(path);
90 if (!copy)
91 return -ENOMEM;
92 } else
93 copy = NULL;
94
95 k = hashmap_put(j->errors, INT_TO_PTR(r), copy);
96 if (k < 0) {
97 free(copy);
98
99 if (k == -EEXIST)
100 return 0;
101
102 return k;
103 }
104
105 return 0;
106 }
107
108 static void detach_location(sd_journal *j) {
109 Iterator i;
110 JournalFile *f;
111
112 assert(j);
113
114 j->current_file = NULL;
115 j->current_field = 0;
116
117 ORDERED_HASHMAP_FOREACH(f, j->files, i)
118 journal_file_reset_location(f);
119 }
120
121 static void reset_location(sd_journal *j) {
122 assert(j);
123
124 detach_location(j);
125 zero(j->current_location);
126 }
127
128 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
129 assert(l);
130 assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
131 assert(f);
132 assert(o->object.type == OBJECT_ENTRY);
133
134 l->type = type;
135 l->seqnum = le64toh(o->entry.seqnum);
136 l->seqnum_id = f->header->seqnum_id;
137 l->realtime = le64toh(o->entry.realtime);
138 l->monotonic = le64toh(o->entry.monotonic);
139 l->boot_id = o->entry.boot_id;
140 l->xor_hash = le64toh(o->entry.xor_hash);
141
142 l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
143 }
144
145 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
146 assert(j);
147 assert(f);
148 assert(o);
149
150 init_location(&j->current_location, LOCATION_DISCRETE, f, o);
151
152 j->current_file = f;
153 j->current_field = 0;
154
155 /* Let f know its candidate entry was picked. */
156 assert(f->location_type == LOCATION_SEEK);
157 f->location_type = LOCATION_DISCRETE;
158 }
159
/* Checks whether the size bytes at data form a valid match expression of the
 * form FIELD=value.
 *
 * The field name must be non-empty, must not start with "__" and may only
 * consist of '_', 'A'..'Z' and '0'..'9'. Everything after the first '=' is
 * not inspected. Returns true (non-zero) if valid, false (zero) otherwise. */
static int match_is_valid(const void *data, size_t size) {
        const char *start = data, *end, *c;

        assert(data);

        if (size < 2)
                return false;

        /* size >= 2 here, so looking at the first two bytes is safe. */
        if (start[0] == '_' && start[1] == '_')
                return false;

        end = start + size;
        for (c = start; c < end; c++) {

                /* The field name ends here; it must not be empty. */
                if (*c == '=')
                        return c > start;

                if (*c == '_' ||
                    (*c >= 'A' && *c <= 'Z') ||
                    (*c >= '0' && *c <= '9'))
                        continue;

                return false;
        }

        /* No '=' found at all. */
        return false;
}
191
/* Compares the field names of two match expressions (s bytes at _a, t bytes
 * at _b). Returns true if both are identical up to and including the first
 * '=', false at the first differing byte. Both inputs are expected to contain
 * a '='; running off the end of the shorter one is treated as a programming
 * error. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *x = _a, *y = _b;
        size_t limit = s < t ? s : t;
        size_t k;

        for (k = 0; k < limit; k++) {

                if (x[k] != y[k])
                        return false;

                if (x[k] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
207
208 static Match *match_new(Match *p, MatchType t) {
209 Match *m;
210
211 m = new0(Match, 1);
212 if (!m)
213 return NULL;
214
215 m->type = t;
216
217 if (p) {
218 m->parent = p;
219 LIST_PREPEND(matches, p->matches, m);
220 }
221
222 return m;
223 }
224
225 static void match_free(Match *m) {
226 assert(m);
227
228 while (m->matches)
229 match_free(m->matches);
230
231 if (m->parent)
232 LIST_REMOVE(matches, m->parent->matches, m);
233
234 free(m->data);
235 free(m);
236 }
237
238 static void match_free_if_empty(Match *m) {
239 if (!m || m->matches)
240 return;
241
242 match_free(m);
243 }
244
245 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
246 Match *l3, *l4, *add_here = NULL, *m;
247 le64_t le_hash;
248
249 assert_return(j, -EINVAL);
250 assert_return(!journal_pid_changed(j), -ECHILD);
251 assert_return(data, -EINVAL);
252
253 if (size == 0)
254 size = strlen(data);
255
256 assert_return(match_is_valid(data, size), -EINVAL);
257
258 /* level 0: AND term
259 * level 1: OR terms
260 * level 2: AND terms
261 * level 3: OR terms
262 * level 4: concrete matches */
263
264 if (!j->level0) {
265 j->level0 = match_new(NULL, MATCH_AND_TERM);
266 if (!j->level0)
267 return -ENOMEM;
268 }
269
270 if (!j->level1) {
271 j->level1 = match_new(j->level0, MATCH_OR_TERM);
272 if (!j->level1)
273 return -ENOMEM;
274 }
275
276 if (!j->level2) {
277 j->level2 = match_new(j->level1, MATCH_AND_TERM);
278 if (!j->level2)
279 return -ENOMEM;
280 }
281
282 assert(j->level0->type == MATCH_AND_TERM);
283 assert(j->level1->type == MATCH_OR_TERM);
284 assert(j->level2->type == MATCH_AND_TERM);
285
286 le_hash = htole64(hash64(data, size));
287
288 LIST_FOREACH(matches, l3, j->level2->matches) {
289 assert(l3->type == MATCH_OR_TERM);
290
291 LIST_FOREACH(matches, l4, l3->matches) {
292 assert(l4->type == MATCH_DISCRETE);
293
294 /* Exactly the same match already? Then ignore
295 * this addition */
296 if (l4->le_hash == le_hash &&
297 l4->size == size &&
298 memcmp(l4->data, data, size) == 0)
299 return 0;
300
301 /* Same field? Then let's add this to this OR term */
302 if (same_field(data, size, l4->data, l4->size)) {
303 add_here = l3;
304 break;
305 }
306 }
307
308 if (add_here)
309 break;
310 }
311
312 if (!add_here) {
313 add_here = match_new(j->level2, MATCH_OR_TERM);
314 if (!add_here)
315 goto fail;
316 }
317
318 m = match_new(add_here, MATCH_DISCRETE);
319 if (!m)
320 goto fail;
321
322 m->le_hash = le_hash;
323 m->size = size;
324 m->data = memdup(data, size);
325 if (!m->data)
326 goto fail;
327
328 detach_location(j);
329
330 return 0;
331
332 fail:
333 match_free_if_empty(add_here);
334 match_free_if_empty(j->level2);
335 match_free_if_empty(j->level1);
336 match_free_if_empty(j->level0);
337
338 return -ENOMEM;
339 }
340
341 _public_ int sd_journal_add_conjunction(sd_journal *j) {
342 assert_return(j, -EINVAL);
343 assert_return(!journal_pid_changed(j), -ECHILD);
344
345 if (!j->level0)
346 return 0;
347
348 if (!j->level1)
349 return 0;
350
351 if (!j->level1->matches)
352 return 0;
353
354 j->level1 = NULL;
355 j->level2 = NULL;
356
357 return 0;
358 }
359
360 _public_ int sd_journal_add_disjunction(sd_journal *j) {
361 assert_return(j, -EINVAL);
362 assert_return(!journal_pid_changed(j), -ECHILD);
363
364 if (!j->level0)
365 return 0;
366
367 if (!j->level1)
368 return 0;
369
370 if (!j->level2)
371 return 0;
372
373 if (!j->level2->matches)
374 return 0;
375
376 j->level2 = NULL;
377 return 0;
378 }
379
380 static char *match_make_string(Match *m) {
381 char *p = NULL, *r;
382 Match *i;
383 bool enclose = false;
384
385 if (!m)
386 return strdup("none");
387
388 if (m->type == MATCH_DISCRETE)
389 return strndup(m->data, m->size);
390
391 LIST_FOREACH(matches, i, m->matches) {
392 char *t, *k;
393
394 t = match_make_string(i);
395 if (!t)
396 return mfree(p);
397
398 if (p) {
399 k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t);
400 free(p);
401 free(t);
402
403 if (!k)
404 return NULL;
405
406 p = k;
407
408 enclose = true;
409 } else
410 p = t;
411 }
412
413 if (enclose) {
414 r = strjoin("(", p, ")");
415 free(p);
416 return r;
417 }
418
419 return p;
420 }
421
422 char *journal_make_match_string(sd_journal *j) {
423 assert(j);
424
425 return match_make_string(j->level0);
426 }
427
428 _public_ void sd_journal_flush_matches(sd_journal *j) {
429 if (!j)
430 return;
431
432 if (j->level0)
433 match_free(j->level0);
434
435 j->level0 = j->level1 = j->level2 = NULL;
436
437 detach_location(j);
438 }
439
440 _pure_ static int compare_with_location(JournalFile *f, Location *l) {
441 assert(f);
442 assert(l);
443 assert(f->location_type == LOCATION_SEEK);
444 assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
445
446 if (l->monotonic_set &&
447 sd_id128_equal(f->current_boot_id, l->boot_id) &&
448 l->realtime_set &&
449 f->current_realtime == l->realtime &&
450 l->xor_hash_set &&
451 f->current_xor_hash == l->xor_hash)
452 return 0;
453
454 if (l->seqnum_set &&
455 sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
456
457 if (f->current_seqnum < l->seqnum)
458 return -1;
459 if (f->current_seqnum > l->seqnum)
460 return 1;
461 }
462
463 if (l->monotonic_set &&
464 sd_id128_equal(f->current_boot_id, l->boot_id)) {
465
466 if (f->current_monotonic < l->monotonic)
467 return -1;
468 if (f->current_monotonic > l->monotonic)
469 return 1;
470 }
471
472 if (l->realtime_set) {
473
474 if (f->current_realtime < l->realtime)
475 return -1;
476 if (f->current_realtime > l->realtime)
477 return 1;
478 }
479
480 if (l->xor_hash_set) {
481
482 if (f->current_xor_hash < l->xor_hash)
483 return -1;
484 if (f->current_xor_hash > l->xor_hash)
485 return 1;
486 }
487
488 return 0;
489 }
490
/* Looks in journal file f for the next entry that satisfies the match tree m,
 * searching from after_offset in the given direction.
 *
 * MATCH_DISCRETE: resolves the data object for m->data via its hash and lets
 *   journal_file_move_to_entry_by_offset_for_data() locate the entry; ret and
 *   offset are passed straight through to it.
 * MATCH_OR_TERM: asks every child and keeps the nearest candidate offset
 *   (smallest when going down, largest when going up).
 * MATCH_AND_TERM: keeps advancing the children until they agree on an offset.
 *
 * Returns a negative value on error, 0 if no entry was found and 1 if one was
 * found. For the two term types the entry object is stored in *ret and its
 * offset in *offset, each only if the pointer is non-NULL. */
491 static int next_for_match(
492 sd_journal *j,
493 Match *m,
494 JournalFile *f,
495 uint64_t after_offset,
496 direction_t direction,
497 Object **ret,
498 uint64_t *offset) {
499
500 int r;
/* np is the candidate entry offset; 0 means "no candidate yet". */
501 uint64_t np = 0;
502 Object *n;
503
504 assert(j);
505 assert(m);
506 assert(f);
507
508 if (m->type == MATCH_DISCRETE) {
509 uint64_t dp;
510
511 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
512 if (r <= 0)
513 return r;
514
515 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
516
517 } else if (m->type == MATCH_OR_TERM) {
518 Match *i;
519
520 /* Find the earliest match beyond after_offset */
521
522 LIST_FOREACH(matches, i, m->matches) {
523 uint64_t cp;
524
525 r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
526 if (r < 0)
527 return r;
528 else if (r > 0) {
529 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
530 np = cp;
531 }
532 }
533
/* None of the children found anything. */
534 if (np == 0)
535 return 0;
536
537 } else if (m->type == MATCH_AND_TERM) {
538 Match *i, *last_moved;
539
540 /* Always jump to the next matching entry and repeat
541 * this until we find an offset that matches for all
542 * matches. */
543
544 if (!m->matches)
545 return 0;
546
547 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
548 if (r <= 0)
549 return r;
550
551 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
552 last_moved = m->matches;
553
/* NOTE(review): LIST_LOOP_BUT_ONE is defined elsewhere; presumably it keeps
 * cycling over the siblings until it arrives back at last_moved, i.e. until
 * a full round has passed without np moving — confirm against list.h. */
554 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
555 uint64_t cp;
556
557 r = next_for_match(j, i, f, np, direction, NULL, &cp);
558 if (r <= 0)
559 return r;
560
561 assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
/* This child had to move past np: np becomes the new common candidate. */
562 if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
563 np = cp;
564 last_moved = i;
565 }
566 }
567 }
568
569 assert(np > 0);
570
/* Load the entry object for the offset the term agreed on. */
571 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
572 if (r < 0)
573 return r;
574
575 if (ret)
576 *ret = n;
577 if (offset)
578 *offset = np;
579
580 return 1;
581 }
582
/* Finds in journal file f the first entry satisfying match tree m, relative to
 * the position described by j->current_location.
 *
 * MATCH_DISCRETE: resolves the data object for m->data and then seeks using
 *   the first criterion available: head/tail, sequence number (same seqnum ID
 *   only), monotonic time (falling through on -ENOENT), realtime; if none is
 *   set, the first entry for the data object in the given direction is used.
 * MATCH_OR_TERM: takes the nearest of the children's results.
 * MATCH_AND_TERM: takes the farthest of the children's results as a starting
 *   point and lets next_for_match() find an entry all children agree on.
 *
 * Returns a negative value on error, 0 if nothing was found and 1 on success. */
583 static int find_location_for_match(
584 sd_journal *j,
585 Match *m,
586 JournalFile *f,
587 direction_t direction,
588 Object **ret,
589 uint64_t *offset) {
590
591 int r;
592
593 assert(j);
594 assert(m);
595 assert(f);
596
597 if (m->type == MATCH_DISCRETE) {
598 uint64_t dp;
599
600 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
601 if (r <= 0)
602 return r;
603
/* NOTE(review): a lookup by monotonic time is performed further below, so
 * the following FIXME looks outdated — confirm before removing it. */
604 /* FIXME: missing: find by monotonic */
605
606 if (j->current_location.type == LOCATION_HEAD)
607 return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
608 if (j->current_location.type == LOCATION_TAIL)
609 return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
610 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
611 return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
612 if (j->current_location.monotonic_set) {
613 r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
/* On -ENOENT fall through and try the realtime timestamp instead. */
614 if (r != -ENOENT)
615 return r;
616 }
617 if (j->current_location.realtime_set)
618 return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
619
620 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
621
622 } else if (m->type == MATCH_OR_TERM) {
/* np is the best candidate offset so far; 0 means "none yet". */
623 uint64_t np = 0;
624 Object *n;
625 Match *i;
626
627 /* Find the earliest match */
628
629 LIST_FOREACH(matches, i, m->matches) {
630 uint64_t cp;
631
632 r = find_location_for_match(j, i, f, direction, NULL, &cp);
633 if (r < 0)
634 return r;
635 else if (r > 0) {
636 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
637 np = cp;
638 }
639 }
640
641 if (np == 0)
642 return 0;
643
644 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
645 if (r < 0)
646 return r;
647
648 if (ret)
649 *ret = n;
650 if (offset)
651 *offset = np;
652
653 return 1;
654
655 } else {
656 Match *i;
657 uint64_t np = 0;
658
659 assert(m->type == MATCH_AND_TERM);
660
661 /* First jump to the last match, and then find the
662 * next one where all matches match */
663
664 if (!m->matches)
665 return 0;
666
667 LIST_FOREACH(matches, i, m->matches) {
668 uint64_t cp;
669
670 r = find_location_for_match(j, i, f, direction, NULL, &cp);
/* If one child has no match at all, the whole AND term has none. */
671 if (r <= 0)
672 return r;
673
674 if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
675 np = cp;
676 }
677
678 return next_for_match(j, m, f, np, direction, ret, offset);
679 }
680 }
681
682 static int find_location_with_matches(
683 sd_journal *j,
684 JournalFile *f,
685 direction_t direction,
686 Object **ret,
687 uint64_t *offset) {
688
689 int r;
690
691 assert(j);
692 assert(f);
693 assert(ret);
694 assert(offset);
695
696 if (!j->level0) {
697 /* No matches is simple */
698
699 if (j->current_location.type == LOCATION_HEAD)
700 return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
701 if (j->current_location.type == LOCATION_TAIL)
702 return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
703 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
704 return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
705 if (j->current_location.monotonic_set) {
706 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
707 if (r != -ENOENT)
708 return r;
709 }
710 if (j->current_location.realtime_set)
711 return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
712
713 return journal_file_next_entry(f, 0, direction, ret, offset);
714 } else
715 return find_location_for_match(j, j->level0, f, direction, ret, offset);
716 }
717
718 static int next_with_matches(
719 sd_journal *j,
720 JournalFile *f,
721 direction_t direction,
722 Object **ret,
723 uint64_t *offset) {
724
725 assert(j);
726 assert(f);
727 assert(ret);
728 assert(offset);
729
730 /* No matches is easy. We simple advance the file
731 * pointer by one. */
732 if (!j->level0)
733 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
734
735 /* If we have a match then we look for the next matching entry
736 * with an offset at least one step larger */
737 return next_for_match(j, j->level0, f,
738 direction == DIRECTION_DOWN ? f->current_offset + 1
739 : f->current_offset - 1,
740 direction, ret, offset);
741 }
742
/* Positions file f on a candidate entry that lies beyond j->current_location
 * in the given direction and records it via journal_file_save_location().
 *
 * Returns a negative value on error, 0 if f has no (further) entry to offer,
 * and 1 if f now points at a candidate. When j->current_location is not a
 * discrete location, the first entry found is accepted as is. */
743 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
744 Object *c;
745 uint64_t cp, n_entries;
746 int r;
747
748 assert(j);
749 assert(f);
750
751 n_entries = le64toh(f->header->n_entries);
752
753 /* If we hit EOF before, we don't need to look into this file again
754 * unless direction changed or new entries appeared. */
755 if (f->last_direction == direction && f->location_type == LOCATION_TAIL &&
756 n_entries == f->last_n_entries)
757 return 0;
758
/* Remember the entry count so that the check above can notice new entries
 * on a later call. */
759 f->last_n_entries = n_entries;
760
761 if (f->last_direction == direction && f->current_offset > 0) {
762 /* LOCATION_SEEK here means we did the work in a previous
763 * iteration and the current location already points to a
764 * candidate entry. */
765 if (f->location_type != LOCATION_SEEK) {
766 r = next_with_matches(j, f, direction, &c, &cp);
767 if (r <= 0)
768 return r;
769
770 journal_file_save_location(f, c, cp);
771 }
772 } else {
/* Direction changed or the file has no position yet: look the position up
 * from scratch, based on j->current_location. */
773 f->last_direction = direction;
774
775 r = find_location_with_matches(j, f, direction, &c, &cp);
776 if (r <= 0)
777 return r;
778
779 journal_file_save_location(f, c, cp);
780 }
781
782 /* OK, we found the spot, now let's advance until an entry
783 * that is actually different from what we were previously
784 * looking at. This is necessary to handle entries which exist
785 * in two (or more) journal files, and which shall all be
786 * suppressed but one. */
787
788 for (;;) {
789 bool found;
790
791 if (j->current_location.type == LOCATION_DISCRETE) {
792 int k;
793
794 k = compare_with_location(f, &j->current_location);
795
/* Only accept entries strictly beyond the current one; k == 0 means this
 * is the same entry again. */
796 found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
797 } else
798 found = true;
799
800 if (found)
801 return 1;
802
803 r = next_with_matches(j, f, direction, &c, &cp);
804 if (r <= 0)
805 return r;
806
807 journal_file_save_location(f, c, cp);
808 }
809 }
810
811 static int real_journal_next(sd_journal *j, direction_t direction) {
812 JournalFile *new_file = NULL;
813 unsigned i, n_files;
814 const void **files;
815 Object *o;
816 int r;
817
818 assert_return(j, -EINVAL);
819 assert_return(!journal_pid_changed(j), -ECHILD);
820
821 r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
822 if (r < 0)
823 return r;
824
825 for (i = 0; i < n_files; i++) {
826 JournalFile *f = (JournalFile *)files[i];
827 bool found;
828
829 r = next_beyond_location(j, f, direction);
830 if (r < 0) {
831 log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
832 remove_file_real(j, f);
833 continue;
834 } else if (r == 0) {
835 f->location_type = LOCATION_TAIL;
836 continue;
837 }
838
839 if (!new_file)
840 found = true;
841 else {
842 int k;
843
844 k = journal_file_compare_locations(f, new_file);
845
846 found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
847 }
848
849 if (found)
850 new_file = f;
851 }
852
853 if (!new_file)
854 return 0;
855
856 r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
857 if (r < 0)
858 return r;
859
860 set_location(j, new_file, o);
861
862 return 1;
863 }
864
865 _public_ int sd_journal_next(sd_journal *j) {
866 return real_journal_next(j, DIRECTION_DOWN);
867 }
868
869 _public_ int sd_journal_previous(sd_journal *j) {
870 return real_journal_next(j, DIRECTION_UP);
871 }
872
873 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
874 int c = 0, r;
875
876 assert_return(j, -EINVAL);
877 assert_return(!journal_pid_changed(j), -ECHILD);
878
879 if (skip == 0) {
880 /* If this is not a discrete skip, then at least
881 * resolve the current location */
882 if (j->current_location.type != LOCATION_DISCRETE) {
883 r = real_journal_next(j, direction);
884 if (r < 0)
885 return r;
886 }
887
888 return 0;
889 }
890
891 do {
892 r = real_journal_next(j, direction);
893 if (r < 0)
894 return r;
895
896 if (r == 0)
897 return c;
898
899 skip--;
900 c++;
901 } while (skip > 0);
902
903 return c;
904 }
905
906 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
907 return real_journal_next_skip(j, DIRECTION_DOWN, skip);
908 }
909
910 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
911 return real_journal_next_skip(j, DIRECTION_UP, skip);
912 }
913
914 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
915 Object *o;
916 int r;
917 char bid[33], sid[33];
918
919 assert_return(j, -EINVAL);
920 assert_return(!journal_pid_changed(j), -ECHILD);
921 assert_return(cursor, -EINVAL);
922
923 if (!j->current_file || j->current_file->current_offset <= 0)
924 return -EADDRNOTAVAIL;
925
926 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
927 if (r < 0)
928 return r;
929
930 sd_id128_to_string(j->current_file->header->seqnum_id, sid);
931 sd_id128_to_string(o->entry.boot_id, bid);
932
933 if (asprintf(cursor,
934 "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
935 sid, le64toh(o->entry.seqnum),
936 bid, le64toh(o->entry.monotonic),
937 le64toh(o->entry.realtime),
938 le64toh(o->entry.xor_hash)) < 0)
939 return -ENOMEM;
940
941 return 0;
942 }
943
/* Seeks to the position described by a cursor string as generated by
 * sd_journal_get_cursor(). The seek only records the position in
 * j->current_location (as LOCATION_SEEK); no entry is loaded here.
 *
 * The cursor is a list of "<key>=<value>" items separated by ';'. Recognized
 * keys: s (seqnum ID), i (seqnum), b (boot ID), m (monotonic), t (realtime),
 * x (xor hash); numeric values are hexadecimal. Items with other keys are
 * ignored. At least one of these must be present: both s and i, both b and m,
 * or t.
 *
 * Returns 0 on success, -EINVAL on NULL/empty arguments or a malformed or
 * insufficient cursor, -ECHILD after a fork(), -ENOMEM on allocation failure,
 * or the error from parsing an ID. */
944 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
945 const char *word, *state;
946 size_t l;
947 unsigned long long seqnum, monotonic, realtime, xor_hash;
948 bool
949 seqnum_id_set = false,
950 seqnum_set = false,
951 boot_id_set = false,
952 monotonic_set = false,
953 realtime_set = false,
954 xor_hash_set = false;
955 sd_id128_t seqnum_id, boot_id;
956
957 assert_return(j, -EINVAL);
958 assert_return(!journal_pid_changed(j), -ECHILD);
959 assert_return(!isempty(cursor), -EINVAL);
960
961 FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
962 char *item;
963 int k = 0;
964
/* Every item needs at least a key character followed by '='. */
965 if (l < 2 || word[1] != '=')
966 return -EINVAL;
967
/* word is not NUL-terminated at l bytes, hence work on a copy. */
968 item = strndup(word, l);
969 if (!item)
970 return -ENOMEM;
971
972 switch (word[0]) {
973
974 case 's':
975 seqnum_id_set = true;
976 k = sd_id128_from_string(item+2, &seqnum_id);
977 break;
978
979 case 'i':
980 seqnum_set = true;
981 if (sscanf(item+2, "%llx", &seqnum) != 1)
982 k = -EINVAL;
983 break;
984
985 case 'b':
986 boot_id_set = true;
987 k = sd_id128_from_string(item+2, &boot_id);
988 break;
989
990 case 'm':
991 monotonic_set = true;
992 if (sscanf(item+2, "%llx", &monotonic) != 1)
993 k = -EINVAL;
994 break;
995
996 case 't':
997 realtime_set = true;
998 if (sscanf(item+2, "%llx", &realtime) != 1)
999 k = -EINVAL;
1000 break;
1001
1002 case 'x':
1003 xor_hash_set = true;
1004 if (sscanf(item+2, "%llx", &xor_hash) != 1)
1005 k = -EINVAL;
1006 break;
1007 }
1008
1009 free(item);
1010
1011 if (k < 0)
1012 return k;
1013 }
1014
/* The cursor must carry enough information to seek by at least one of:
 * sequence number, monotonic time, or realtime. */
1015 if ((!seqnum_set || !seqnum_id_set) &&
1016 (!monotonic_set || !boot_id_set) &&
1017 !realtime_set)
1018 return -EINVAL;
1019
1020 reset_location(j);
1021
1022 j->current_location.type = LOCATION_SEEK;
1023
1024 if (realtime_set) {
1025 j->current_location.realtime = (uint64_t) realtime;
1026 j->current_location.realtime_set = true;
1027 }
1028
/* A sequence number is only meaningful together with its seqnum ID. */
1029 if (seqnum_set && seqnum_id_set) {
1030 j->current_location.seqnum = (uint64_t) seqnum;
1031 j->current_location.seqnum_id = seqnum_id;
1032 j->current_location.seqnum_set = true;
1033 }
1034
/* Likewise, a monotonic timestamp is only meaningful together with its boot ID. */
1035 if (monotonic_set && boot_id_set) {
1036 j->current_location.monotonic = (uint64_t) monotonic;
1037 j->current_location.boot_id = boot_id;
1038 j->current_location.monotonic_set = true;
1039 }
1040
1041 if (xor_hash_set) {
1042 j->current_location.xor_hash = (uint64_t) xor_hash;
1043 j->current_location.xor_hash_set = true;
1044 }
1045
1046 return 0;
1047 }
1048
/* Checks whether the current entry matches the given cursor string.
 *
 * Each ';'-separated "<key>=<value>" item of the cursor is compared against
 * the corresponding property of the current entry (s: seqnum ID of the
 * current file, i: seqnum, b: boot ID, m: monotonic, t: realtime, x: xor
 * hash). Items with other keys are ignored.
 *
 * Returns 1 if every recognized item matches, 0 at the first item that does
 * not, -EINVAL on NULL/empty arguments or a malformed cursor, -ECHILD after
 * a fork(), -EADDRNOTAVAIL if the journal is not positioned on an entry, or
 * the error from loading the entry or parsing an item. */
1049 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1050 int r;
1051 Object *o;
1052
1053 assert_return(j, -EINVAL);
1054 assert_return(!journal_pid_changed(j), -ECHILD);
1055 assert_return(!isempty(cursor), -EINVAL);
1056
1057 if (!j->current_file || j->current_file->current_offset <= 0)
1058 return -EADDRNOTAVAIL;
1059
1060 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1061 if (r < 0)
1062 return r;
1063
1064 for (;;) {
/* item is released automatically at the end of each iteration. */
1065 _cleanup_free_ char *item = NULL;
1066 unsigned long long ll;
1067 sd_id128_t id;
1068 int k = 0;
1069
1070 r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1071 if (r < 0)
1072 return r;
1073
/* 0 means the cursor string is exhausted. */
1074 if (r == 0)
1075 break;
1076
1077 if (strlen(item) < 2 || item[1] != '=')
1078 return -EINVAL;
1079
1080 switch (item[0]) {
1081
1082 case 's':
1083 k = sd_id128_from_string(item+2, &id);
1084 if (k < 0)
1085 return k;
1086 if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1087 return 0;
1088 break;
1089
1090 case 'i':
1091 if (sscanf(item+2, "%llx", &ll) != 1)
1092 return -EINVAL;
1093 if (ll != le64toh(o->entry.seqnum))
1094 return 0;
1095 break;
1096
1097 case 'b':
1098 k = sd_id128_from_string(item+2, &id);
1099 if (k < 0)
1100 return k;
1101 if (!sd_id128_equal(id, o->entry.boot_id))
1102 return 0;
1103 break;
1104
1105 case 'm':
1106 if (sscanf(item+2, "%llx", &ll) != 1)
1107 return -EINVAL;
1108 if (ll != le64toh(o->entry.monotonic))
1109 return 0;
1110 break;
1111
1112 case 't':
1113 if (sscanf(item+2, "%llx", &ll) != 1)
1114 return -EINVAL;
1115 if (ll != le64toh(o->entry.realtime))
1116 return 0;
1117 break;
1118
1119 case 'x':
1120 if (sscanf(item+2, "%llx", &ll) != 1)
1121 return -EINVAL;
1122 if (ll != le64toh(o->entry.xor_hash))
1123 return 0;
1124 break;
1125 }
1126 }
1127
/* No item contradicted the current entry. */
1128 return 1;
1129 }
1130
1131 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1132 assert_return(j, -EINVAL);
1133 assert_return(!journal_pid_changed(j), -ECHILD);
1134
1135 reset_location(j);
1136 j->current_location.type = LOCATION_SEEK;
1137 j->current_location.boot_id = boot_id;
1138 j->current_location.monotonic = usec;
1139 j->current_location.monotonic_set = true;
1140
1141 return 0;
1142 }
1143
1144 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1145 assert_return(j, -EINVAL);
1146 assert_return(!journal_pid_changed(j), -ECHILD);
1147
1148 reset_location(j);
1149 j->current_location.type = LOCATION_SEEK;
1150 j->current_location.realtime = usec;
1151 j->current_location.realtime_set = true;
1152
1153 return 0;
1154 }
1155
1156 _public_ int sd_journal_seek_head(sd_journal *j) {
1157 assert_return(j, -EINVAL);
1158 assert_return(!journal_pid_changed(j), -ECHILD);
1159
1160 reset_location(j);
1161 j->current_location.type = LOCATION_HEAD;
1162
1163 return 0;
1164 }
1165
1166 _public_ int sd_journal_seek_tail(sd_journal *j) {
1167 assert_return(j, -EINVAL);
1168 assert_return(!journal_pid_changed(j), -ECHILD);
1169
1170 reset_location(j);
1171 j->current_location.type = LOCATION_TAIL;
1172
1173 return 0;
1174 }
1175
1176 static void check_network(sd_journal *j, int fd) {
1177 assert(j);
1178
1179 if (j->on_network)
1180 return;
1181
1182 j->on_network = fd_is_network_fs(fd);
1183 }
1184
/* Checks whether filename belongs to the journal type named by prefix, i.e.
 * is exactly "<prefix>.journal" or "<prefix>.journal~", or starts with
 * "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *atted, *full, *tilded;

        atted = strjoina(prefix, "@");
        full = strjoina(prefix, ".journal");
        tilded = strjoina(full, "~");

        if (streq(filename, full))
                return true;

        if (streq(filename, tilded))
                return true;

        return !!startswith(filename, atted);
}
1196
1197 static bool file_type_wanted(int flags, const char *filename) {
1198 assert(filename);
1199
1200 if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1201 return false;
1202
1203 /* no flags set → every type is OK */
1204 if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1205 return true;
1206
1207 if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1208 return true;
1209
1210 if (flags & SD_JOURNAL_CURRENT_USER) {
1211 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1212
1213 xsprintf(prefix, "user-"UID_FMT, getuid());
1214
1215 if (file_has_type_prefix(prefix, filename))
1216 return true;
1217 }
1218
1219 return false;
1220 }
1221
1222 static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
1223 assert(j);
1224 assert(path);
1225 assert(prefix);
1226
1227 if (j->toplevel_fd >= 0)
1228 return false;
1229
1230 return path_startswith(path, prefix);
1231 }
1232
1233 static void track_file_disposition(sd_journal *j, JournalFile *f) {
1234 assert(j);
1235 assert(f);
1236
1237 if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
1238 j->has_runtime_files = true;
1239 else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
1240 j->has_persistent_files = true;
1241 }
1242
/* Returns a pointer to the first character of p that is not a '/', i.e. p
 * with all leading slashes skipped. NULL is passed through. */
static const char *skip_slash(const char *p) {
        const char *q;

        if (!p)
                return NULL;

        for (q = p; *q == '/'; q++)
                ;

        return q;
}
1253
1254 static int add_any_file(
1255 sd_journal *j,
1256 int fd,
1257 const char *path) {
1258
1259 bool close_fd = false;
1260 JournalFile *f;
1261 struct stat st;
1262 int r, k;
1263
1264 assert(j);
1265 assert(fd >= 0 || path);
1266
1267 if (fd < 0) {
1268 if (j->toplevel_fd >= 0)
1269 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1270 * openat() ignores the first argument. */
1271
1272 fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1273 else
1274 fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1275 if (fd < 0) {
1276 r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
1277 goto finish;
1278 }
1279
1280 close_fd = true;
1281
1282 r = fd_nonblock(fd, false);
1283 if (r < 0) {
1284 r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
1285 goto finish;
1286 }
1287 }
1288
1289 if (fstat(fd, &st) < 0) {
1290 r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path);
1291 goto finish;
1292 }
1293
1294 r = stat_verify_regular(&st);
1295 if (r < 0) {
1296 log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path);
1297 goto finish;
1298 }
1299
1300 f = ordered_hashmap_get(j->files, path);
1301 if (f) {
1302 if (f->last_stat.st_dev == st.st_dev &&
1303 f->last_stat.st_ino == st.st_ino) {
1304
1305 /* We already track this file, under the same path and with the same device/inode numbers, it's
1306 * hence really the same. Mark this file as seen in this generation. This is used to GC old
1307 * files in process_q_overflow() to detect journal files that are still there and discern them
1308 * from those which are gone. */
1309
1310 f->last_seen_generation = j->generation;
1311 r = 0;
1312 goto finish;
1313 }
1314
1315 /* So we tracked a file under this name, but it has a different inode/device. In that case, it got
1316 * replaced (probably due to rotation?), let's drop it hence from our list. */
1317 remove_file_real(j, f);
1318 f = NULL;
1319 }
1320
1321 if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1322 log_debug("Too many open journal files, not adding %s.", path);
1323 r = -ETOOMANYREFS;
1324 goto finish;
1325 }
1326
1327 r = journal_file_open(fd, path, O_RDONLY, 0, false, 0, false, NULL, j->mmap, NULL, NULL, &f);
1328 if (r < 0) {
1329 log_debug_errno(r, "Failed to open journal file %s: %m", path);
1330 goto finish;
1331 }
1332
1333 /* journal_file_dump(f); */
1334
1335 r = ordered_hashmap_put(j->files, f->path, f);
1336 if (r < 0) {
1337 f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */
1338 (void) journal_file_close(f);
1339 goto finish;
1340 }
1341
1342 close_fd = false; /* the fd is now owned by the JournalFile object */
1343
1344 f->last_seen_generation = j->generation;
1345
1346 track_file_disposition(j, f);
1347 check_network(j, f->fd);
1348
1349 j->current_invalidate_counter++;
1350
1351 log_debug("File %s added.", f->path);
1352
1353 r = 0;
1354
1355 finish:
1356 if (close_fd)
1357 safe_close(fd);
1358
1359 if (r < 0) {
1360 k = journal_put_error(j, r, path);
1361 if (k < 0)
1362 return k;
1363 }
1364
1365 return r;
1366 }
1367
1368 static int add_file_by_name(
1369 sd_journal *j,
1370 const char *prefix,
1371 const char *filename) {
1372
1373 const char *path;
1374
1375 assert(j);
1376 assert(prefix);
1377 assert(filename);
1378
1379 if (j->no_new_files)
1380 return 0;
1381
1382 if (!file_type_wanted(j->flags, filename))
1383 return 0;
1384
1385 path = strjoina(prefix, "/", filename);
1386 return add_any_file(j, -1, path);
1387 }
1388
1389 static void remove_file_by_name(
1390 sd_journal *j,
1391 const char *prefix,
1392 const char *filename) {
1393
1394 const char *path;
1395 JournalFile *f;
1396
1397 assert(j);
1398 assert(prefix);
1399 assert(filename);
1400
1401 path = strjoina(prefix, "/", filename);
1402 f = ordered_hashmap_get(j->files, path);
1403 if (!f)
1404 return;
1405
1406 remove_file_real(j, f);
1407 }
1408
1409 static void remove_file_real(sd_journal *j, JournalFile *f) {
1410 assert(j);
1411 assert(f);
1412
1413 (void) ordered_hashmap_remove(j->files, f->path);
1414
1415 log_debug("File %s removed.", f->path);
1416
1417 if (j->current_file == f) {
1418 j->current_file = NULL;
1419 j->current_field = 0;
1420 }
1421
1422 if (j->unique_file == f) {
1423 /* Jump to the next unique_file or NULL if that one was last */
1424 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1425 j->unique_offset = 0;
1426 if (!j->unique_file)
1427 j->unique_file_lost = true;
1428 }
1429
1430 if (j->fields_file == f) {
1431 j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
1432 j->fields_offset = 0;
1433 if (!j->fields_file)
1434 j->fields_file_lost = true;
1435 }
1436
1437 (void) journal_file_close(f);
1438
1439 j->current_invalidate_counter++;
1440 }
1441
1442 static int dirname_is_machine_id(const char *fn) {
1443 sd_id128_t id, machine;
1444 int r;
1445
1446 r = sd_id128_get_machine(&machine);
1447 if (r < 0)
1448 return r;
1449
1450 r = sd_id128_from_string(fn, &id);
1451 if (r < 0)
1452 return r;
1453
1454 return sd_id128_equal(id, machine);
1455 }
1456
1457 static bool dirent_is_journal_file(const struct dirent *de) {
1458 assert(de);
1459
1460 if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
1461 return false;
1462
1463 return endswith(de->d_name, ".journal") ||
1464 endswith(de->d_name, ".journal~");
1465 }
1466
1467 static bool dirent_is_id128_subdir(const struct dirent *de) {
1468 assert(de);
1469
1470 if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
1471 return false;
1472
1473 return id128_is_valid(de->d_name);
1474 }
1475
1476 static int directory_open(sd_journal *j, const char *path, DIR **ret) {
1477 DIR *d;
1478
1479 assert(j);
1480 assert(path);
1481 assert(ret);
1482
1483 if (j->toplevel_fd < 0)
1484 d = opendir(path);
1485 else
1486 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1487 * relative, by dropping the initial slash */
1488 d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
1489 if (!d)
1490 return -errno;
1491
1492 *ret = d;
1493 return 0;
1494 }
1495
1496 static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
1497
1498 static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
1499 struct dirent *de;
1500
1501 assert(j);
1502 assert(m);
1503 assert(d);
1504
1505 FOREACH_DIRENT_ALL(de, d, goto fail) {
1506
1507 if (dirent_is_journal_file(de))
1508 (void) add_file_by_name(j, m->path, de->d_name);
1509
1510 if (m->is_root && dirent_is_id128_subdir(de))
1511 (void) add_directory(j, m->path, de->d_name);
1512 }
1513
1514 return;
1515
1516 fail:
1517 log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
1518 }
1519
1520 static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
1521 int r;
1522
1523 assert(j);
1524 assert(m);
1525 assert(fd >= 0);
1526
1527 /* Watch this directory if that's enabled and if it not being watched yet. */
1528
1529 if (m->wd > 0) /* Already have a watch? */
1530 return;
1531 if (j->inotify_fd < 0) /* Not watching at all? */
1532 return;
1533
1534 m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
1535 if (m->wd < 0) {
1536 log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
1537 return;
1538 }
1539
1540 r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
1541 if (r == -EEXIST)
1542 log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
1543 if (r < 0) {
1544 log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
1545 (void) inotify_rm_watch(j->inotify_fd, m->wd);
1546 m->wd = -1;
1547 }
1548 }
1549
1550 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1551 _cleanup_free_ char *path = NULL;
1552 _cleanup_closedir_ DIR *d = NULL;
1553 Directory *m;
1554 int r, k;
1555
1556 assert(j);
1557 assert(prefix);
1558
1559 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1560 * and reenumerates directory contents */
1561
1562 if (dirname)
1563 path = strjoin(prefix, "/", dirname);
1564 else
1565 path = strdup(prefix);
1566 if (!path) {
1567 r = -ENOMEM;
1568 goto fail;
1569 }
1570
1571 log_debug("Considering directory '%s'.", path);
1572
1573 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1574 if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1575 !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
1576 return 0;
1577
1578 r = directory_open(j, path, &d);
1579 if (r < 0) {
1580 log_debug_errno(r, "Failed to open directory '%s': %m", path);
1581 goto fail;
1582 }
1583
1584 m = hashmap_get(j->directories_by_path, path);
1585 if (!m) {
1586 m = new0(Directory, 1);
1587 if (!m) {
1588 r = -ENOMEM;
1589 goto fail;
1590 }
1591
1592 m->is_root = false;
1593 m->path = path;
1594
1595 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1596 free(m);
1597 r = -ENOMEM;
1598 goto fail;
1599 }
1600
1601 path = NULL; /* avoid freeing in cleanup */
1602 j->current_invalidate_counter++;
1603
1604 log_debug("Directory %s added.", m->path);
1605
1606 } else if (m->is_root)
1607 return 0; /* Don't 'downgrade' from root directory */
1608
1609 m->last_seen_generation = j->generation;
1610
1611 directory_watch(j, m, dirfd(d),
1612 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1613 IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1614 IN_ONLYDIR);
1615
1616 if (!j->no_new_files)
1617 directory_enumerate(j, m, d);
1618
1619 check_network(j, dirfd(d));
1620
1621 return 0;
1622
1623 fail:
1624 k = journal_put_error(j, r, path ?: prefix);
1625 if (k < 0)
1626 return k;
1627
1628 return r;
1629 }
1630
1631 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
1632
1633 _cleanup_closedir_ DIR *d = NULL;
1634 Directory *m;
1635 int r, k;
1636
1637 assert(j);
1638
1639 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
1640 * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
1641 * populate the set, as well as to update it later. */
1642
1643 if (p) {
1644 /* If there's a path specified, use it. */
1645
1646 log_debug("Considering root directory '%s'.", p);
1647
1648 if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1649 !path_has_prefix(j, p, "/run"))
1650 return -EINVAL;
1651
1652 if (j->prefix)
1653 p = strjoina(j->prefix, p);
1654
1655 r = directory_open(j, p, &d);
1656 if (r == -ENOENT && missing_ok)
1657 return 0;
1658 if (r < 0) {
1659 log_debug_errno(r, "Failed to open root directory %s: %m", p);
1660 goto fail;
1661 }
1662 } else {
1663 int dfd;
1664
1665 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
1666 * opendir() will take possession of the fd, and close it, which we don't want. */
1667
1668 p = "."; /* store this as "." in the directories hashmap */
1669
1670 dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
1671 if (dfd < 0) {
1672 r = -errno;
1673 goto fail;
1674 }
1675
1676 d = fdopendir(dfd);
1677 if (!d) {
1678 r = -errno;
1679 safe_close(dfd);
1680 goto fail;
1681 }
1682
1683 rewinddir(d);
1684 }
1685
1686 m = hashmap_get(j->directories_by_path, p);
1687 if (!m) {
1688 m = new0(Directory, 1);
1689 if (!m) {
1690 r = -ENOMEM;
1691 goto fail;
1692 }
1693
1694 m->is_root = true;
1695
1696 m->path = strdup(p);
1697 if (!m->path) {
1698 free(m);
1699 r = -ENOMEM;
1700 goto fail;
1701 }
1702
1703 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1704 free(m->path);
1705 free(m);
1706 r = -ENOMEM;
1707 goto fail;
1708 }
1709
1710 j->current_invalidate_counter++;
1711
1712 log_debug("Root directory %s added.", m->path);
1713
1714 } else if (!m->is_root)
1715 return 0;
1716
1717 directory_watch(j, m, dirfd(d),
1718 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1719 IN_ONLYDIR);
1720
1721 if (!j->no_new_files)
1722 directory_enumerate(j, m, d);
1723
1724 check_network(j, dirfd(d));
1725
1726 return 0;
1727
1728 fail:
1729 k = journal_put_error(j, r, p);
1730 if (k < 0)
1731 return k;
1732
1733 return r;
1734 }
1735
1736 static void remove_directory(sd_journal *j, Directory *d) {
1737 assert(j);
1738
1739 if (d->wd > 0) {
1740 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1741
1742 if (j->inotify_fd >= 0)
1743 inotify_rm_watch(j->inotify_fd, d->wd);
1744 }
1745
1746 hashmap_remove(j->directories_by_path, d->path);
1747
1748 if (d->is_root)
1749 log_debug("Root directory %s removed.", d->path);
1750 else
1751 log_debug("Directory %s removed.", d->path);
1752
1753 free(d->path);
1754 free(d);
1755 }
1756
1757 static int add_search_paths(sd_journal *j) {
1758
1759 static const char search_paths[] =
1760 "/run/log/journal\0"
1761 "/var/log/journal\0";
1762 const char *p;
1763
1764 assert(j);
1765
1766 /* We ignore most errors here, since the idea is to only open
1767 * what's actually accessible, and ignore the rest. */
1768
1769 NULSTR_FOREACH(p, search_paths)
1770 (void) add_root_directory(j, p, true);
1771
1772 if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
1773 (void) add_root_directory(j, "/var/log/journal/remote", true);
1774
1775 return 0;
1776 }
1777
1778 static int add_current_paths(sd_journal *j) {
1779 Iterator i;
1780 JournalFile *f;
1781
1782 assert(j);
1783 assert(j->no_new_files);
1784
1785 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
1786 * treat them as fatal. */
1787
1788 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1789 _cleanup_free_ char *dir;
1790 int r;
1791
1792 dir = dirname_malloc(f->path);
1793 if (!dir)
1794 return -ENOMEM;
1795
1796 r = add_directory(j, dir, NULL);
1797 if (r < 0)
1798 return r;
1799 }
1800
1801 return 0;
1802 }
1803
1804 static int allocate_inotify(sd_journal *j) {
1805 assert(j);
1806
1807 if (j->inotify_fd < 0) {
1808 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1809 if (j->inotify_fd < 0)
1810 return -errno;
1811 }
1812
1813 return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
1814 }
1815
1816 static sd_journal *journal_new(int flags, const char *path) {
1817 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1818
1819 j = new0(sd_journal, 1);
1820 if (!j)
1821 return NULL;
1822
1823 j->original_pid = getpid_cached();
1824 j->toplevel_fd = -1;
1825 j->inotify_fd = -1;
1826 j->flags = flags;
1827 j->data_threshold = DEFAULT_DATA_THRESHOLD;
1828
1829 if (path) {
1830 char *t;
1831
1832 t = strdup(path);
1833 if (!t)
1834 return NULL;
1835
1836 if (flags & SD_JOURNAL_OS_ROOT)
1837 j->prefix = t;
1838 else
1839 j->path = t;
1840 }
1841
1842 j->files = ordered_hashmap_new(&path_hash_ops);
1843 if (!j->files)
1844 return NULL;
1845
1846 j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
1847 j->directories_by_path = hashmap_new(&path_hash_ops);
1848 j->mmap = mmap_cache_new();
1849 if (!j->files_cache || !j->directories_by_path || !j->mmap)
1850 return NULL;
1851
1852 return TAKE_PTR(j);
1853 }
1854
1855 #define OPEN_ALLOWED_FLAGS \
1856 (SD_JOURNAL_LOCAL_ONLY | \
1857 SD_JOURNAL_RUNTIME_ONLY | \
1858 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
1859
1860 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1861 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1862 int r;
1863
1864 assert_return(ret, -EINVAL);
1865 assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
1866
1867 j = journal_new(flags, NULL);
1868 if (!j)
1869 return -ENOMEM;
1870
1871 r = add_search_paths(j);
1872 if (r < 0)
1873 return r;
1874
1875 *ret = TAKE_PTR(j);
1876 return 0;
1877 }
1878
1879 #define OPEN_CONTAINER_ALLOWED_FLAGS \
1880 (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
1881
1882 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1883 _cleanup_free_ char *root = NULL, *class = NULL;
1884 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1885 char *p;
1886 int r;
1887
1888 /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
1889 * combination with sd_journal_open_directory_fd(). */
1890
1891 assert_return(machine, -EINVAL);
1892 assert_return(ret, -EINVAL);
1893 assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
1894 assert_return(machine_name_is_valid(machine), -EINVAL);
1895
1896 p = strjoina("/run/systemd/machines/", machine);
1897 r = parse_env_file(p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1898 if (r == -ENOENT)
1899 return -EHOSTDOWN;
1900 if (r < 0)
1901 return r;
1902 if (!root)
1903 return -ENODATA;
1904
1905 if (!streq_ptr(class, "container"))
1906 return -EIO;
1907
1908 j = journal_new(flags, root);
1909 if (!j)
1910 return -ENOMEM;
1911
1912 r = add_search_paths(j);
1913 if (r < 0)
1914 return r;
1915
1916 *ret = TAKE_PTR(j);
1917 return 0;
1918 }
1919
1920 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
1921 (SD_JOURNAL_OS_ROOT | \
1922 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1923
1924 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1925 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1926 int r;
1927
1928 assert_return(ret, -EINVAL);
1929 assert_return(path, -EINVAL);
1930 assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
1931
1932 j = journal_new(flags, path);
1933 if (!j)
1934 return -ENOMEM;
1935
1936 if (flags & SD_JOURNAL_OS_ROOT)
1937 r = add_search_paths(j);
1938 else
1939 r = add_root_directory(j, path, false);
1940 if (r < 0)
1941 return r;
1942
1943 *ret = TAKE_PTR(j);
1944 return 0;
1945 }
1946
1947 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1948 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1949 const char **path;
1950 int r;
1951
1952 assert_return(ret, -EINVAL);
1953 assert_return(flags == 0, -EINVAL);
1954
1955 j = journal_new(flags, NULL);
1956 if (!j)
1957 return -ENOMEM;
1958
1959 STRV_FOREACH(path, paths) {
1960 r = add_any_file(j, -1, *path);
1961 if (r < 0)
1962 return r;
1963 }
1964
1965 j->no_new_files = true;
1966
1967 *ret = TAKE_PTR(j);
1968 return 0;
1969 }
1970
1971 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
1972 (SD_JOURNAL_OS_ROOT | \
1973 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1974
1975 _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
1976 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1977 struct stat st;
1978 int r;
1979
1980 assert_return(ret, -EINVAL);
1981 assert_return(fd >= 0, -EBADF);
1982 assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
1983
1984 if (fstat(fd, &st) < 0)
1985 return -errno;
1986
1987 if (!S_ISDIR(st.st_mode))
1988 return -EBADFD;
1989
1990 j = journal_new(flags, NULL);
1991 if (!j)
1992 return -ENOMEM;
1993
1994 j->toplevel_fd = fd;
1995
1996 if (flags & SD_JOURNAL_OS_ROOT)
1997 r = add_search_paths(j);
1998 else
1999 r = add_root_directory(j, NULL, false);
2000 if (r < 0)
2001 return r;
2002
2003 *ret = TAKE_PTR(j);
2004 return 0;
2005 }
2006
2007 _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
2008 Iterator iterator;
2009 JournalFile *f;
2010 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2011 unsigned i;
2012 int r;
2013
2014 assert_return(ret, -EINVAL);
2015 assert_return(n_fds > 0, -EBADF);
2016 assert_return(flags == 0, -EINVAL);
2017
2018 j = journal_new(flags, NULL);
2019 if (!j)
2020 return -ENOMEM;
2021
2022 for (i = 0; i < n_fds; i++) {
2023 struct stat st;
2024
2025 if (fds[i] < 0) {
2026 r = -EBADF;
2027 goto fail;
2028 }
2029
2030 if (fstat(fds[i], &st) < 0) {
2031 r = -errno;
2032 goto fail;
2033 }
2034
2035 r = stat_verify_regular(&st);
2036 if (r < 0)
2037 goto fail;
2038
2039 r = add_any_file(j, fds[i], NULL);
2040 if (r < 0)
2041 goto fail;
2042 }
2043
2044 j->no_new_files = true;
2045 j->no_inotify = true;
2046
2047 *ret = TAKE_PTR(j);
2048 return 0;
2049
2050 fail:
2051 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2052 * remain open */
2053 ORDERED_HASHMAP_FOREACH(f, j->files, iterator)
2054 f->close_fd = false;
2055
2056 return r;
2057 }
2058
2059 _public_ void sd_journal_close(sd_journal *j) {
2060 Directory *d;
2061
2062 if (!j)
2063 return;
2064
2065 sd_journal_flush_matches(j);
2066
2067 ordered_hashmap_free_with_destructor(j->files, journal_file_close);
2068 iterated_cache_free(j->files_cache);
2069
2070 while ((d = hashmap_first(j->directories_by_path)))
2071 remove_directory(j, d);
2072
2073 while ((d = hashmap_first(j->directories_by_wd)))
2074 remove_directory(j, d);
2075
2076 hashmap_free(j->directories_by_path);
2077 hashmap_free(j->directories_by_wd);
2078
2079 safe_close(j->inotify_fd);
2080
2081 if (j->mmap) {
2082 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
2083 mmap_cache_unref(j->mmap);
2084 }
2085
2086 hashmap_free_free(j->errors);
2087
2088 free(j->path);
2089 free(j->prefix);
2090 free(j->unique_field);
2091 free(j->fields_buffer);
2092 free(j);
2093 }
2094
2095 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
2096 Object *o;
2097 JournalFile *f;
2098 int r;
2099
2100 assert_return(j, -EINVAL);
2101 assert_return(!journal_pid_changed(j), -ECHILD);
2102 assert_return(ret, -EINVAL);
2103
2104 f = j->current_file;
2105 if (!f)
2106 return -EADDRNOTAVAIL;
2107
2108 if (f->current_offset <= 0)
2109 return -EADDRNOTAVAIL;
2110
2111 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2112 if (r < 0)
2113 return r;
2114
2115 *ret = le64toh(o->entry.realtime);
2116 return 0;
2117 }
2118
2119 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
2120 Object *o;
2121 JournalFile *f;
2122 int r;
2123 sd_id128_t id;
2124
2125 assert_return(j, -EINVAL);
2126 assert_return(!journal_pid_changed(j), -ECHILD);
2127
2128 f = j->current_file;
2129 if (!f)
2130 return -EADDRNOTAVAIL;
2131
2132 if (f->current_offset <= 0)
2133 return -EADDRNOTAVAIL;
2134
2135 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2136 if (r < 0)
2137 return r;
2138
2139 if (ret_boot_id)
2140 *ret_boot_id = o->entry.boot_id;
2141 else {
2142 r = sd_id128_get_boot(&id);
2143 if (r < 0)
2144 return r;
2145
2146 if (!sd_id128_equal(id, o->entry.boot_id))
2147 return -ESTALE;
2148 }
2149
2150 if (ret)
2151 *ret = le64toh(o->entry.monotonic);
2152
2153 return 0;
2154 }
2155
/* Checks whether 'field' is acceptable as field name for lookups: it must be non-empty, must not begin
 * with two underscores, and may consist only of '_', 'A'-'Z' and '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        /* Refuse the empty string */
        if (field[0] == '\0')
                return false;

        /* Refuse names with a "__" prefix */
        if (field[0] == '_' && field[1] == '_')
                return false;

        for (c = field; *c != '\0'; c++) {
                bool allowed;

                allowed = *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        (*c >= '0' && *c <= '9');
                if (!allowed)
                        return false;
        }

        return true;
}
2183
2184 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
2185 JournalFile *f;
2186 uint64_t i, n;
2187 size_t field_length;
2188 int r;
2189 Object *o;
2190
2191 assert_return(j, -EINVAL);
2192 assert_return(!journal_pid_changed(j), -ECHILD);
2193 assert_return(field, -EINVAL);
2194 assert_return(data, -EINVAL);
2195 assert_return(size, -EINVAL);
2196 assert_return(field_is_valid(field), -EINVAL);
2197
2198 f = j->current_file;
2199 if (!f)
2200 return -EADDRNOTAVAIL;
2201
2202 if (f->current_offset <= 0)
2203 return -EADDRNOTAVAIL;
2204
2205 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2206 if (r < 0)
2207 return r;
2208
2209 field_length = strlen(field);
2210
2211 n = journal_file_entry_n_items(o);
2212 for (i = 0; i < n; i++) {
2213 uint64_t p, l;
2214 le64_t le_hash;
2215 size_t t;
2216 int compression;
2217
2218 p = le64toh(o->entry.items[i].object_offset);
2219 le_hash = o->entry.items[i].hash;
2220 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2221 if (r < 0)
2222 return r;
2223
2224 if (le_hash != o->data.hash)
2225 return -EBADMSG;
2226
2227 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2228
2229 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2230 if (compression) {
2231 #if HAVE_XZ || HAVE_LZ4
2232 r = decompress_startswith(compression,
2233 o->data.payload, l,
2234 &f->compress_buffer, &f->compress_buffer_size,
2235 field, field_length, '=');
2236 if (r < 0)
2237 log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m",
2238 object_compressed_to_string(compression), l, p);
2239 else if (r > 0) {
2240
2241 size_t rsize;
2242
2243 r = decompress_blob(compression,
2244 o->data.payload, l,
2245 &f->compress_buffer, &f->compress_buffer_size, &rsize,
2246 j->data_threshold);
2247 if (r < 0)
2248 return r;
2249
2250 *data = f->compress_buffer;
2251 *size = (size_t) rsize;
2252
2253 return 0;
2254 }
2255 #else
2256 return -EPROTONOSUPPORT;
2257 #endif
2258 } else if (l >= field_length+1 &&
2259 memcmp(o->data.payload, field, field_length) == 0 &&
2260 o->data.payload[field_length] == '=') {
2261
2262 t = (size_t) l;
2263
2264 if ((uint64_t) t != l)
2265 return -E2BIG;
2266
2267 *data = o->data.payload;
2268 *size = t;
2269
2270 return 0;
2271 }
2272
2273 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2274 if (r < 0)
2275 return r;
2276 }
2277
2278 return -ENOENT;
2279 }
2280
2281 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
2282 size_t t;
2283 uint64_t l;
2284 int compression;
2285
2286 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2287 t = (size_t) l;
2288
2289 /* We can't read objects larger than 4G on a 32bit machine */
2290 if ((uint64_t) t != l)
2291 return -E2BIG;
2292
2293 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2294 if (compression) {
2295 #if HAVE_XZ || HAVE_LZ4
2296 size_t rsize;
2297 int r;
2298
2299 r = decompress_blob(compression,
2300 o->data.payload, l, &f->compress_buffer,
2301 &f->compress_buffer_size, &rsize, j->data_threshold);
2302 if (r < 0)
2303 return r;
2304
2305 *data = f->compress_buffer;
2306 *size = (size_t) rsize;
2307 #else
2308 return -EPROTONOSUPPORT;
2309 #endif
2310 } else {
2311 *data = o->data.payload;
2312 *size = t;
2313 }
2314
2315 return 0;
2316 }
2317
2318 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2319 JournalFile *f;
2320 uint64_t p, n;
2321 le64_t le_hash;
2322 int r;
2323 Object *o;
2324
2325 assert_return(j, -EINVAL);
2326 assert_return(!journal_pid_changed(j), -ECHILD);
2327 assert_return(data, -EINVAL);
2328 assert_return(size, -EINVAL);
2329
2330 f = j->current_file;
2331 if (!f)
2332 return -EADDRNOTAVAIL;
2333
2334 if (f->current_offset <= 0)
2335 return -EADDRNOTAVAIL;
2336
2337 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2338 if (r < 0)
2339 return r;
2340
2341 n = journal_file_entry_n_items(o);
2342 if (j->current_field >= n)
2343 return 0;
2344
2345 p = le64toh(o->entry.items[j->current_field].object_offset);
2346 le_hash = o->entry.items[j->current_field].hash;
2347 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2348 if (r < 0)
2349 return r;
2350
2351 if (le_hash != o->data.hash)
2352 return -EBADMSG;
2353
2354 r = return_data(j, f, o, data, size);
2355 if (r < 0)
2356 return r;
2357
2358 j->current_field++;
2359
2360 return 1;
2361 }
2362
2363 _public_ void sd_journal_restart_data(sd_journal *j) {
2364 if (!j)
2365 return;
2366
2367 j->current_field = 0;
2368 }
2369
2370 static int reiterate_all_paths(sd_journal *j) {
2371 assert(j);
2372
2373 if (j->no_new_files)
2374 return add_current_paths(j);
2375
2376 if (j->flags & SD_JOURNAL_OS_ROOT)
2377 return add_search_paths(j);
2378
2379 if (j->toplevel_fd >= 0)
2380 return add_root_directory(j, NULL, false);
2381
2382 if (j->path)
2383 return add_root_directory(j, j->path, true);
2384
2385 return add_search_paths(j);
2386 }
2387
2388 _public_ int sd_journal_get_fd(sd_journal *j) {
2389 int r;
2390
2391 assert_return(j, -EINVAL);
2392 assert_return(!journal_pid_changed(j), -ECHILD);
2393
2394 if (j->no_inotify)
2395 return -EMEDIUMTYPE;
2396
2397 if (j->inotify_fd >= 0)
2398 return j->inotify_fd;
2399
2400 r = allocate_inotify(j);
2401 if (r < 0)
2402 return r;
2403
2404 log_debug("Reiterating files to get inotify watches established.");
2405
2406 /* Iterate through all dirs again, to add them to the inotify */
2407 r = reiterate_all_paths(j);
2408 if (r < 0)
2409 return r;
2410
2411 return j->inotify_fd;
2412 }
2413
2414 _public_ int sd_journal_get_events(sd_journal *j) {
2415 int fd;
2416
2417 assert_return(j, -EINVAL);
2418 assert_return(!journal_pid_changed(j), -ECHILD);
2419
2420 fd = sd_journal_get_fd(j);
2421 if (fd < 0)
2422 return fd;
2423
2424 return POLLIN;
2425 }
2426
2427 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2428 int fd;
2429
2430 assert_return(j, -EINVAL);
2431 assert_return(!journal_pid_changed(j), -ECHILD);
2432 assert_return(timeout_usec, -EINVAL);
2433
2434 fd = sd_journal_get_fd(j);
2435 if (fd < 0)
2436 return fd;
2437
2438 if (!j->on_network) {
2439 *timeout_usec = (uint64_t) -1;
2440 return 0;
2441 }
2442
2443 /* If we are on the network we need to regularly check for
2444 * changes manually */
2445
2446 *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2447 return 1;
2448 }
2449
2450 static void process_q_overflow(sd_journal *j) {
2451 JournalFile *f;
2452 Directory *m;
2453 Iterator i;
2454
2455 assert(j);
2456
2457 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2458 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2459 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2460 * are subject for unloading. */
2461
2462 log_debug("Inotify queue overrun, reiterating everything.");
2463
2464 j->generation++;
2465 (void) reiterate_all_paths(j);
2466
2467 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2468
2469 if (f->last_seen_generation == j->generation)
2470 continue;
2471
2472 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
2473 remove_file_real(j, f);
2474 }
2475
2476 HASHMAP_FOREACH(m, j->directories_by_path, i) {
2477
2478 if (m->last_seen_generation == j->generation)
2479 continue;
2480
2481 if (m->is_root) /* Never GC root directories */
2482 continue;
2483
2484 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
2485 remove_directory(j, m);
2486 }
2487
2488 log_debug("Reiteration complete.");
2489 }
2490
2491 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2492 Directory *d;
2493
2494 assert(j);
2495 assert(e);
2496
2497 if (e->mask & IN_Q_OVERFLOW) {
2498 process_q_overflow(j);
2499 return;
2500 }
2501
2502 /* Is this a subdirectory we watch? */
2503 d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2504 if (d) {
2505 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2506 (endswith(e->name, ".journal") ||
2507 endswith(e->name, ".journal~"))) {
2508
2509 /* Event for a journal file */
2510
2511 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2512 (void) add_file_by_name(j, d->path, e->name);
2513 else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
2514 remove_file_by_name(j, d->path, e->name);
2515
2516 } else if (!d->is_root && e->len == 0) {
2517
2518 /* Event for a subdirectory */
2519
2520 if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
2521 remove_directory(j, d);
2522
2523 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
2524
2525 /* Event for root directory */
2526
2527 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2528 (void) add_directory(j, d->path, e->name);
2529 }
2530
2531 return;
2532 }
2533
2534 if (e->mask & IN_IGNORED)
2535 return;
2536
2537 log_debug("Unexpected inotify event.");
2538 }
2539
2540 static int determine_change(sd_journal *j) {
2541 bool b;
2542
2543 assert(j);
2544
2545 b = j->current_invalidate_counter != j->last_invalidate_counter;
2546 j->last_invalidate_counter = j->current_invalidate_counter;
2547
2548 return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2549 }
2550
2551 _public_ int sd_journal_process(sd_journal *j) {
2552 bool got_something = false;
2553
2554 assert_return(j, -EINVAL);
2555 assert_return(!journal_pid_changed(j), -ECHILD);
2556
2557 if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
2558 return 0;
2559
2560 j->last_process_usec = now(CLOCK_MONOTONIC);
2561 j->last_invalidate_counter = j->current_invalidate_counter;
2562
2563 for (;;) {
2564 union inotify_event_buffer buffer;
2565 struct inotify_event *e;
2566 ssize_t l;
2567
2568 l = read(j->inotify_fd, &buffer, sizeof(buffer));
2569 if (l < 0) {
2570 if (IN_SET(errno, EAGAIN, EINTR))
2571 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2572
2573 return -errno;
2574 }
2575
2576 got_something = true;
2577
2578 FOREACH_INOTIFY_EVENT(e, buffer, l)
2579 process_inotify_event(j, e);
2580 }
2581 }
2582
2583 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2584 int r;
2585 uint64_t t;
2586
2587 assert_return(j, -EINVAL);
2588 assert_return(!journal_pid_changed(j), -ECHILD);
2589
2590 if (j->inotify_fd < 0) {
2591
2592 /* This is the first invocation, hence create the
2593 * inotify watch */
2594 r = sd_journal_get_fd(j);
2595 if (r < 0)
2596 return r;
2597
2598 /* The journal might have changed since the context
2599 * object was created and we weren't watching before,
2600 * hence don't wait for anything, and return
2601 * immediately. */
2602 return determine_change(j);
2603 }
2604
2605 r = sd_journal_get_timeout(j, &t);
2606 if (r < 0)
2607 return r;
2608
2609 if (t != (uint64_t) -1) {
2610 usec_t n;
2611
2612 n = now(CLOCK_MONOTONIC);
2613 t = t > n ? t - n : 0;
2614
2615 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2616 timeout_usec = t;
2617 }
2618
2619 do {
2620 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2621 } while (r == -EINTR);
2622
2623 if (r < 0)
2624 return r;
2625
2626 return sd_journal_process(j);
2627 }
2628
2629 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2630 Iterator i;
2631 JournalFile *f;
2632 bool first = true;
2633 uint64_t fmin = 0, tmax = 0;
2634 int r;
2635
2636 assert_return(j, -EINVAL);
2637 assert_return(!journal_pid_changed(j), -ECHILD);
2638 assert_return(from || to, -EINVAL);
2639 assert_return(from != to, -EINVAL);
2640
2641 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2642 usec_t fr, t;
2643
2644 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2645 if (r == -ENOENT)
2646 continue;
2647 if (r < 0)
2648 return r;
2649 if (r == 0)
2650 continue;
2651
2652 if (first) {
2653 fmin = fr;
2654 tmax = t;
2655 first = false;
2656 } else {
2657 fmin = MIN(fr, fmin);
2658 tmax = MAX(t, tmax);
2659 }
2660 }
2661
2662 if (from)
2663 *from = fmin;
2664 if (to)
2665 *to = tmax;
2666
2667 return first ? 0 : 1;
2668 }
2669
2670 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2671 Iterator i;
2672 JournalFile *f;
2673 bool found = false;
2674 int r;
2675
2676 assert_return(j, -EINVAL);
2677 assert_return(!journal_pid_changed(j), -ECHILD);
2678 assert_return(from || to, -EINVAL);
2679 assert_return(from != to, -EINVAL);
2680
2681 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2682 usec_t fr, t;
2683
2684 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2685 if (r == -ENOENT)
2686 continue;
2687 if (r < 0)
2688 return r;
2689 if (r == 0)
2690 continue;
2691
2692 if (found) {
2693 if (from)
2694 *from = MIN(fr, *from);
2695 if (to)
2696 *to = MAX(t, *to);
2697 } else {
2698 if (from)
2699 *from = fr;
2700 if (to)
2701 *to = t;
2702 found = true;
2703 }
2704 }
2705
2706 return found;
2707 }
2708
2709 void journal_print_header(sd_journal *j) {
2710 Iterator i;
2711 JournalFile *f;
2712 bool newline = false;
2713
2714 assert(j);
2715
2716 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2717 if (newline)
2718 putchar('\n');
2719 else
2720 newline = true;
2721
2722 journal_file_print_header(f);
2723 }
2724 }
2725
2726 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2727 Iterator i;
2728 JournalFile *f;
2729 uint64_t sum = 0;
2730
2731 assert_return(j, -EINVAL);
2732 assert_return(!journal_pid_changed(j), -ECHILD);
2733 assert_return(bytes, -EINVAL);
2734
2735 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2736 struct stat st;
2737
2738 if (fstat(f->fd, &st) < 0)
2739 return -errno;
2740
2741 sum += (uint64_t) st.st_blocks * 512ULL;
2742 }
2743
2744 *bytes = sum;
2745 return 0;
2746 }
2747
2748 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2749 char *f;
2750
2751 assert_return(j, -EINVAL);
2752 assert_return(!journal_pid_changed(j), -ECHILD);
2753 assert_return(!isempty(field), -EINVAL);
2754 assert_return(field_is_valid(field), -EINVAL);
2755
2756 f = strdup(field);
2757 if (!f)
2758 return -ENOMEM;
2759
2760 free(j->unique_field);
2761 j->unique_field = f;
2762 j->unique_file = NULL;
2763 j->unique_offset = 0;
2764 j->unique_file_lost = false;
2765
2766 return 0;
2767 }
2768
2769 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2770 size_t k;
2771
2772 assert_return(j, -EINVAL);
2773 assert_return(!journal_pid_changed(j), -ECHILD);
2774 assert_return(data, -EINVAL);
2775 assert_return(l, -EINVAL);
2776 assert_return(j->unique_field, -EINVAL);
2777
2778 k = strlen(j->unique_field);
2779
2780 if (!j->unique_file) {
2781 if (j->unique_file_lost)
2782 return 0;
2783
2784 j->unique_file = ordered_hashmap_first(j->files);
2785 if (!j->unique_file)
2786 return 0;
2787
2788 j->unique_offset = 0;
2789 }
2790
2791 for (;;) {
2792 JournalFile *of;
2793 Iterator i;
2794 Object *o;
2795 const void *odata;
2796 size_t ol;
2797 bool found;
2798 int r;
2799
2800 /* Proceed to next data object in the field's linked list */
2801 if (j->unique_offset == 0) {
2802 r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2803 if (r < 0)
2804 return r;
2805
2806 j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2807 } else {
2808 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2809 if (r < 0)
2810 return r;
2811
2812 j->unique_offset = le64toh(o->data.next_field_offset);
2813 }
2814
2815 /* We reached the end of the list? Then start again, with the next file */
2816 if (j->unique_offset == 0) {
2817 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
2818 if (!j->unique_file)
2819 return 0;
2820
2821 continue;
2822 }
2823
2824 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2825 * instead, so that we can look at this data object at the same
2826 * time as one on another file */
2827 r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
2828 if (r < 0)
2829 return r;
2830
2831 /* Let's do the type check by hand, since we used 0 context above. */
2832 if (o->object.type != OBJECT_DATA) {
2833 log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
2834 j->unique_file->path, j->unique_offset,
2835 o->object.type, OBJECT_DATA);
2836 return -EBADMSG;
2837 }
2838
2839 r = return_data(j, j->unique_file, o, &odata, &ol);
2840 if (r < 0)
2841 return r;
2842
2843 /* Check if we have at least the field name and "=". */
2844 if (ol <= k) {
2845 log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2846 j->unique_file->path, j->unique_offset,
2847 ol, k + 1);
2848 return -EBADMSG;
2849 }
2850
2851 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
2852 log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
2853 j->unique_file->path, j->unique_offset,
2854 j->unique_field);
2855 return -EBADMSG;
2856 }
2857
2858 /* OK, now let's see if we already returned this data
2859 * object by checking if it exists in the earlier
2860 * traversed files. */
2861 found = false;
2862 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2863 if (of == j->unique_file)
2864 break;
2865
2866 /* Skip this file it didn't have any fields indexed */
2867 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
2868 continue;
2869
2870 r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
2871 if (r < 0)
2872 return r;
2873 if (r > 0) {
2874 found = true;
2875 break;
2876 }
2877 }
2878
2879 if (found)
2880 continue;
2881
2882 r = return_data(j, j->unique_file, o, data, l);
2883 if (r < 0)
2884 return r;
2885
2886 return 1;
2887 }
2888 }
2889
2890 _public_ void sd_journal_restart_unique(sd_journal *j) {
2891 if (!j)
2892 return;
2893
2894 j->unique_file = NULL;
2895 j->unique_offset = 0;
2896 j->unique_file_lost = false;
2897 }
2898
2899 _public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
2900 int r;
2901
2902 assert_return(j, -EINVAL);
2903 assert_return(!journal_pid_changed(j), -ECHILD);
2904 assert_return(field, -EINVAL);
2905
2906 if (!j->fields_file) {
2907 if (j->fields_file_lost)
2908 return 0;
2909
2910 j->fields_file = ordered_hashmap_first(j->files);
2911 if (!j->fields_file)
2912 return 0;
2913
2914 j->fields_hash_table_index = 0;
2915 j->fields_offset = 0;
2916 }
2917
2918 for (;;) {
2919 JournalFile *f, *of;
2920 Iterator i;
2921 uint64_t m;
2922 Object *o;
2923 size_t sz;
2924 bool found;
2925
2926 f = j->fields_file;
2927
2928 if (j->fields_offset == 0) {
2929 bool eof = false;
2930
2931 /* We are not yet positioned at any field. Let's pick the first one */
2932 r = journal_file_map_field_hash_table(f);
2933 if (r < 0)
2934 return r;
2935
2936 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
2937 for (;;) {
2938 if (j->fields_hash_table_index >= m) {
2939 /* Reached the end of the hash table, go to the next file. */
2940 eof = true;
2941 break;
2942 }
2943
2944 j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
2945
2946 if (j->fields_offset != 0)
2947 break;
2948
2949 /* Empty hash table bucket, go to next one */
2950 j->fields_hash_table_index++;
2951 }
2952
2953 if (eof) {
2954 /* Proceed with next file */
2955 j->fields_file = ordered_hashmap_next(j->files, f->path);
2956 if (!j->fields_file) {
2957 *field = NULL;
2958 return 0;
2959 }
2960
2961 j->fields_offset = 0;
2962 j->fields_hash_table_index = 0;
2963 continue;
2964 }
2965
2966 } else {
2967 /* We are already positioned at a field. If so, let's figure out the next field from it */
2968
2969 r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
2970 if (r < 0)
2971 return r;
2972
2973 j->fields_offset = le64toh(o->field.next_hash_offset);
2974 if (j->fields_offset == 0) {
2975 /* Reached the end of the hash table chain */
2976 j->fields_hash_table_index++;
2977 continue;
2978 }
2979 }
2980
2981 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
2982 r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
2983 if (r < 0)
2984 return r;
2985
2986 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
2987 if (o->object.type != OBJECT_FIELD) {
2988 log_debug("%s:offset " OFSfmt ": object has type %i, expected %i", f->path, j->fields_offset, o->object.type, OBJECT_FIELD);
2989 return -EBADMSG;
2990 }
2991
2992 sz = le64toh(o->object.size) - offsetof(Object, field.payload);
2993
2994 /* Let's see if we already returned this field name before. */
2995 found = false;
2996 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2997 if (of == f)
2998 break;
2999
3000 /* Skip this file it didn't have any fields indexed */
3001 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3002 continue;
3003
3004 r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL);
3005 if (r < 0)
3006 return r;
3007 if (r > 0) {
3008 found = true;
3009 break;
3010 }
3011 }
3012
3013 if (found)
3014 continue;
3015
3016 /* Check if this is really a valid string containing no NUL byte */
3017 if (memchr(o->field.payload, 0, sz))
3018 return -EBADMSG;
3019
3020 if (sz > j->data_threshold)
3021 sz = j->data_threshold;
3022
3023 if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1))
3024 return -ENOMEM;
3025
3026 memcpy(j->fields_buffer, o->field.payload, sz);
3027 j->fields_buffer[sz] = 0;
3028
3029 if (!field_is_valid(j->fields_buffer))
3030 return -EBADMSG;
3031
3032 *field = j->fields_buffer;
3033 return 1;
3034 }
3035 }
3036
3037 _public_ void sd_journal_restart_fields(sd_journal *j) {
3038 if (!j)
3039 return;
3040
3041 j->fields_file = NULL;
3042 j->fields_hash_table_index = 0;
3043 j->fields_offset = 0;
3044 j->fields_file_lost = false;
3045 }
3046
3047 _public_ int sd_journal_reliable_fd(sd_journal *j) {
3048 assert_return(j, -EINVAL);
3049 assert_return(!journal_pid_changed(j), -ECHILD);
3050
3051 return !j->on_network;
3052 }
3053
3054 static char *lookup_field(const char *field, void *userdata) {
3055 sd_journal *j = userdata;
3056 const void *data;
3057 size_t size, d;
3058 int r;
3059
3060 assert(field);
3061 assert(j);
3062
3063 r = sd_journal_get_data(j, field, &data, &size);
3064 if (r < 0 ||
3065 size > REPLACE_VAR_MAX)
3066 return strdup(field);
3067
3068 d = strlen(field) + 1;
3069
3070 return strndup((const char*) data + d, size - d);
3071 }
3072
3073 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
3074 const void *data;
3075 size_t size;
3076 sd_id128_t id;
3077 _cleanup_free_ char *text = NULL, *cid = NULL;
3078 char *t;
3079 int r;
3080
3081 assert_return(j, -EINVAL);
3082 assert_return(!journal_pid_changed(j), -ECHILD);
3083 assert_return(ret, -EINVAL);
3084
3085 r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
3086 if (r < 0)
3087 return r;
3088
3089 cid = strndup((const char*) data + 11, size - 11);
3090 if (!cid)
3091 return -ENOMEM;
3092
3093 r = sd_id128_from_string(cid, &id);
3094 if (r < 0)
3095 return r;
3096
3097 r = catalog_get(CATALOG_DATABASE, id, &text);
3098 if (r < 0)
3099 return r;
3100
3101 t = replace_var(text, lookup_field, j);
3102 if (!t)
3103 return -ENOMEM;
3104
3105 *ret = t;
3106 return 0;
3107 }
3108
3109 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
3110 assert_return(ret, -EINVAL);
3111
3112 return catalog_get(CATALOG_DATABASE, id, ret);
3113 }
3114
3115 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
3116 assert_return(j, -EINVAL);
3117 assert_return(!journal_pid_changed(j), -ECHILD);
3118
3119 j->data_threshold = sz;
3120 return 0;
3121 }
3122
3123 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
3124 assert_return(j, -EINVAL);
3125 assert_return(!journal_pid_changed(j), -ECHILD);
3126 assert_return(sz, -EINVAL);
3127
3128 *sz = j->data_threshold;
3129 return 0;
3130 }
3131
3132 _public_ int sd_journal_has_runtime_files(sd_journal *j) {
3133 assert_return(j, -EINVAL);
3134
3135 return j->has_runtime_files;
3136 }
3137
3138 _public_ int sd_journal_has_persistent_files(sd_journal *j) {
3139 assert_return(j, -EINVAL);
3140
3141 return j->has_persistent_files;
3142 }