]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/sd-journal.c
Merge pull request #11378 from keszybz/export-dbus-address-conditionally
[thirdparty/systemd.git] / src / journal / sd-journal.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <inttypes.h>
6 #include <linux/magic.h>
7 #include <poll.h>
8 #include <stddef.h>
9 #include <sys/inotify.h>
10 #include <sys/vfs.h>
11 #include <unistd.h>
12
13 #include "sd-journal.h"
14
15 #include "alloc-util.h"
16 #include "catalog.h"
17 #include "compress.h"
18 #include "dirent-util.h"
19 #include "env-file.h"
20 #include "escape.h"
21 #include "fd-util.h"
22 #include "fileio.h"
23 #include "format-util.h"
24 #include "fs-util.h"
25 #include "hashmap.h"
26 #include "hostname-util.h"
27 #include "id128-util.h"
28 #include "io-util.h"
29 #include "journal-def.h"
30 #include "journal-file.h"
31 #include "journal-internal.h"
32 #include "list.h"
33 #include "lookup3.h"
34 #include "missing.h"
35 #include "path-util.h"
36 #include "process-util.h"
37 #include "replace-var.h"
38 #include "stat-util.h"
39 #include "stdio-util.h"
40 #include "string-util.h"
41 #include "strv.h"
42
43 #define JOURNAL_FILES_MAX 7168
44
45 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
46
47 #define REPLACE_VAR_MAX 256
48
49 #define DEFAULT_DATA_THRESHOLD (64*1024)
50
51 static void remove_file_real(sd_journal *j, JournalFile *f);
52
53 static bool journal_pid_changed(sd_journal *j) {
54 assert(j);
55
56 /* We don't support people creating a journal object and
57 * keeping it around over a fork(). Let's complain. */
58
59 return j->original_pid != getpid_cached();
60 }
61
62 static int journal_put_error(sd_journal *j, int r, const char *path) {
63 char *copy;
64 int k;
65
66 /* Memorize an error we encountered, and store which
67 * file/directory it was generated from. Note that we store
68 * only *one* path per error code, as the error code is the
69 * key into the hashmap, and the path is the value. This means
70 * we keep track only of all error kinds, but not of all error
71 * locations. This has the benefit that the hashmap cannot
72 * grow beyond bounds.
73 *
74 * We return an error here only if we didn't manage to
75 * memorize the real error. */
76
77 if (r >= 0)
78 return r;
79
80 k = hashmap_ensure_allocated(&j->errors, NULL);
81 if (k < 0)
82 return k;
83
84 if (path) {
85 copy = strdup(path);
86 if (!copy)
87 return -ENOMEM;
88 } else
89 copy = NULL;
90
91 k = hashmap_put(j->errors, INT_TO_PTR(r), copy);
92 if (k < 0) {
93 free(copy);
94
95 if (k == -EEXIST)
96 return 0;
97
98 return k;
99 }
100
101 return 0;
102 }
103
104 static void detach_location(sd_journal *j) {
105 Iterator i;
106 JournalFile *f;
107
108 assert(j);
109
110 j->current_file = NULL;
111 j->current_field = 0;
112
113 ORDERED_HASHMAP_FOREACH(f, j->files, i)
114 journal_file_reset_location(f);
115 }
116
/* Like detach_location(), but additionally wipes j->current_location. */
static void reset_location(sd_journal *j) {
        assert(j);

        detach_location(j);
        zero(j->current_location);
}
123
124 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
125 assert(l);
126 assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
127 assert(f);
128 assert(o->object.type == OBJECT_ENTRY);
129
130 l->type = type;
131 l->seqnum = le64toh(o->entry.seqnum);
132 l->seqnum_id = f->header->seqnum_id;
133 l->realtime = le64toh(o->entry.realtime);
134 l->monotonic = le64toh(o->entry.monotonic);
135 l->boot_id = o->entry.boot_id;
136 l->xor_hash = le64toh(o->entry.xor_hash);
137
138 l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
139 }
140
141 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
142 assert(j);
143 assert(f);
144 assert(o);
145
146 init_location(&j->current_location, LOCATION_DISCRETE, f, o);
147
148 j->current_file = f;
149 j->current_field = 0;
150
151 /* Let f know its candidate entry was picked. */
152 assert(f->location_type == LOCATION_SEEK);
153 f->location_type = LOCATION_DISCRETE;
154 }
155
/* Checks whether the size bytes at data form a valid match expression of
 * the form FIELD=value.
 *
 * The field name must be non-empty, must not begin with two underscores and
 * may only consist of 'A'-'Z', '0'-'9' and '_'. Everything after the first
 * '=' is not inspected. Returns non-zero if valid, zero otherwise. */
static int match_is_valid(const void *data, size_t size) {
        const char *b, *p;

        assert(data);

        if (size < 2)
                return false;

        b = data;

        /* The buffer is length-delimited and not necessarily NUL terminated,
         * hence compare the first two bytes directly rather than with a
         * string helper. size >= 2 was verified above. */
        if (b[0] == '_' && b[1] == '_')
                return false;

        for (p = b; p < b + size; p++) {

                /* First '=' ends the field name; it is only valid if at
                 * least one character preceded it. */
                if (*p == '=')
                        return p > b;

                if (*p == '_')
                        continue;

                if (*p >= 'A' && *p <= 'Z')
                        continue;

                if (*p >= '0' && *p <= '9')
                        continue;

                return false;
        }

        /* No '=' found at all */
        return false;
}
187
/* Compares the field names of two FIELD=value buffers byte by byte.
 *
 * Returns true if both are identical up to and including the first '=',
 * false at the first differing byte. Running off the shorter buffer without
 * having seen a '=' is treated as a programming error. */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t limit = s < t ? s : t;
        size_t k;

        for (k = 0; k < limit; k++) {
                if (lhs[k] != rhs[k])
                        return false;

                if (lhs[k] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
203
204 static Match *match_new(Match *p, MatchType t) {
205 Match *m;
206
207 m = new0(Match, 1);
208 if (!m)
209 return NULL;
210
211 m->type = t;
212
213 if (p) {
214 m->parent = p;
215 LIST_PREPEND(matches, p->matches, m);
216 }
217
218 return m;
219 }
220
221 static void match_free(Match *m) {
222 assert(m);
223
224 while (m->matches)
225 match_free(m->matches);
226
227 if (m->parent)
228 LIST_REMOVE(matches, m->parent->matches, m);
229
230 free(m->data);
231 free(m);
232 }
233
234 static void match_free_if_empty(Match *m) {
235 if (!m || m->matches)
236 return;
237
238 match_free(m);
239 }
240
241 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
242 Match *l3, *l4, *add_here = NULL, *m;
243 le64_t le_hash;
244
245 assert_return(j, -EINVAL);
246 assert_return(!journal_pid_changed(j), -ECHILD);
247 assert_return(data, -EINVAL);
248
249 if (size == 0)
250 size = strlen(data);
251
252 assert_return(match_is_valid(data, size), -EINVAL);
253
254 /* level 0: AND term
255 * level 1: OR terms
256 * level 2: AND terms
257 * level 3: OR terms
258 * level 4: concrete matches */
259
260 if (!j->level0) {
261 j->level0 = match_new(NULL, MATCH_AND_TERM);
262 if (!j->level0)
263 return -ENOMEM;
264 }
265
266 if (!j->level1) {
267 j->level1 = match_new(j->level0, MATCH_OR_TERM);
268 if (!j->level1)
269 return -ENOMEM;
270 }
271
272 if (!j->level2) {
273 j->level2 = match_new(j->level1, MATCH_AND_TERM);
274 if (!j->level2)
275 return -ENOMEM;
276 }
277
278 assert(j->level0->type == MATCH_AND_TERM);
279 assert(j->level1->type == MATCH_OR_TERM);
280 assert(j->level2->type == MATCH_AND_TERM);
281
282 le_hash = htole64(hash64(data, size));
283
284 LIST_FOREACH(matches, l3, j->level2->matches) {
285 assert(l3->type == MATCH_OR_TERM);
286
287 LIST_FOREACH(matches, l4, l3->matches) {
288 assert(l4->type == MATCH_DISCRETE);
289
290 /* Exactly the same match already? Then ignore
291 * this addition */
292 if (l4->le_hash == le_hash &&
293 l4->size == size &&
294 memcmp(l4->data, data, size) == 0)
295 return 0;
296
297 /* Same field? Then let's add this to this OR term */
298 if (same_field(data, size, l4->data, l4->size)) {
299 add_here = l3;
300 break;
301 }
302 }
303
304 if (add_here)
305 break;
306 }
307
308 if (!add_here) {
309 add_here = match_new(j->level2, MATCH_OR_TERM);
310 if (!add_here)
311 goto fail;
312 }
313
314 m = match_new(add_here, MATCH_DISCRETE);
315 if (!m)
316 goto fail;
317
318 m->le_hash = le_hash;
319 m->size = size;
320 m->data = memdup(data, size);
321 if (!m->data)
322 goto fail;
323
324 detach_location(j);
325
326 return 0;
327
328 fail:
329 match_free_if_empty(add_here);
330 match_free_if_empty(j->level2);
331 match_free_if_empty(j->level1);
332 match_free_if_empty(j->level0);
333
334 return -ENOMEM;
335 }
336
337 _public_ int sd_journal_add_conjunction(sd_journal *j) {
338 assert_return(j, -EINVAL);
339 assert_return(!journal_pid_changed(j), -ECHILD);
340
341 if (!j->level0)
342 return 0;
343
344 if (!j->level1)
345 return 0;
346
347 if (!j->level1->matches)
348 return 0;
349
350 j->level1 = NULL;
351 j->level2 = NULL;
352
353 return 0;
354 }
355
356 _public_ int sd_journal_add_disjunction(sd_journal *j) {
357 assert_return(j, -EINVAL);
358 assert_return(!journal_pid_changed(j), -ECHILD);
359
360 if (!j->level0)
361 return 0;
362
363 if (!j->level1)
364 return 0;
365
366 if (!j->level2)
367 return 0;
368
369 if (!j->level2->matches)
370 return 0;
371
372 j->level2 = NULL;
373 return 0;
374 }
375
376 static char *match_make_string(Match *m) {
377 char *p = NULL, *r;
378 Match *i;
379 bool enclose = false;
380
381 if (!m)
382 return strdup("none");
383
384 if (m->type == MATCH_DISCRETE)
385 return cescape_length(m->data, m->size);
386
387 LIST_FOREACH(matches, i, m->matches) {
388 char *t, *k;
389
390 t = match_make_string(i);
391 if (!t)
392 return mfree(p);
393
394 if (p) {
395 k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t);
396 free(p);
397 free(t);
398
399 if (!k)
400 return NULL;
401
402 p = k;
403
404 enclose = true;
405 } else
406 p = t;
407 }
408
409 if (enclose) {
410 r = strjoin("(", p, ")");
411 free(p);
412 return r;
413 }
414
415 return p;
416 }
417
/* Returns a newly allocated string rendering of the journal's complete
 * match tree (see match_make_string()), or NULL on failure. */
char *journal_make_match_string(sd_journal *j) {
        assert(j);

        return match_make_string(j->level0);
}
423
424 _public_ void sd_journal_flush_matches(sd_journal *j) {
425 if (!j)
426 return;
427
428 if (j->level0)
429 match_free(j->level0);
430
431 j->level0 = j->level1 = j->level2 = NULL;
432
433 detach_location(j);
434 }
435
/* Compares the candidate entry file f currently points at with location l.
 *
 * f must be in LOCATION_SEEK state and l must be a discrete or seek
 * location. Returns 0 if the two are considered to refer to the same entry
 * (or if no criterion tells them apart), otherwise the non-zero result of
 * CMP() for the first criterion that differs. The caller interprets a
 * positive result as "f is after l" and a negative one as "f is before l". */
_pure_ static int compare_with_location(JournalFile *f, Location *l) {
        int r;

        assert(f);
        assert(l);
        assert(f->location_type == LOCATION_SEEK);
        assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));

        /* Same boot ID, same wallclock timestamp and same XOR hash: treat
         * this as the very same entry, regardless of which file it is in. */
        if (l->monotonic_set &&
            sd_id128_equal(f->current_boot_id, l->boot_id) &&
            l->realtime_set &&
            f->current_realtime == l->realtime &&
            l->xor_hash_set &&
            f->current_xor_hash == l->xor_hash)
                return 0;

        /* Sequence numbers are only compared if both carry the same
         * sequence number ID. */
        if (l->seqnum_set &&
            sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {

                r = CMP(f->current_seqnum, l->seqnum);
                if (r != 0)
                        return r;
        }

        /* Monotonic timestamps are only compared within the same boot. */
        if (l->monotonic_set &&
            sd_id128_equal(f->current_boot_id, l->boot_id)) {

                r = CMP(f->current_monotonic, l->monotonic);
                if (r != 0)
                        return r;
        }

        /* Otherwise fall back to the wallclock timestamp... */
        if (l->realtime_set) {

                r = CMP(f->current_realtime, l->realtime);
                if (r != 0)
                        return r;
        }

        /* ...and finally use the XOR hash as tie-breaker. */
        if (l->xor_hash_set) {

                r = CMP(f->current_xor_hash, l->xor_hash);
                if (r != 0)
                        return r;
        }

        return 0;
}
484
/* Finds the next entry in file f that satisfies match tree m, starting at
 * after_offset and moving in the given direction.
 *
 * Returns 1 and stores the entry object in *ret and its offset in *offset
 * (either may be NULL) if an entry was found, 0 if there is none, and a
 * negative errno on failure. */
static int next_for_match(
                sd_journal *j,
                Match *m,
                JournalFile *f,
                uint64_t after_offset,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        int r;
        uint64_t np = 0; /* offset of the best candidate so far, 0 = none */
        Object *n;

        assert(j);
        assert(m);
        assert(f);

        if (m->type == MATCH_DISCRETE) {
                uint64_t dp;

                /* Look up the data object for this FIELD=value pair; a
                 * result <= 0 (error or nothing found) is passed on. */
                r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
                if (r <= 0)
                        return r;

                return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);

        } else if (m->type == MATCH_OR_TERM) {
                Match *i;

                /* Find the closest match beyond after_offset: the smallest
                 * offset when going down, the largest when going up. */

                LIST_FOREACH(matches, i, m->matches) {
                        uint64_t cp;

                        r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
                        if (r < 0)
                                return r;
                        else if (r > 0) {
                                if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
                                        np = cp;
                        }
                }

                /* None of the alternatives matched */
                if (np == 0)
                        return 0;

        } else if (m->type == MATCH_AND_TERM) {
                Match *i, *last_moved;

                /* Always jump to the next matching entry and repeat
                 * this until we find an offset that matches for all
                 * matches. */

                if (!m->matches)
                        return 0;

                r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
                if (r <= 0)
                        return r;

                assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
                last_moved = m->matches;

                /* NOTE(review): LIST_LOOP_BUT_ONE presumably cycles through
                 * all children except last_moved until it comes back to it;
                 * confirm against list.h. Whenever a child moves the
                 * candidate further, it becomes the new last_moved. */
                LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
                        uint64_t cp;

                        r = next_for_match(j, i, f, np, direction, NULL, &cp);
                        if (r <= 0)
                                return r;

                        assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
                        if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
                                np = cp;
                                last_moved = i;
                        }
                }
        }

        assert(np > 0);

        /* Map the winning offset back to its entry object */
        r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
        if (r < 0)
                return r;

        if (ret)
                *ret = n;
        if (offset)
                *offset = np;

        return 1;
}
576
/* Finds the first entry in file f that satisfies match tree m and is
 * positioned according to j->current_location, searching in the given
 * direction.
 *
 * Returns 1 and stores the entry object in *ret and its offset in *offset
 * (either may be NULL) if an entry was found, 0 if there is none, and a
 * negative errno on failure. */
static int find_location_for_match(
                sd_journal *j,
                Match *m,
                JournalFile *f,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        int r;

        assert(j);
        assert(m);
        assert(f);

        if (m->type == MATCH_DISCRETE) {
                uint64_t dp;

                r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
                if (r <= 0)
                        return r;

                /* FIXME: missing: find by monotonic */

                /* Pick the seek method by what the current location
                 * provides: head/tail first, then the sequence number (only
                 * if it belongs to this file's sequence number ID), then
                 * the monotonic timestamp, then the wallclock timestamp. */
                if (j->current_location.type == LOCATION_HEAD)
                        return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
                if (j->current_location.type == LOCATION_TAIL)
                        return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
                if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
                        return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
                if (j->current_location.monotonic_set) {
                        r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
                        /* On -ENOENT fall through to the next method */
                        if (r != -ENOENT)
                                return r;
                }
                if (j->current_location.realtime_set)
                        return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);

                return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);

        } else if (m->type == MATCH_OR_TERM) {
                uint64_t np = 0; /* offset of the best candidate so far, 0 = none */
                Object *n;
                Match *i;

                /* Find the closest match: the smallest offset when going
                 * down, the largest when going up. */

                LIST_FOREACH(matches, i, m->matches) {
                        uint64_t cp;

                        r = find_location_for_match(j, i, f, direction, NULL, &cp);
                        if (r < 0)
                                return r;
                        else if (r > 0) {
                                if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
                                        np = cp;
                        }
                }

                if (np == 0)
                        return 0;

                r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
                if (r < 0)
                        return r;

                if (ret)
                        *ret = n;
                if (offset)
                        *offset = np;

                return 1;

        } else {
                Match *i;
                uint64_t np = 0;

                assert(m->type == MATCH_AND_TERM);

                /* First jump to the last match, and then find the
                 * next one where all matches match */

                if (!m->matches)
                        return 0;

                /* Every child has to match somewhere; take the position
                 * furthest along in the search direction as starting
                 * point. */
                LIST_FOREACH(matches, i, m->matches) {
                        uint64_t cp;

                        r = find_location_for_match(j, i, f, direction, NULL, &cp);
                        if (r <= 0)
                                return r;

                        if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
                                np = cp;
                }

                return next_for_match(j, m, f, np, direction, ret, offset);
        }
}
675
676 static int find_location_with_matches(
677 sd_journal *j,
678 JournalFile *f,
679 direction_t direction,
680 Object **ret,
681 uint64_t *offset) {
682
683 int r;
684
685 assert(j);
686 assert(f);
687 assert(ret);
688 assert(offset);
689
690 if (!j->level0) {
691 /* No matches is simple */
692
693 if (j->current_location.type == LOCATION_HEAD)
694 return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
695 if (j->current_location.type == LOCATION_TAIL)
696 return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
697 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
698 return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
699 if (j->current_location.monotonic_set) {
700 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
701 if (r != -ENOENT)
702 return r;
703 }
704 if (j->current_location.realtime_set)
705 return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
706
707 return journal_file_next_entry(f, 0, direction, ret, offset);
708 } else
709 return find_location_for_match(j, j->level0, f, direction, ret, offset);
710 }
711
712 static int next_with_matches(
713 sd_journal *j,
714 JournalFile *f,
715 direction_t direction,
716 Object **ret,
717 uint64_t *offset) {
718
719 assert(j);
720 assert(f);
721 assert(ret);
722 assert(offset);
723
724 /* No matches is easy. We simple advance the file
725 * pointer by one. */
726 if (!j->level0)
727 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
728
729 /* If we have a match then we look for the next matching entry
730 * with an offset at least one step larger */
731 return next_for_match(j, j->level0, f,
732 direction == DIRECTION_DOWN ? f->current_offset + 1
733 : f->current_offset - 1,
734 direction, ret, offset);
735 }
736
/* Moves file f to the first entry that lies beyond j->current_location in
 * the given direction and saves it as the file's candidate location.
 *
 * Returns 1 if such an entry was found, 0 if the file has nothing more to
 * offer in this direction, and a negative errno on failure. */
static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
        Object *c;
        uint64_t cp, n_entries;
        int r;

        assert(j);
        assert(f);

        n_entries = le64toh(f->header->n_entries);

        /* If we hit EOF before, we don't need to look into this file again
         * unless direction changed or new entries appeared. */
        if (f->last_direction == direction && f->location_type == LOCATION_TAIL &&
            n_entries == f->last_n_entries)
                return 0;

        f->last_n_entries = n_entries;

        if (f->last_direction == direction && f->current_offset > 0) {
                /* LOCATION_SEEK here means we did the work in a previous
                 * iteration and the current location already points to a
                 * candidate entry. */
                if (f->location_type != LOCATION_SEEK) {
                        r = next_with_matches(j, f, direction, &c, &cp);
                        if (r <= 0)
                                return r;

                        journal_file_save_location(f, c, cp);
                }
        } else {
                /* Direction changed, or the file has no position yet:
                 * look up the position from scratch. */
                f->last_direction = direction;

                r = find_location_with_matches(j, f, direction, &c, &cp);
                if (r <= 0)
                        return r;

                journal_file_save_location(f, c, cp);
        }

        /* OK, we found the spot, now let's advance until an entry
         * that is actually different from what we were previously
         * looking at. This is necessary to handle entries which exist
         * in two (or more) journal files, and which shall all be
         * suppressed but one. */

        for (;;) {
                bool found;

                if (j->current_location.type == LOCATION_DISCRETE) {
                        int k;

                        k = compare_with_location(f, &j->current_location);

                        /* Strictly after the current entry when going down,
                         * strictly before it when going up */
                        found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
                } else
                        found = true;

                if (found)
                        return 1;

                r = next_with_matches(j, f, direction, &c, &cp);
                if (r <= 0)
                        return r;

                journal_file_save_location(f, c, cp);
        }
}
804
/* Advances the journal by one entry in the given direction.
 *
 * Every file is asked for its next candidate entry beyond the current
 * location; the candidate that comes first in iteration order wins and
 * becomes the new current location.
 *
 * Returns 1 if the journal was advanced, 0 if no file has a further entry,
 * and a negative errno on failure. */
static int real_journal_next(sd_journal *j, direction_t direction) {
        JournalFile *new_file = NULL;
        unsigned i, n_files;
        const void **files;
        Object *o;
        int r;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);

        r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
        if (r < 0)
                return r;

        for (i = 0; i < n_files; i++) {
                JournalFile *f = (JournalFile *)files[i];
                bool found;

                r = next_beyond_location(j, f, direction);
                if (r < 0) {
                        /* A file we cannot iterate through is dropped
                         * rather than failing the whole operation. */
                        log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
                        remove_file_real(j, f);
                        continue;
                } else if (r == 0) {
                        /* Remember that this file is exhausted, see
                         * next_beyond_location() */
                        f->location_type = LOCATION_TAIL;
                        continue;
                }

                if (!new_file)
                        found = true;
                else {
                        int k;

                        k = journal_file_compare_locations(f, new_file);

                        /* Prefer the earlier candidate when going down,
                         * the later one when going up */
                        found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
                }

                if (found)
                        new_file = f;
        }

        if (!new_file)
                return 0;

        r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
        if (r < 0)
                return r;

        set_location(j, new_file, o);

        return 1;
}
858
/* Advances the journal to the next entry. Returns 1 if it was advanced,
 * 0 if there is no further entry, negative errno on failure. */
_public_ int sd_journal_next(sd_journal *j) {
        return real_journal_next(j, DIRECTION_DOWN);
}
862
/* Moves the journal back to the previous entry. Returns 1 if it was moved,
 * 0 if there is no earlier entry, negative errno on failure. */
_public_ int sd_journal_previous(sd_journal *j) {
        return real_journal_next(j, DIRECTION_UP);
}
866
867 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
868 int c = 0, r;
869
870 assert_return(j, -EINVAL);
871 assert_return(!journal_pid_changed(j), -ECHILD);
872
873 if (skip == 0) {
874 /* If this is not a discrete skip, then at least
875 * resolve the current location */
876 if (j->current_location.type != LOCATION_DISCRETE) {
877 r = real_journal_next(j, direction);
878 if (r < 0)
879 return r;
880 }
881
882 return 0;
883 }
884
885 do {
886 r = real_journal_next(j, direction);
887 if (r < 0)
888 return r;
889
890 if (r == 0)
891 return c;
892
893 skip--;
894 c++;
895 } while (skip > 0);
896
897 return c;
898 }
899
/* Advances the journal by up to skip entries. Returns the number of
 * entries skipped, or a negative errno on failure. */
_public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
        return real_journal_next_skip(j, DIRECTION_DOWN, skip);
}
903
/* Moves the journal back by up to skip entries. Returns the number of
 * entries skipped, or a negative errno on failure. */
_public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
        return real_journal_next_skip(j, DIRECTION_UP, skip);
}
907
908 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
909 Object *o;
910 int r;
911 char bid[33], sid[33];
912
913 assert_return(j, -EINVAL);
914 assert_return(!journal_pid_changed(j), -ECHILD);
915 assert_return(cursor, -EINVAL);
916
917 if (!j->current_file || j->current_file->current_offset <= 0)
918 return -EADDRNOTAVAIL;
919
920 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
921 if (r < 0)
922 return r;
923
924 sd_id128_to_string(j->current_file->header->seqnum_id, sid);
925 sd_id128_to_string(o->entry.boot_id, bid);
926
927 if (asprintf(cursor,
928 "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
929 sid, le64toh(o->entry.seqnum),
930 bid, le64toh(o->entry.monotonic),
931 le64toh(o->entry.realtime),
932 le64toh(o->entry.xor_hash)) < 0)
933 return -ENOMEM;
934
935 return 0;
936 }
937
/* Parses a cursor string (see sd_journal_get_cursor()) and makes it the
 * seek target of the journal. The actual lookup is deferred until the
 * journal is iterated the next time.
 *
 * The cursor must provide at least one usable position: a sequence number
 * with its ID, a monotonic timestamp with its boot ID, or a realtime
 * timestamp.
 *
 * Returns 0 on success, -EINVAL if the cursor is malformed or lacks a
 * usable position, -ENOMEM on allocation failure, -ECHILD after fork(). */
_public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
        const char *word, *state;
        size_t l;
        unsigned long long seqnum, monotonic, realtime, xor_hash;
        bool
                seqnum_id_set = false,
                seqnum_set = false,
                boot_id_set = false,
                monotonic_set = false,
                realtime_set = false,
                xor_hash_set = false;
        sd_id128_t seqnum_id, boot_id;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);
        assert_return(!isempty(cursor), -EINVAL);

        FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
                char *item;
                int k = 0;

                /* Each item has the form <letter>=<value> */
                if (l < 2 || word[1] != '=')
                        return -EINVAL;

                /* word is not NUL terminated, hence parse a copy */
                item = strndup(word, l);
                if (!item)
                        return -ENOMEM;

                /* Items with an unknown letter are ignored */
                switch (word[0]) {

                case 's':
                        seqnum_id_set = true;
                        k = sd_id128_from_string(item+2, &seqnum_id);
                        break;

                case 'i':
                        seqnum_set = true;
                        if (sscanf(item+2, "%llx", &seqnum) != 1)
                                k = -EINVAL;
                        break;

                case 'b':
                        boot_id_set = true;
                        k = sd_id128_from_string(item+2, &boot_id);
                        break;

                case 'm':
                        monotonic_set = true;
                        if (sscanf(item+2, "%llx", &monotonic) != 1)
                                k = -EINVAL;
                        break;

                case 't':
                        realtime_set = true;
                        if (sscanf(item+2, "%llx", &realtime) != 1)
                                k = -EINVAL;
                        break;

                case 'x':
                        xor_hash_set = true;
                        if (sscanf(item+2, "%llx", &xor_hash) != 1)
                                k = -EINVAL;
                        break;
                }

                free(item);

                if (k < 0)
                        return k;
        }

        /* Refuse cursors that provide nothing we could seek by */
        if ((!seqnum_set || !seqnum_id_set) &&
            (!monotonic_set || !boot_id_set) &&
            !realtime_set)
                return -EINVAL;

        reset_location(j);

        j->current_location.type = LOCATION_SEEK;

        if (realtime_set) {
                j->current_location.realtime = (uint64_t) realtime;
                j->current_location.realtime_set = true;
        }

        /* Sequence number and monotonic timestamp are only meaningful
         * together with their respective IDs */
        if (seqnum_set && seqnum_id_set) {
                j->current_location.seqnum = (uint64_t) seqnum;
                j->current_location.seqnum_id = seqnum_id;
                j->current_location.seqnum_set = true;
        }

        if (monotonic_set && boot_id_set) {
                j->current_location.monotonic = (uint64_t) monotonic;
                j->current_location.boot_id = boot_id;
                j->current_location.monotonic_set = true;
        }

        if (xor_hash_set) {
                j->current_location.xor_hash = (uint64_t) xor_hash;
                j->current_location.xor_hash_set = true;
        }

        return 0;
}
1042
/* Checks whether the current entry matches the given cursor string.
 *
 * Every recognized item of the cursor is compared with the corresponding
 * property of the current entry; items with an unknown letter are ignored.
 *
 * Returns 1 if all items match, 0 at the first item that does not,
 * -EADDRNOTAVAIL if the journal is not positioned at an entry, -EINVAL if
 * the cursor is malformed, or another negative errno on failure. */
_public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
        int r;
        Object *o;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);
        assert_return(!isempty(cursor), -EINVAL);

        if (!j->current_file || j->current_file->current_offset <= 0)
                return -EADDRNOTAVAIL;

        r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
        if (r < 0)
                return r;

        for (;;) {
                _cleanup_free_ char *item = NULL;
                unsigned long long ll;
                sd_id128_t id;
                int k = 0;

                /* Take the next ';'-separated item off the cursor; a
                 * result of 0 ends the loop. */
                r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
                if (r < 0)
                        return r;

                if (r == 0)
                        break;

                /* Each item has the form <letter>=<value> */
                if (strlen(item) < 2 || item[1] != '=')
                        return -EINVAL;

                switch (item[0]) {

                case 's':
                        k = sd_id128_from_string(item+2, &id);
                        if (k < 0)
                                return k;
                        if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
                                return 0;
                        break;

                case 'i':
                        if (sscanf(item+2, "%llx", &ll) != 1)
                                return -EINVAL;
                        if (ll != le64toh(o->entry.seqnum))
                                return 0;
                        break;

                case 'b':
                        k = sd_id128_from_string(item+2, &id);
                        if (k < 0)
                                return k;
                        if (!sd_id128_equal(id, o->entry.boot_id))
                                return 0;
                        break;

                case 'm':
                        if (sscanf(item+2, "%llx", &ll) != 1)
                                return -EINVAL;
                        if (ll != le64toh(o->entry.monotonic))
                                return 0;
                        break;

                case 't':
                        if (sscanf(item+2, "%llx", &ll) != 1)
                                return -EINVAL;
                        if (ll != le64toh(o->entry.realtime))
                                return 0;
                        break;

                case 'x':
                        if (sscanf(item+2, "%llx", &ll) != 1)
                                return -EINVAL;
                        if (ll != le64toh(o->entry.xor_hash))
                                return 0;
                        break;
                }
        }

        return 1;
}
1124
1125 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1126 assert_return(j, -EINVAL);
1127 assert_return(!journal_pid_changed(j), -ECHILD);
1128
1129 reset_location(j);
1130 j->current_location.type = LOCATION_SEEK;
1131 j->current_location.boot_id = boot_id;
1132 j->current_location.monotonic = usec;
1133 j->current_location.monotonic_set = true;
1134
1135 return 0;
1136 }
1137
1138 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1139 assert_return(j, -EINVAL);
1140 assert_return(!journal_pid_changed(j), -ECHILD);
1141
1142 reset_location(j);
1143 j->current_location.type = LOCATION_SEEK;
1144 j->current_location.realtime = usec;
1145 j->current_location.realtime_set = true;
1146
1147 return 0;
1148 }
1149
1150 _public_ int sd_journal_seek_head(sd_journal *j) {
1151 assert_return(j, -EINVAL);
1152 assert_return(!journal_pid_changed(j), -ECHILD);
1153
1154 reset_location(j);
1155 j->current_location.type = LOCATION_HEAD;
1156
1157 return 0;
1158 }
1159
1160 _public_ int sd_journal_seek_tail(sd_journal *j) {
1161 assert_return(j, -EINVAL);
1162 assert_return(!journal_pid_changed(j), -ECHILD);
1163
1164 reset_location(j);
1165 j->current_location.type = LOCATION_TAIL;
1166
1167 return 0;
1168 }
1169
1170 static void check_network(sd_journal *j, int fd) {
1171 assert(j);
1172
1173 if (j->on_network)
1174 return;
1175
1176 j->on_network = fd_is_network_fs(fd);
1177 }
1178
/* Checks whether filename belongs to the journal type named by prefix,
 * i.e. whether it is "<prefix>.journal", "<prefix>.journal~" or starts
 * with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *plain, *with_tilde, *at_prefix;

        at_prefix = strjoina(prefix, "@");
        plain = strjoina(prefix, ".journal");
        with_tilde = strjoina(plain, "~");

        if (startswith(filename, at_prefix))
                return true;

        return STR_IN_SET(filename, plain, with_tilde);
}
1189
1190 static bool file_type_wanted(int flags, const char *filename) {
1191 assert(filename);
1192
1193 if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1194 return false;
1195
1196 /* no flags set → every type is OK */
1197 if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1198 return true;
1199
1200 if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1201 return true;
1202
1203 if (flags & SD_JOURNAL_CURRENT_USER) {
1204 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1205
1206 xsprintf(prefix, "user-"UID_FMT, getuid());
1207
1208 if (file_has_type_prefix(prefix, filename))
1209 return true;
1210 }
1211
1212 return false;
1213 }
1214
1215 static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
1216 assert(j);
1217 assert(path);
1218 assert(prefix);
1219
1220 if (j->toplevel_fd >= 0)
1221 return false;
1222
1223 return path_startswith(path, prefix);
1224 }
1225
1226 static void track_file_disposition(sd_journal *j, JournalFile *f) {
1227 assert(j);
1228 assert(f);
1229
1230 if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
1231 j->has_runtime_files = true;
1232 else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
1233 j->has_persistent_files = true;
1234 }
1235
/* Returns a pointer to the first character of p that is not a '/', i.e.
 * skips all leading slashes. NULL is passed through. */
static const char *skip_slash(const char *p) {
        const char *q;

        if (!p)
                return NULL;

        for (q = p; *q == '/'; q++)
                ;

        return q;
}
1246
1247 static int add_any_file(
1248 sd_journal *j,
1249 int fd,
1250 const char *path) {
1251
1252 bool close_fd = false;
1253 JournalFile *f;
1254 struct stat st;
1255 int r, k;
1256
1257 assert(j);
1258 assert(fd >= 0 || path);
1259
1260 if (fd < 0) {
1261 if (j->toplevel_fd >= 0)
1262 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1263 * openat() ignores the first argument. */
1264
1265 fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1266 else
1267 fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1268 if (fd < 0) {
1269 r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
1270 goto finish;
1271 }
1272
1273 close_fd = true;
1274
1275 r = fd_nonblock(fd, false);
1276 if (r < 0) {
1277 r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
1278 goto finish;
1279 }
1280 }
1281
1282 if (fstat(fd, &st) < 0) {
1283 r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path);
1284 goto finish;
1285 }
1286
1287 r = stat_verify_regular(&st);
1288 if (r < 0) {
1289 log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path);
1290 goto finish;
1291 }
1292
1293 f = ordered_hashmap_get(j->files, path);
1294 if (f) {
1295 if (f->last_stat.st_dev == st.st_dev &&
1296 f->last_stat.st_ino == st.st_ino) {
1297
1298 /* We already track this file, under the same path and with the same device/inode numbers, it's
1299 * hence really the same. Mark this file as seen in this generation. This is used to GC old
1300 * files in process_q_overflow() to detect journal files that are still there and discern them
1301 * from those which are gone. */
1302
1303 f->last_seen_generation = j->generation;
1304 r = 0;
1305 goto finish;
1306 }
1307
1308 /* So we tracked a file under this name, but it has a different inode/device. In that case, it got
1309 * replaced (probably due to rotation?), let's drop it hence from our list. */
1310 remove_file_real(j, f);
1311 f = NULL;
1312 }
1313
1314 if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1315 log_debug("Too many open journal files, not adding %s.", path);
1316 r = -ETOOMANYREFS;
1317 goto finish;
1318 }
1319
1320 r = journal_file_open(fd, path, O_RDONLY, 0, false, 0, false, NULL, j->mmap, NULL, NULL, &f);
1321 if (r < 0) {
1322 log_debug_errno(r, "Failed to open journal file %s: %m", path);
1323 goto finish;
1324 }
1325
1326 /* journal_file_dump(f); */
1327
1328 r = ordered_hashmap_put(j->files, f->path, f);
1329 if (r < 0) {
1330 f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */
1331 (void) journal_file_close(f);
1332 goto finish;
1333 }
1334
1335 close_fd = false; /* the fd is now owned by the JournalFile object */
1336
1337 f->last_seen_generation = j->generation;
1338
1339 track_file_disposition(j, f);
1340 check_network(j, f->fd);
1341
1342 j->current_invalidate_counter++;
1343
1344 log_debug("File %s added.", f->path);
1345
1346 r = 0;
1347
1348 finish:
1349 if (close_fd)
1350 safe_close(fd);
1351
1352 if (r < 0) {
1353 k = journal_put_error(j, r, path);
1354 if (k < 0)
1355 return k;
1356 }
1357
1358 return r;
1359 }
1360
1361 static int add_file_by_name(
1362 sd_journal *j,
1363 const char *prefix,
1364 const char *filename) {
1365
1366 const char *path;
1367
1368 assert(j);
1369 assert(prefix);
1370 assert(filename);
1371
1372 if (j->no_new_files)
1373 return 0;
1374
1375 if (!file_type_wanted(j->flags, filename))
1376 return 0;
1377
1378 path = strjoina(prefix, "/", filename);
1379 return add_any_file(j, -1, path);
1380 }
1381
1382 static void remove_file_by_name(
1383 sd_journal *j,
1384 const char *prefix,
1385 const char *filename) {
1386
1387 const char *path;
1388 JournalFile *f;
1389
1390 assert(j);
1391 assert(prefix);
1392 assert(filename);
1393
1394 path = strjoina(prefix, "/", filename);
1395 f = ordered_hashmap_get(j->files, path);
1396 if (!f)
1397 return;
1398
1399 remove_file_real(j, f);
1400 }
1401
1402 static void remove_file_real(sd_journal *j, JournalFile *f) {
1403 assert(j);
1404 assert(f);
1405
1406 (void) ordered_hashmap_remove(j->files, f->path);
1407
1408 log_debug("File %s removed.", f->path);
1409
1410 if (j->current_file == f) {
1411 j->current_file = NULL;
1412 j->current_field = 0;
1413 }
1414
1415 if (j->unique_file == f) {
1416 /* Jump to the next unique_file or NULL if that one was last */
1417 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1418 j->unique_offset = 0;
1419 if (!j->unique_file)
1420 j->unique_file_lost = true;
1421 }
1422
1423 if (j->fields_file == f) {
1424 j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
1425 j->fields_offset = 0;
1426 if (!j->fields_file)
1427 j->fields_file_lost = true;
1428 }
1429
1430 (void) journal_file_close(f);
1431
1432 j->current_invalidate_counter++;
1433 }
1434
1435 static int dirname_is_machine_id(const char *fn) {
1436 sd_id128_t id, machine;
1437 int r;
1438
1439 r = sd_id128_get_machine(&machine);
1440 if (r < 0)
1441 return r;
1442
1443 r = sd_id128_from_string(fn, &id);
1444 if (r < 0)
1445 return r;
1446
1447 return sd_id128_equal(id, machine);
1448 }
1449
1450 static bool dirent_is_journal_file(const struct dirent *de) {
1451 assert(de);
1452
1453 if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
1454 return false;
1455
1456 return endswith(de->d_name, ".journal") ||
1457 endswith(de->d_name, ".journal~");
1458 }
1459
1460 static bool dirent_is_id128_subdir(const struct dirent *de) {
1461 assert(de);
1462
1463 if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
1464 return false;
1465
1466 return id128_is_valid(de->d_name);
1467 }
1468
1469 static int directory_open(sd_journal *j, const char *path, DIR **ret) {
1470 DIR *d;
1471
1472 assert(j);
1473 assert(path);
1474 assert(ret);
1475
1476 if (j->toplevel_fd < 0)
1477 d = opendir(path);
1478 else
1479 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1480 * relative, by dropping the initial slash */
1481 d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
1482 if (!d)
1483 return -errno;
1484
1485 *ret = d;
1486 return 0;
1487 }
1488
1489 static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
1490
1491 static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
1492 struct dirent *de;
1493
1494 assert(j);
1495 assert(m);
1496 assert(d);
1497
1498 FOREACH_DIRENT_ALL(de, d, goto fail) {
1499
1500 if (dirent_is_journal_file(de))
1501 (void) add_file_by_name(j, m->path, de->d_name);
1502
1503 if (m->is_root && dirent_is_id128_subdir(de))
1504 (void) add_directory(j, m->path, de->d_name);
1505 }
1506
1507 return;
1508
1509 fail:
1510 log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
1511 }
1512
1513 static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
1514 int r;
1515
1516 assert(j);
1517 assert(m);
1518 assert(fd >= 0);
1519
1520 /* Watch this directory if that's enabled and if it not being watched yet. */
1521
1522 if (m->wd > 0) /* Already have a watch? */
1523 return;
1524 if (j->inotify_fd < 0) /* Not watching at all? */
1525 return;
1526
1527 m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
1528 if (m->wd < 0) {
1529 log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
1530 return;
1531 }
1532
1533 r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
1534 if (r == -EEXIST)
1535 log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
1536 if (r < 0) {
1537 log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
1538 (void) inotify_rm_watch(j->inotify_fd, m->wd);
1539 m->wd = -1;
1540 }
1541 }
1542
1543 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1544 _cleanup_free_ char *path = NULL;
1545 _cleanup_closedir_ DIR *d = NULL;
1546 Directory *m;
1547 int r, k;
1548
1549 assert(j);
1550 assert(prefix);
1551
1552 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1553 * and reenumerates directory contents */
1554
1555 if (dirname)
1556 path = strjoin(prefix, "/", dirname);
1557 else
1558 path = strdup(prefix);
1559 if (!path) {
1560 r = -ENOMEM;
1561 goto fail;
1562 }
1563
1564 log_debug("Considering directory '%s'.", path);
1565
1566 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1567 if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1568 !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
1569 return 0;
1570
1571 r = directory_open(j, path, &d);
1572 if (r < 0) {
1573 log_debug_errno(r, "Failed to open directory '%s': %m", path);
1574 goto fail;
1575 }
1576
1577 m = hashmap_get(j->directories_by_path, path);
1578 if (!m) {
1579 m = new0(Directory, 1);
1580 if (!m) {
1581 r = -ENOMEM;
1582 goto fail;
1583 }
1584
1585 m->is_root = false;
1586 m->path = path;
1587
1588 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1589 free(m);
1590 r = -ENOMEM;
1591 goto fail;
1592 }
1593
1594 path = NULL; /* avoid freeing in cleanup */
1595 j->current_invalidate_counter++;
1596
1597 log_debug("Directory %s added.", m->path);
1598
1599 } else if (m->is_root)
1600 return 0; /* Don't 'downgrade' from root directory */
1601
1602 m->last_seen_generation = j->generation;
1603
1604 directory_watch(j, m, dirfd(d),
1605 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1606 IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1607 IN_ONLYDIR);
1608
1609 if (!j->no_new_files)
1610 directory_enumerate(j, m, d);
1611
1612 check_network(j, dirfd(d));
1613
1614 return 0;
1615
1616 fail:
1617 k = journal_put_error(j, r, path ?: prefix);
1618 if (k < 0)
1619 return k;
1620
1621 return r;
1622 }
1623
1624 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
1625
1626 _cleanup_closedir_ DIR *d = NULL;
1627 Directory *m;
1628 int r, k;
1629
1630 assert(j);
1631
1632 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
1633 * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
1634 * populate the set, as well as to update it later. */
1635
1636 if (p) {
1637 /* If there's a path specified, use it. */
1638
1639 log_debug("Considering root directory '%s'.", p);
1640
1641 if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1642 !path_has_prefix(j, p, "/run"))
1643 return -EINVAL;
1644
1645 if (j->prefix)
1646 p = strjoina(j->prefix, p);
1647
1648 r = directory_open(j, p, &d);
1649 if (r == -ENOENT && missing_ok)
1650 return 0;
1651 if (r < 0) {
1652 log_debug_errno(r, "Failed to open root directory %s: %m", p);
1653 goto fail;
1654 }
1655 } else {
1656 int dfd;
1657
1658 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
1659 * opendir() will take possession of the fd, and close it, which we don't want. */
1660
1661 p = "."; /* store this as "." in the directories hashmap */
1662
1663 dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
1664 if (dfd < 0) {
1665 r = -errno;
1666 goto fail;
1667 }
1668
1669 d = fdopendir(dfd);
1670 if (!d) {
1671 r = -errno;
1672 safe_close(dfd);
1673 goto fail;
1674 }
1675
1676 rewinddir(d);
1677 }
1678
1679 m = hashmap_get(j->directories_by_path, p);
1680 if (!m) {
1681 m = new0(Directory, 1);
1682 if (!m) {
1683 r = -ENOMEM;
1684 goto fail;
1685 }
1686
1687 m->is_root = true;
1688
1689 m->path = strdup(p);
1690 if (!m->path) {
1691 free(m);
1692 r = -ENOMEM;
1693 goto fail;
1694 }
1695
1696 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1697 free(m->path);
1698 free(m);
1699 r = -ENOMEM;
1700 goto fail;
1701 }
1702
1703 j->current_invalidate_counter++;
1704
1705 log_debug("Root directory %s added.", m->path);
1706
1707 } else if (!m->is_root)
1708 return 0;
1709
1710 directory_watch(j, m, dirfd(d),
1711 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1712 IN_ONLYDIR);
1713
1714 if (!j->no_new_files)
1715 directory_enumerate(j, m, d);
1716
1717 check_network(j, dirfd(d));
1718
1719 return 0;
1720
1721 fail:
1722 k = journal_put_error(j, r, p);
1723 if (k < 0)
1724 return k;
1725
1726 return r;
1727 }
1728
1729 static void remove_directory(sd_journal *j, Directory *d) {
1730 assert(j);
1731
1732 if (d->wd > 0) {
1733 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1734
1735 if (j->inotify_fd >= 0)
1736 inotify_rm_watch(j->inotify_fd, d->wd);
1737 }
1738
1739 hashmap_remove(j->directories_by_path, d->path);
1740
1741 if (d->is_root)
1742 log_debug("Root directory %s removed.", d->path);
1743 else
1744 log_debug("Directory %s removed.", d->path);
1745
1746 free(d->path);
1747 free(d);
1748 }
1749
1750 static int add_search_paths(sd_journal *j) {
1751
1752 static const char search_paths[] =
1753 "/run/log/journal\0"
1754 "/var/log/journal\0";
1755 const char *p;
1756
1757 assert(j);
1758
1759 /* We ignore most errors here, since the idea is to only open
1760 * what's actually accessible, and ignore the rest. */
1761
1762 NULSTR_FOREACH(p, search_paths)
1763 (void) add_root_directory(j, p, true);
1764
1765 if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
1766 (void) add_root_directory(j, "/var/log/journal/remote", true);
1767
1768 return 0;
1769 }
1770
1771 static int add_current_paths(sd_journal *j) {
1772 Iterator i;
1773 JournalFile *f;
1774
1775 assert(j);
1776 assert(j->no_new_files);
1777
1778 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
1779 * treat them as fatal. */
1780
1781 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1782 _cleanup_free_ char *dir;
1783 int r;
1784
1785 dir = dirname_malloc(f->path);
1786 if (!dir)
1787 return -ENOMEM;
1788
1789 r = add_directory(j, dir, NULL);
1790 if (r < 0)
1791 return r;
1792 }
1793
1794 return 0;
1795 }
1796
1797 static int allocate_inotify(sd_journal *j) {
1798 assert(j);
1799
1800 if (j->inotify_fd < 0) {
1801 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1802 if (j->inotify_fd < 0)
1803 return -errno;
1804 }
1805
1806 return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
1807 }
1808
1809 static sd_journal *journal_new(int flags, const char *path) {
1810 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1811
1812 j = new0(sd_journal, 1);
1813 if (!j)
1814 return NULL;
1815
1816 j->original_pid = getpid_cached();
1817 j->toplevel_fd = -1;
1818 j->inotify_fd = -1;
1819 j->flags = flags;
1820 j->data_threshold = DEFAULT_DATA_THRESHOLD;
1821
1822 if (path) {
1823 char *t;
1824
1825 t = strdup(path);
1826 if (!t)
1827 return NULL;
1828
1829 if (flags & SD_JOURNAL_OS_ROOT)
1830 j->prefix = t;
1831 else
1832 j->path = t;
1833 }
1834
1835 j->files = ordered_hashmap_new(&path_hash_ops);
1836 if (!j->files)
1837 return NULL;
1838
1839 j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
1840 j->directories_by_path = hashmap_new(&path_hash_ops);
1841 j->mmap = mmap_cache_new();
1842 if (!j->files_cache || !j->directories_by_path || !j->mmap)
1843 return NULL;
1844
1845 return TAKE_PTR(j);
1846 }
1847
1848 #define OPEN_ALLOWED_FLAGS \
1849 (SD_JOURNAL_LOCAL_ONLY | \
1850 SD_JOURNAL_RUNTIME_ONLY | \
1851 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
1852
1853 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1854 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1855 int r;
1856
1857 assert_return(ret, -EINVAL);
1858 assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
1859
1860 j = journal_new(flags, NULL);
1861 if (!j)
1862 return -ENOMEM;
1863
1864 r = add_search_paths(j);
1865 if (r < 0)
1866 return r;
1867
1868 *ret = TAKE_PTR(j);
1869 return 0;
1870 }
1871
1872 #define OPEN_CONTAINER_ALLOWED_FLAGS \
1873 (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
1874
1875 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1876 _cleanup_free_ char *root = NULL, *class = NULL;
1877 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1878 char *p;
1879 int r;
1880
1881 /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
1882 * combination with sd_journal_open_directory_fd(). */
1883
1884 assert_return(machine, -EINVAL);
1885 assert_return(ret, -EINVAL);
1886 assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
1887 assert_return(machine_name_is_valid(machine), -EINVAL);
1888
1889 p = strjoina("/run/systemd/machines/", machine);
1890 r = parse_env_file(NULL, p,
1891 "ROOT", &root,
1892 "CLASS", &class);
1893 if (r == -ENOENT)
1894 return -EHOSTDOWN;
1895 if (r < 0)
1896 return r;
1897 if (!root)
1898 return -ENODATA;
1899
1900 if (!streq_ptr(class, "container"))
1901 return -EIO;
1902
1903 j = journal_new(flags, root);
1904 if (!j)
1905 return -ENOMEM;
1906
1907 r = add_search_paths(j);
1908 if (r < 0)
1909 return r;
1910
1911 *ret = TAKE_PTR(j);
1912 return 0;
1913 }
1914
1915 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
1916 (SD_JOURNAL_OS_ROOT | \
1917 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1918
1919 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1920 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1921 int r;
1922
1923 assert_return(ret, -EINVAL);
1924 assert_return(path, -EINVAL);
1925 assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
1926
1927 j = journal_new(flags, path);
1928 if (!j)
1929 return -ENOMEM;
1930
1931 if (flags & SD_JOURNAL_OS_ROOT)
1932 r = add_search_paths(j);
1933 else
1934 r = add_root_directory(j, path, false);
1935 if (r < 0)
1936 return r;
1937
1938 *ret = TAKE_PTR(j);
1939 return 0;
1940 }
1941
1942 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1943 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1944 const char **path;
1945 int r;
1946
1947 assert_return(ret, -EINVAL);
1948 assert_return(flags == 0, -EINVAL);
1949
1950 j = journal_new(flags, NULL);
1951 if (!j)
1952 return -ENOMEM;
1953
1954 STRV_FOREACH(path, paths) {
1955 r = add_any_file(j, -1, *path);
1956 if (r < 0)
1957 return r;
1958 }
1959
1960 j->no_new_files = true;
1961
1962 *ret = TAKE_PTR(j);
1963 return 0;
1964 }
1965
1966 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
1967 (SD_JOURNAL_OS_ROOT | \
1968 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1969
1970 _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
1971 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1972 struct stat st;
1973 int r;
1974
1975 assert_return(ret, -EINVAL);
1976 assert_return(fd >= 0, -EBADF);
1977 assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
1978
1979 if (fstat(fd, &st) < 0)
1980 return -errno;
1981
1982 if (!S_ISDIR(st.st_mode))
1983 return -EBADFD;
1984
1985 j = journal_new(flags, NULL);
1986 if (!j)
1987 return -ENOMEM;
1988
1989 j->toplevel_fd = fd;
1990
1991 if (flags & SD_JOURNAL_OS_ROOT)
1992 r = add_search_paths(j);
1993 else
1994 r = add_root_directory(j, NULL, false);
1995 if (r < 0)
1996 return r;
1997
1998 *ret = TAKE_PTR(j);
1999 return 0;
2000 }
2001
2002 _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
2003 Iterator iterator;
2004 JournalFile *f;
2005 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2006 unsigned i;
2007 int r;
2008
2009 assert_return(ret, -EINVAL);
2010 assert_return(n_fds > 0, -EBADF);
2011 assert_return(flags == 0, -EINVAL);
2012
2013 j = journal_new(flags, NULL);
2014 if (!j)
2015 return -ENOMEM;
2016
2017 for (i = 0; i < n_fds; i++) {
2018 struct stat st;
2019
2020 if (fds[i] < 0) {
2021 r = -EBADF;
2022 goto fail;
2023 }
2024
2025 if (fstat(fds[i], &st) < 0) {
2026 r = -errno;
2027 goto fail;
2028 }
2029
2030 r = stat_verify_regular(&st);
2031 if (r < 0)
2032 goto fail;
2033
2034 r = add_any_file(j, fds[i], NULL);
2035 if (r < 0)
2036 goto fail;
2037 }
2038
2039 j->no_new_files = true;
2040 j->no_inotify = true;
2041
2042 *ret = TAKE_PTR(j);
2043 return 0;
2044
2045 fail:
2046 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2047 * remain open */
2048 ORDERED_HASHMAP_FOREACH(f, j->files, iterator)
2049 f->close_fd = false;
2050
2051 return r;
2052 }
2053
2054 _public_ void sd_journal_close(sd_journal *j) {
2055 Directory *d;
2056
2057 if (!j)
2058 return;
2059
2060 sd_journal_flush_matches(j);
2061
2062 ordered_hashmap_free_with_destructor(j->files, journal_file_close);
2063 iterated_cache_free(j->files_cache);
2064
2065 while ((d = hashmap_first(j->directories_by_path)))
2066 remove_directory(j, d);
2067
2068 while ((d = hashmap_first(j->directories_by_wd)))
2069 remove_directory(j, d);
2070
2071 hashmap_free(j->directories_by_path);
2072 hashmap_free(j->directories_by_wd);
2073
2074 safe_close(j->inotify_fd);
2075
2076 if (j->mmap) {
2077 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
2078 mmap_cache_unref(j->mmap);
2079 }
2080
2081 hashmap_free_free(j->errors);
2082
2083 free(j->path);
2084 free(j->prefix);
2085 free(j->unique_field);
2086 free(j->fields_buffer);
2087 free(j);
2088 }
2089
2090 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
2091 Object *o;
2092 JournalFile *f;
2093 int r;
2094
2095 assert_return(j, -EINVAL);
2096 assert_return(!journal_pid_changed(j), -ECHILD);
2097 assert_return(ret, -EINVAL);
2098
2099 f = j->current_file;
2100 if (!f)
2101 return -EADDRNOTAVAIL;
2102
2103 if (f->current_offset <= 0)
2104 return -EADDRNOTAVAIL;
2105
2106 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2107 if (r < 0)
2108 return r;
2109
2110 *ret = le64toh(o->entry.realtime);
2111 return 0;
2112 }
2113
2114 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
2115 Object *o;
2116 JournalFile *f;
2117 int r;
2118 sd_id128_t id;
2119
2120 assert_return(j, -EINVAL);
2121 assert_return(!journal_pid_changed(j), -ECHILD);
2122
2123 f = j->current_file;
2124 if (!f)
2125 return -EADDRNOTAVAIL;
2126
2127 if (f->current_offset <= 0)
2128 return -EADDRNOTAVAIL;
2129
2130 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2131 if (r < 0)
2132 return r;
2133
2134 if (ret_boot_id)
2135 *ret_boot_id = o->entry.boot_id;
2136 else {
2137 r = sd_id128_get_boot(&id);
2138 if (r < 0)
2139 return r;
2140
2141 if (!sd_id128_equal(id, o->entry.boot_id))
2142 return -ESTALE;
2143 }
2144
2145 if (ret)
2146 *ret = le64toh(o->entry.monotonic);
2147
2148 return 0;
2149 }
2150
2151 static bool field_is_valid(const char *field) {
2152 const char *p;
2153
2154 assert(field);
2155
2156 if (isempty(field))
2157 return false;
2158
2159 if (startswith(field, "__"))
2160 return false;
2161
2162 for (p = field; *p; p++) {
2163
2164 if (*p == '_')
2165 continue;
2166
2167 if (*p >= 'A' && *p <= 'Z')
2168 continue;
2169
2170 if (*p >= '0' && *p <= '9')
2171 continue;
2172
2173 return false;
2174 }
2175
2176 return true;
2177 }
2178
2179 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
2180 JournalFile *f;
2181 uint64_t i, n;
2182 size_t field_length;
2183 int r;
2184 Object *o;
2185
2186 assert_return(j, -EINVAL);
2187 assert_return(!journal_pid_changed(j), -ECHILD);
2188 assert_return(field, -EINVAL);
2189 assert_return(data, -EINVAL);
2190 assert_return(size, -EINVAL);
2191 assert_return(field_is_valid(field), -EINVAL);
2192
2193 f = j->current_file;
2194 if (!f)
2195 return -EADDRNOTAVAIL;
2196
2197 if (f->current_offset <= 0)
2198 return -EADDRNOTAVAIL;
2199
2200 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2201 if (r < 0)
2202 return r;
2203
2204 field_length = strlen(field);
2205
2206 n = journal_file_entry_n_items(o);
2207 for (i = 0; i < n; i++) {
2208 uint64_t p, l;
2209 le64_t le_hash;
2210 size_t t;
2211 int compression;
2212
2213 p = le64toh(o->entry.items[i].object_offset);
2214 le_hash = o->entry.items[i].hash;
2215 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2216 if (r < 0)
2217 return r;
2218
2219 if (le_hash != o->data.hash)
2220 return -EBADMSG;
2221
2222 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2223
2224 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2225 if (compression) {
2226 #if HAVE_XZ || HAVE_LZ4
2227 r = decompress_startswith(compression,
2228 o->data.payload, l,
2229 &f->compress_buffer, &f->compress_buffer_size,
2230 field, field_length, '=');
2231 if (r < 0)
2232 log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m",
2233 object_compressed_to_string(compression), l, p);
2234 else if (r > 0) {
2235
2236 size_t rsize;
2237
2238 r = decompress_blob(compression,
2239 o->data.payload, l,
2240 &f->compress_buffer, &f->compress_buffer_size, &rsize,
2241 j->data_threshold);
2242 if (r < 0)
2243 return r;
2244
2245 *data = f->compress_buffer;
2246 *size = (size_t) rsize;
2247
2248 return 0;
2249 }
2250 #else
2251 return -EPROTONOSUPPORT;
2252 #endif
2253 } else if (l >= field_length+1 &&
2254 memcmp(o->data.payload, field, field_length) == 0 &&
2255 o->data.payload[field_length] == '=') {
2256
2257 t = (size_t) l;
2258
2259 if ((uint64_t) t != l)
2260 return -E2BIG;
2261
2262 *data = o->data.payload;
2263 *size = t;
2264
2265 return 0;
2266 }
2267
2268 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2269 if (r < 0)
2270 return r;
2271 }
2272
2273 return -ENOENT;
2274 }
2275
2276 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
2277 size_t t;
2278 uint64_t l;
2279 int compression;
2280
2281 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2282 t = (size_t) l;
2283
2284 /* We can't read objects larger than 4G on a 32bit machine */
2285 if ((uint64_t) t != l)
2286 return -E2BIG;
2287
2288 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2289 if (compression) {
2290 #if HAVE_XZ || HAVE_LZ4
2291 size_t rsize;
2292 int r;
2293
2294 r = decompress_blob(compression,
2295 o->data.payload, l, &f->compress_buffer,
2296 &f->compress_buffer_size, &rsize, j->data_threshold);
2297 if (r < 0)
2298 return r;
2299
2300 *data = f->compress_buffer;
2301 *size = (size_t) rsize;
2302 #else
2303 return -EPROTONOSUPPORT;
2304 #endif
2305 } else {
2306 *data = o->data.payload;
2307 *size = t;
2308 }
2309
2310 return 0;
2311 }
2312
2313 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2314 JournalFile *f;
2315 uint64_t p, n;
2316 le64_t le_hash;
2317 int r;
2318 Object *o;
2319
2320 assert_return(j, -EINVAL);
2321 assert_return(!journal_pid_changed(j), -ECHILD);
2322 assert_return(data, -EINVAL);
2323 assert_return(size, -EINVAL);
2324
2325 f = j->current_file;
2326 if (!f)
2327 return -EADDRNOTAVAIL;
2328
2329 if (f->current_offset <= 0)
2330 return -EADDRNOTAVAIL;
2331
2332 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2333 if (r < 0)
2334 return r;
2335
2336 n = journal_file_entry_n_items(o);
2337 if (j->current_field >= n)
2338 return 0;
2339
2340 p = le64toh(o->entry.items[j->current_field].object_offset);
2341 le_hash = o->entry.items[j->current_field].hash;
2342 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2343 if (r < 0)
2344 return r;
2345
2346 if (le_hash != o->data.hash)
2347 return -EBADMSG;
2348
2349 r = return_data(j, f, o, data, size);
2350 if (r < 0)
2351 return r;
2352
2353 j->current_field++;
2354
2355 return 1;
2356 }
2357
2358 _public_ void sd_journal_restart_data(sd_journal *j) {
2359 if (!j)
2360 return;
2361
2362 j->current_field = 0;
2363 }
2364
2365 static int reiterate_all_paths(sd_journal *j) {
2366 assert(j);
2367
2368 if (j->no_new_files)
2369 return add_current_paths(j);
2370
2371 if (j->flags & SD_JOURNAL_OS_ROOT)
2372 return add_search_paths(j);
2373
2374 if (j->toplevel_fd >= 0)
2375 return add_root_directory(j, NULL, false);
2376
2377 if (j->path)
2378 return add_root_directory(j, j->path, true);
2379
2380 return add_search_paths(j);
2381 }
2382
2383 _public_ int sd_journal_get_fd(sd_journal *j) {
2384 int r;
2385
2386 assert_return(j, -EINVAL);
2387 assert_return(!journal_pid_changed(j), -ECHILD);
2388
2389 if (j->no_inotify)
2390 return -EMEDIUMTYPE;
2391
2392 if (j->inotify_fd >= 0)
2393 return j->inotify_fd;
2394
2395 r = allocate_inotify(j);
2396 if (r < 0)
2397 return r;
2398
2399 log_debug("Reiterating files to get inotify watches established.");
2400
2401 /* Iterate through all dirs again, to add them to the inotify */
2402 r = reiterate_all_paths(j);
2403 if (r < 0)
2404 return r;
2405
2406 return j->inotify_fd;
2407 }
2408
2409 _public_ int sd_journal_get_events(sd_journal *j) {
2410 int fd;
2411
2412 assert_return(j, -EINVAL);
2413 assert_return(!journal_pid_changed(j), -ECHILD);
2414
2415 fd = sd_journal_get_fd(j);
2416 if (fd < 0)
2417 return fd;
2418
2419 return POLLIN;
2420 }
2421
2422 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2423 int fd;
2424
2425 assert_return(j, -EINVAL);
2426 assert_return(!journal_pid_changed(j), -ECHILD);
2427 assert_return(timeout_usec, -EINVAL);
2428
2429 fd = sd_journal_get_fd(j);
2430 if (fd < 0)
2431 return fd;
2432
2433 if (!j->on_network) {
2434 *timeout_usec = (uint64_t) -1;
2435 return 0;
2436 }
2437
2438 /* If we are on the network we need to regularly check for
2439 * changes manually */
2440
2441 *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2442 return 1;
2443 }
2444
2445 static void process_q_overflow(sd_journal *j) {
2446 JournalFile *f;
2447 Directory *m;
2448 Iterator i;
2449
2450 assert(j);
2451
2452 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2453 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2454 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2455 * are subject for unloading. */
2456
2457 log_debug("Inotify queue overrun, reiterating everything.");
2458
2459 j->generation++;
2460 (void) reiterate_all_paths(j);
2461
2462 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2463
2464 if (f->last_seen_generation == j->generation)
2465 continue;
2466
2467 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
2468 remove_file_real(j, f);
2469 }
2470
2471 HASHMAP_FOREACH(m, j->directories_by_path, i) {
2472
2473 if (m->last_seen_generation == j->generation)
2474 continue;
2475
2476 if (m->is_root) /* Never GC root directories */
2477 continue;
2478
2479 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
2480 remove_directory(j, m);
2481 }
2482
2483 log_debug("Reiteration complete.");
2484 }
2485
2486 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2487 Directory *d;
2488
2489 assert(j);
2490 assert(e);
2491
2492 if (e->mask & IN_Q_OVERFLOW) {
2493 process_q_overflow(j);
2494 return;
2495 }
2496
2497 /* Is this a subdirectory we watch? */
2498 d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2499 if (d) {
2500 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2501 (endswith(e->name, ".journal") ||
2502 endswith(e->name, ".journal~"))) {
2503
2504 /* Event for a journal file */
2505
2506 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2507 (void) add_file_by_name(j, d->path, e->name);
2508 else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
2509 remove_file_by_name(j, d->path, e->name);
2510
2511 } else if (!d->is_root && e->len == 0) {
2512
2513 /* Event for a subdirectory */
2514
2515 if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
2516 remove_directory(j, d);
2517
2518 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
2519
2520 /* Event for root directory */
2521
2522 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2523 (void) add_directory(j, d->path, e->name);
2524 }
2525
2526 return;
2527 }
2528
2529 if (e->mask & IN_IGNORED)
2530 return;
2531
2532 log_debug("Unexpected inotify event.");
2533 }
2534
2535 static int determine_change(sd_journal *j) {
2536 bool b;
2537
2538 assert(j);
2539
2540 b = j->current_invalidate_counter != j->last_invalidate_counter;
2541 j->last_invalidate_counter = j->current_invalidate_counter;
2542
2543 return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2544 }
2545
2546 _public_ int sd_journal_process(sd_journal *j) {
2547 bool got_something = false;
2548
2549 assert_return(j, -EINVAL);
2550 assert_return(!journal_pid_changed(j), -ECHILD);
2551
2552 if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
2553 return 0;
2554
2555 j->last_process_usec = now(CLOCK_MONOTONIC);
2556 j->last_invalidate_counter = j->current_invalidate_counter;
2557
2558 for (;;) {
2559 union inotify_event_buffer buffer;
2560 struct inotify_event *e;
2561 ssize_t l;
2562
2563 l = read(j->inotify_fd, &buffer, sizeof(buffer));
2564 if (l < 0) {
2565 if (IN_SET(errno, EAGAIN, EINTR))
2566 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2567
2568 return -errno;
2569 }
2570
2571 got_something = true;
2572
2573 FOREACH_INOTIFY_EVENT(e, buffer, l)
2574 process_inotify_event(j, e);
2575 }
2576 }
2577
2578 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2579 int r;
2580 uint64_t t;
2581
2582 assert_return(j, -EINVAL);
2583 assert_return(!journal_pid_changed(j), -ECHILD);
2584
2585 if (j->inotify_fd < 0) {
2586
2587 /* This is the first invocation, hence create the
2588 * inotify watch */
2589 r = sd_journal_get_fd(j);
2590 if (r < 0)
2591 return r;
2592
2593 /* The journal might have changed since the context
2594 * object was created and we weren't watching before,
2595 * hence don't wait for anything, and return
2596 * immediately. */
2597 return determine_change(j);
2598 }
2599
2600 r = sd_journal_get_timeout(j, &t);
2601 if (r < 0)
2602 return r;
2603
2604 if (t != (uint64_t) -1) {
2605 usec_t n;
2606
2607 n = now(CLOCK_MONOTONIC);
2608 t = t > n ? t - n : 0;
2609
2610 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2611 timeout_usec = t;
2612 }
2613
2614 do {
2615 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2616 } while (r == -EINTR);
2617
2618 if (r < 0)
2619 return r;
2620
2621 return sd_journal_process(j);
2622 }
2623
2624 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2625 Iterator i;
2626 JournalFile *f;
2627 bool first = true;
2628 uint64_t fmin = 0, tmax = 0;
2629 int r;
2630
2631 assert_return(j, -EINVAL);
2632 assert_return(!journal_pid_changed(j), -ECHILD);
2633 assert_return(from || to, -EINVAL);
2634 assert_return(from != to, -EINVAL);
2635
2636 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2637 usec_t fr, t;
2638
2639 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2640 if (r == -ENOENT)
2641 continue;
2642 if (r < 0)
2643 return r;
2644 if (r == 0)
2645 continue;
2646
2647 if (first) {
2648 fmin = fr;
2649 tmax = t;
2650 first = false;
2651 } else {
2652 fmin = MIN(fr, fmin);
2653 tmax = MAX(t, tmax);
2654 }
2655 }
2656
2657 if (from)
2658 *from = fmin;
2659 if (to)
2660 *to = tmax;
2661
2662 return first ? 0 : 1;
2663 }
2664
2665 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2666 Iterator i;
2667 JournalFile *f;
2668 bool found = false;
2669 int r;
2670
2671 assert_return(j, -EINVAL);
2672 assert_return(!journal_pid_changed(j), -ECHILD);
2673 assert_return(from || to, -EINVAL);
2674 assert_return(from != to, -EINVAL);
2675
2676 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2677 usec_t fr, t;
2678
2679 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2680 if (r == -ENOENT)
2681 continue;
2682 if (r < 0)
2683 return r;
2684 if (r == 0)
2685 continue;
2686
2687 if (found) {
2688 if (from)
2689 *from = MIN(fr, *from);
2690 if (to)
2691 *to = MAX(t, *to);
2692 } else {
2693 if (from)
2694 *from = fr;
2695 if (to)
2696 *to = t;
2697 found = true;
2698 }
2699 }
2700
2701 return found;
2702 }
2703
2704 void journal_print_header(sd_journal *j) {
2705 Iterator i;
2706 JournalFile *f;
2707 bool newline = false;
2708
2709 assert(j);
2710
2711 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2712 if (newline)
2713 putchar('\n');
2714 else
2715 newline = true;
2716
2717 journal_file_print_header(f);
2718 }
2719 }
2720
2721 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2722 Iterator i;
2723 JournalFile *f;
2724 uint64_t sum = 0;
2725
2726 assert_return(j, -EINVAL);
2727 assert_return(!journal_pid_changed(j), -ECHILD);
2728 assert_return(bytes, -EINVAL);
2729
2730 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2731 struct stat st;
2732
2733 if (fstat(f->fd, &st) < 0)
2734 return -errno;
2735
2736 sum += (uint64_t) st.st_blocks * 512ULL;
2737 }
2738
2739 *bytes = sum;
2740 return 0;
2741 }
2742
2743 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2744 char *f;
2745
2746 assert_return(j, -EINVAL);
2747 assert_return(!journal_pid_changed(j), -ECHILD);
2748 assert_return(!isempty(field), -EINVAL);
2749 assert_return(field_is_valid(field), -EINVAL);
2750
2751 f = strdup(field);
2752 if (!f)
2753 return -ENOMEM;
2754
2755 free(j->unique_field);
2756 j->unique_field = f;
2757 j->unique_file = NULL;
2758 j->unique_offset = 0;
2759 j->unique_file_lost = false;
2760
2761 return 0;
2762 }
2763
2764 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2765 size_t k;
2766
2767 assert_return(j, -EINVAL);
2768 assert_return(!journal_pid_changed(j), -ECHILD);
2769 assert_return(data, -EINVAL);
2770 assert_return(l, -EINVAL);
2771 assert_return(j->unique_field, -EINVAL);
2772
2773 k = strlen(j->unique_field);
2774
2775 if (!j->unique_file) {
2776 if (j->unique_file_lost)
2777 return 0;
2778
2779 j->unique_file = ordered_hashmap_first(j->files);
2780 if (!j->unique_file)
2781 return 0;
2782
2783 j->unique_offset = 0;
2784 }
2785
2786 for (;;) {
2787 JournalFile *of;
2788 Iterator i;
2789 Object *o;
2790 const void *odata;
2791 size_t ol;
2792 bool found;
2793 int r;
2794
2795 /* Proceed to next data object in the field's linked list */
2796 if (j->unique_offset == 0) {
2797 r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2798 if (r < 0)
2799 return r;
2800
2801 j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2802 } else {
2803 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2804 if (r < 0)
2805 return r;
2806
2807 j->unique_offset = le64toh(o->data.next_field_offset);
2808 }
2809
2810 /* We reached the end of the list? Then start again, with the next file */
2811 if (j->unique_offset == 0) {
2812 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
2813 if (!j->unique_file)
2814 return 0;
2815
2816 continue;
2817 }
2818
2819 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2820 * instead, so that we can look at this data object at the same
2821 * time as one on another file */
2822 r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
2823 if (r < 0)
2824 return r;
2825
2826 /* Let's do the type check by hand, since we used 0 context above. */
2827 if (o->object.type != OBJECT_DATA)
2828 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
2829 "%s:offset " OFSfmt ": object has type %d, expected %d",
2830 j->unique_file->path,
2831 j->unique_offset,
2832 o->object.type, OBJECT_DATA);
2833
2834 r = return_data(j, j->unique_file, o, &odata, &ol);
2835 if (r < 0)
2836 return r;
2837
2838 /* Check if we have at least the field name and "=". */
2839 if (ol <= k)
2840 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
2841 "%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2842 j->unique_file->path,
2843 j->unique_offset, ol, k + 1);
2844
2845 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=')
2846 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
2847 "%s:offset " OFSfmt ": object does not start with \"%s=\"",
2848 j->unique_file->path,
2849 j->unique_offset,
2850 j->unique_field);
2851
2852 /* OK, now let's see if we already returned this data
2853 * object by checking if it exists in the earlier
2854 * traversed files. */
2855 found = false;
2856 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2857 if (of == j->unique_file)
2858 break;
2859
2860 /* Skip this file it didn't have any fields indexed */
2861 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
2862 continue;
2863
2864 r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
2865 if (r < 0)
2866 return r;
2867 if (r > 0) {
2868 found = true;
2869 break;
2870 }
2871 }
2872
2873 if (found)
2874 continue;
2875
2876 r = return_data(j, j->unique_file, o, data, l);
2877 if (r < 0)
2878 return r;
2879
2880 return 1;
2881 }
2882 }
2883
2884 _public_ void sd_journal_restart_unique(sd_journal *j) {
2885 if (!j)
2886 return;
2887
2888 j->unique_file = NULL;
2889 j->unique_offset = 0;
2890 j->unique_file_lost = false;
2891 }
2892
2893 _public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
2894 int r;
2895
2896 assert_return(j, -EINVAL);
2897 assert_return(!journal_pid_changed(j), -ECHILD);
2898 assert_return(field, -EINVAL);
2899
2900 if (!j->fields_file) {
2901 if (j->fields_file_lost)
2902 return 0;
2903
2904 j->fields_file = ordered_hashmap_first(j->files);
2905 if (!j->fields_file)
2906 return 0;
2907
2908 j->fields_hash_table_index = 0;
2909 j->fields_offset = 0;
2910 }
2911
2912 for (;;) {
2913 JournalFile *f, *of;
2914 Iterator i;
2915 uint64_t m;
2916 Object *o;
2917 size_t sz;
2918 bool found;
2919
2920 f = j->fields_file;
2921
2922 if (j->fields_offset == 0) {
2923 bool eof = false;
2924
2925 /* We are not yet positioned at any field. Let's pick the first one */
2926 r = journal_file_map_field_hash_table(f);
2927 if (r < 0)
2928 return r;
2929
2930 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
2931 for (;;) {
2932 if (j->fields_hash_table_index >= m) {
2933 /* Reached the end of the hash table, go to the next file. */
2934 eof = true;
2935 break;
2936 }
2937
2938 j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
2939
2940 if (j->fields_offset != 0)
2941 break;
2942
2943 /* Empty hash table bucket, go to next one */
2944 j->fields_hash_table_index++;
2945 }
2946
2947 if (eof) {
2948 /* Proceed with next file */
2949 j->fields_file = ordered_hashmap_next(j->files, f->path);
2950 if (!j->fields_file) {
2951 *field = NULL;
2952 return 0;
2953 }
2954
2955 j->fields_offset = 0;
2956 j->fields_hash_table_index = 0;
2957 continue;
2958 }
2959
2960 } else {
2961 /* We are already positioned at a field. If so, let's figure out the next field from it */
2962
2963 r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
2964 if (r < 0)
2965 return r;
2966
2967 j->fields_offset = le64toh(o->field.next_hash_offset);
2968 if (j->fields_offset == 0) {
2969 /* Reached the end of the hash table chain */
2970 j->fields_hash_table_index++;
2971 continue;
2972 }
2973 }
2974
2975 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
2976 r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
2977 if (r < 0)
2978 return r;
2979
2980 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
2981 if (o->object.type != OBJECT_FIELD)
2982 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
2983 "%s:offset " OFSfmt ": object has type %i, expected %i",
2984 f->path, j->fields_offset,
2985 o->object.type, OBJECT_FIELD);
2986
2987 sz = le64toh(o->object.size) - offsetof(Object, field.payload);
2988
2989 /* Let's see if we already returned this field name before. */
2990 found = false;
2991 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2992 if (of == f)
2993 break;
2994
2995 /* Skip this file it didn't have any fields indexed */
2996 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
2997 continue;
2998
2999 r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL);
3000 if (r < 0)
3001 return r;
3002 if (r > 0) {
3003 found = true;
3004 break;
3005 }
3006 }
3007
3008 if (found)
3009 continue;
3010
3011 /* Check if this is really a valid string containing no NUL byte */
3012 if (memchr(o->field.payload, 0, sz))
3013 return -EBADMSG;
3014
3015 if (sz > j->data_threshold)
3016 sz = j->data_threshold;
3017
3018 if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1))
3019 return -ENOMEM;
3020
3021 memcpy(j->fields_buffer, o->field.payload, sz);
3022 j->fields_buffer[sz] = 0;
3023
3024 if (!field_is_valid(j->fields_buffer))
3025 return -EBADMSG;
3026
3027 *field = j->fields_buffer;
3028 return 1;
3029 }
3030 }
3031
3032 _public_ void sd_journal_restart_fields(sd_journal *j) {
3033 if (!j)
3034 return;
3035
3036 j->fields_file = NULL;
3037 j->fields_hash_table_index = 0;
3038 j->fields_offset = 0;
3039 j->fields_file_lost = false;
3040 }
3041
3042 _public_ int sd_journal_reliable_fd(sd_journal *j) {
3043 assert_return(j, -EINVAL);
3044 assert_return(!journal_pid_changed(j), -ECHILD);
3045
3046 return !j->on_network;
3047 }
3048
3049 static char *lookup_field(const char *field, void *userdata) {
3050 sd_journal *j = userdata;
3051 const void *data;
3052 size_t size, d;
3053 int r;
3054
3055 assert(field);
3056 assert(j);
3057
3058 r = sd_journal_get_data(j, field, &data, &size);
3059 if (r < 0 ||
3060 size > REPLACE_VAR_MAX)
3061 return strdup(field);
3062
3063 d = strlen(field) + 1;
3064
3065 return strndup((const char*) data + d, size - d);
3066 }
3067
3068 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
3069 const void *data;
3070 size_t size;
3071 sd_id128_t id;
3072 _cleanup_free_ char *text = NULL, *cid = NULL;
3073 char *t;
3074 int r;
3075
3076 assert_return(j, -EINVAL);
3077 assert_return(!journal_pid_changed(j), -ECHILD);
3078 assert_return(ret, -EINVAL);
3079
3080 r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
3081 if (r < 0)
3082 return r;
3083
3084 cid = strndup((const char*) data + 11, size - 11);
3085 if (!cid)
3086 return -ENOMEM;
3087
3088 r = sd_id128_from_string(cid, &id);
3089 if (r < 0)
3090 return r;
3091
3092 r = catalog_get(CATALOG_DATABASE, id, &text);
3093 if (r < 0)
3094 return r;
3095
3096 t = replace_var(text, lookup_field, j);
3097 if (!t)
3098 return -ENOMEM;
3099
3100 *ret = t;
3101 return 0;
3102 }
3103
3104 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
3105 assert_return(ret, -EINVAL);
3106
3107 return catalog_get(CATALOG_DATABASE, id, ret);
3108 }
3109
3110 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
3111 assert_return(j, -EINVAL);
3112 assert_return(!journal_pid_changed(j), -ECHILD);
3113
3114 j->data_threshold = sz;
3115 return 0;
3116 }
3117
3118 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
3119 assert_return(j, -EINVAL);
3120 assert_return(!journal_pid_changed(j), -ECHILD);
3121 assert_return(sz, -EINVAL);
3122
3123 *sz = j->data_threshold;
3124 return 0;
3125 }
3126
3127 _public_ int sd_journal_has_runtime_files(sd_journal *j) {
3128 assert_return(j, -EINVAL);
3129
3130 return j->has_runtime_files;
3131 }
3132
3133 _public_ int sd_journal_has_persistent_files(sd_journal *j) {
3134 assert_return(j, -EINVAL);
3135
3136 return j->has_persistent_files;
3137 }