]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/sd-journal.c
e3cdb08c5deb9be74ed2d55b878a458d1a8e2ae0
[thirdparty/systemd.git] / src / journal / sd-journal.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 Copyright 2011 Lennart Poettering
4 ***/
5
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <inttypes.h>
9 #include <linux/magic.h>
10 #include <poll.h>
11 #include <stddef.h>
12 #include <sys/inotify.h>
13 #include <sys/vfs.h>
14 #include <unistd.h>
15
16 #include "sd-journal.h"
17
18 #include "alloc-util.h"
19 #include "catalog.h"
20 #include "compress.h"
21 #include "dirent-util.h"
22 #include "fd-util.h"
23 #include "fileio.h"
24 #include "format-util.h"
25 #include "fs-util.h"
26 #include "hashmap.h"
27 #include "hostname-util.h"
28 #include "id128-util.h"
29 #include "io-util.h"
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-internal.h"
33 #include "list.h"
34 #include "lookup3.h"
35 #include "missing.h"
36 #include "path-util.h"
37 #include "process-util.h"
38 #include "replace-var.h"
39 #include "stat-util.h"
40 #include "stat-util.h"
41 #include "stdio-util.h"
42 #include "string-util.h"
43 #include "strv.h"
44
/* Upper bound on the number of journal files a single sd_journal object
 * tracks; add_any_file() refuses further files once it is reached. */
#define JOURNAL_FILES_MAX 7168

/* Re-check interval for journal files (consumer is outside this excerpt). */
#define JOURNAL_FILES_RECHECK_USEC (USEC_PER_SEC * 2)

/* Limit applied to variable replacement (consumer is outside this excerpt). */
#define REPLACE_VAR_MAX 256

/* Default data threshold: 64 KiB. */
#define DEFAULT_DATA_THRESHOLD (64 * 1024)
52
53 static void remove_file_real(sd_journal *j, JournalFile *f);
54
55 static bool journal_pid_changed(sd_journal *j) {
56 assert(j);
57
58 /* We don't support people creating a journal object and
59 * keeping it around over a fork(). Let's complain. */
60
61 return j->original_pid != getpid_cached();
62 }
63
64 static int journal_put_error(sd_journal *j, int r, const char *path) {
65 char *copy;
66 int k;
67
68 /* Memorize an error we encountered, and store which
69 * file/directory it was generated from. Note that we store
70 * only *one* path per error code, as the error code is the
71 * key into the hashmap, and the path is the value. This means
72 * we keep track only of all error kinds, but not of all error
73 * locations. This has the benefit that the hashmap cannot
74 * grow beyond bounds.
75 *
76 * We return an error here only if we didn't manage to
77 * memorize the real error. */
78
79 if (r >= 0)
80 return r;
81
82 k = hashmap_ensure_allocated(&j->errors, NULL);
83 if (k < 0)
84 return k;
85
86 if (path) {
87 copy = strdup(path);
88 if (!copy)
89 return -ENOMEM;
90 } else
91 copy = NULL;
92
93 k = hashmap_put(j->errors, INT_TO_PTR(r), copy);
94 if (k < 0) {
95 free(copy);
96
97 if (k == -EEXIST)
98 return 0;
99
100 return k;
101 }
102
103 return 0;
104 }
105
106 static void detach_location(sd_journal *j) {
107 Iterator i;
108 JournalFile *f;
109
110 assert(j);
111
112 j->current_file = NULL;
113 j->current_field = 0;
114
115 ORDERED_HASHMAP_FOREACH(f, j->files, i)
116 journal_file_reset_location(f);
117 }
118
119 static void reset_location(sd_journal *j) {
120 assert(j);
121
122 detach_location(j);
123 zero(j->current_location);
124 }
125
126 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
127 assert(l);
128 assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
129 assert(f);
130 assert(o->object.type == OBJECT_ENTRY);
131
132 l->type = type;
133 l->seqnum = le64toh(o->entry.seqnum);
134 l->seqnum_id = f->header->seqnum_id;
135 l->realtime = le64toh(o->entry.realtime);
136 l->monotonic = le64toh(o->entry.monotonic);
137 l->boot_id = o->entry.boot_id;
138 l->xor_hash = le64toh(o->entry.xor_hash);
139
140 l->seqnum_set = l->realtime_set = l->monotonic_set = l->xor_hash_set = true;
141 }
142
143 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
144 assert(j);
145 assert(f);
146 assert(o);
147
148 init_location(&j->current_location, LOCATION_DISCRETE, f, o);
149
150 j->current_file = f;
151 j->current_field = 0;
152
153 /* Let f know its candidate entry was picked. */
154 assert(f->location_type == LOCATION_SEEK);
155 f->location_type = LOCATION_DISCRETE;
156 }
157
158 static int match_is_valid(const void *data, size_t size) {
159 const char *b, *p;
160
161 assert(data);
162
163 if (size < 2)
164 return false;
165
166 if (startswith(data, "__"))
167 return false;
168
169 b = data;
170 for (p = b; p < b + size; p++) {
171
172 if (*p == '=')
173 return p > b;
174
175 if (*p == '_')
176 continue;
177
178 if (*p >= 'A' && *p <= 'Z')
179 continue;
180
181 if (*p >= '0' && *p <= '9')
182 continue;
183
184 return false;
185 }
186
187 return false;
188 }
189
/* Checks whether two FIELD=value buffers (_a with s bytes, _b with t bytes)
 * refer to the same field, i.e. are byte-identical up to and including the
 * first '='. At least one '=' is expected within the common length; running
 * out of bytes without finding one trips assert_not_reached(). */
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t common = s < t ? s : t, pos;

        for (pos = 0; pos < common; pos++) {

                if (lhs[pos] != rhs[pos])
                        return false;

                /* Both bytes are equal here, so checking one side suffices. */
                if (lhs[pos] == '=')
                        return true;
        }

        assert_not_reached("\"=\" not found");
}
205
206 static Match *match_new(Match *p, MatchType t) {
207 Match *m;
208
209 m = new0(Match, 1);
210 if (!m)
211 return NULL;
212
213 m->type = t;
214
215 if (p) {
216 m->parent = p;
217 LIST_PREPEND(matches, p->matches, m);
218 }
219
220 return m;
221 }
222
223 static void match_free(Match *m) {
224 assert(m);
225
226 while (m->matches)
227 match_free(m->matches);
228
229 if (m->parent)
230 LIST_REMOVE(matches, m->parent->matches, m);
231
232 free(m->data);
233 free(m);
234 }
235
236 static void match_free_if_empty(Match *m) {
237 if (!m || m->matches)
238 return;
239
240 match_free(m);
241 }
242
243 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
244 Match *l3, *l4, *add_here = NULL, *m;
245 le64_t le_hash;
246
247 assert_return(j, -EINVAL);
248 assert_return(!journal_pid_changed(j), -ECHILD);
249 assert_return(data, -EINVAL);
250
251 if (size == 0)
252 size = strlen(data);
253
254 assert_return(match_is_valid(data, size), -EINVAL);
255
256 /* level 0: AND term
257 * level 1: OR terms
258 * level 2: AND terms
259 * level 3: OR terms
260 * level 4: concrete matches */
261
262 if (!j->level0) {
263 j->level0 = match_new(NULL, MATCH_AND_TERM);
264 if (!j->level0)
265 return -ENOMEM;
266 }
267
268 if (!j->level1) {
269 j->level1 = match_new(j->level0, MATCH_OR_TERM);
270 if (!j->level1)
271 return -ENOMEM;
272 }
273
274 if (!j->level2) {
275 j->level2 = match_new(j->level1, MATCH_AND_TERM);
276 if (!j->level2)
277 return -ENOMEM;
278 }
279
280 assert(j->level0->type == MATCH_AND_TERM);
281 assert(j->level1->type == MATCH_OR_TERM);
282 assert(j->level2->type == MATCH_AND_TERM);
283
284 le_hash = htole64(hash64(data, size));
285
286 LIST_FOREACH(matches, l3, j->level2->matches) {
287 assert(l3->type == MATCH_OR_TERM);
288
289 LIST_FOREACH(matches, l4, l3->matches) {
290 assert(l4->type == MATCH_DISCRETE);
291
292 /* Exactly the same match already? Then ignore
293 * this addition */
294 if (l4->le_hash == le_hash &&
295 l4->size == size &&
296 memcmp(l4->data, data, size) == 0)
297 return 0;
298
299 /* Same field? Then let's add this to this OR term */
300 if (same_field(data, size, l4->data, l4->size)) {
301 add_here = l3;
302 break;
303 }
304 }
305
306 if (add_here)
307 break;
308 }
309
310 if (!add_here) {
311 add_here = match_new(j->level2, MATCH_OR_TERM);
312 if (!add_here)
313 goto fail;
314 }
315
316 m = match_new(add_here, MATCH_DISCRETE);
317 if (!m)
318 goto fail;
319
320 m->le_hash = le_hash;
321 m->size = size;
322 m->data = memdup(data, size);
323 if (!m->data)
324 goto fail;
325
326 detach_location(j);
327
328 return 0;
329
330 fail:
331 match_free_if_empty(add_here);
332 match_free_if_empty(j->level2);
333 match_free_if_empty(j->level1);
334 match_free_if_empty(j->level0);
335
336 return -ENOMEM;
337 }
338
339 _public_ int sd_journal_add_conjunction(sd_journal *j) {
340 assert_return(j, -EINVAL);
341 assert_return(!journal_pid_changed(j), -ECHILD);
342
343 if (!j->level0)
344 return 0;
345
346 if (!j->level1)
347 return 0;
348
349 if (!j->level1->matches)
350 return 0;
351
352 j->level1 = NULL;
353 j->level2 = NULL;
354
355 return 0;
356 }
357
358 _public_ int sd_journal_add_disjunction(sd_journal *j) {
359 assert_return(j, -EINVAL);
360 assert_return(!journal_pid_changed(j), -ECHILD);
361
362 if (!j->level0)
363 return 0;
364
365 if (!j->level1)
366 return 0;
367
368 if (!j->level2)
369 return 0;
370
371 if (!j->level2->matches)
372 return 0;
373
374 j->level2 = NULL;
375 return 0;
376 }
377
378 static char *match_make_string(Match *m) {
379 char *p = NULL, *r;
380 Match *i;
381 bool enclose = false;
382
383 if (!m)
384 return strdup("none");
385
386 if (m->type == MATCH_DISCRETE)
387 return strndup(m->data, m->size);
388
389 LIST_FOREACH(matches, i, m->matches) {
390 char *t, *k;
391
392 t = match_make_string(i);
393 if (!t)
394 return mfree(p);
395
396 if (p) {
397 k = strjoin(p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t);
398 free(p);
399 free(t);
400
401 if (!k)
402 return NULL;
403
404 p = k;
405
406 enclose = true;
407 } else
408 p = t;
409 }
410
411 if (enclose) {
412 r = strjoin("(", p, ")");
413 free(p);
414 return r;
415 }
416
417 return p;
418 }
419
420 char *journal_make_match_string(sd_journal *j) {
421 assert(j);
422
423 return match_make_string(j->level0);
424 }
425
426 _public_ void sd_journal_flush_matches(sd_journal *j) {
427 if (!j)
428 return;
429
430 if (j->level0)
431 match_free(j->level0);
432
433 j->level0 = j->level1 = j->level2 = NULL;
434
435 detach_location(j);
436 }
437
438 _pure_ static int compare_with_location(JournalFile *f, Location *l) {
439 assert(f);
440 assert(l);
441 assert(f->location_type == LOCATION_SEEK);
442 assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
443
444 if (l->monotonic_set &&
445 sd_id128_equal(f->current_boot_id, l->boot_id) &&
446 l->realtime_set &&
447 f->current_realtime == l->realtime &&
448 l->xor_hash_set &&
449 f->current_xor_hash == l->xor_hash)
450 return 0;
451
452 if (l->seqnum_set &&
453 sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
454
455 if (f->current_seqnum < l->seqnum)
456 return -1;
457 if (f->current_seqnum > l->seqnum)
458 return 1;
459 }
460
461 if (l->monotonic_set &&
462 sd_id128_equal(f->current_boot_id, l->boot_id)) {
463
464 if (f->current_monotonic < l->monotonic)
465 return -1;
466 if (f->current_monotonic > l->monotonic)
467 return 1;
468 }
469
470 if (l->realtime_set) {
471
472 if (f->current_realtime < l->realtime)
473 return -1;
474 if (f->current_realtime > l->realtime)
475 return 1;
476 }
477
478 if (l->xor_hash_set) {
479
480 if (f->current_xor_hash < l->xor_hash)
481 return -1;
482 if (f->current_xor_hash > l->xor_hash)
483 return 1;
484 }
485
486 return 0;
487 }
488
489 static int next_for_match(
490 sd_journal *j,
491 Match *m,
492 JournalFile *f,
493 uint64_t after_offset,
494 direction_t direction,
495 Object **ret,
496 uint64_t *offset) {
497
498 int r;
499 uint64_t np = 0;
500 Object *n;
501
502 assert(j);
503 assert(m);
504 assert(f);
505
506 if (m->type == MATCH_DISCRETE) {
507 uint64_t dp;
508
509 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
510 if (r <= 0)
511 return r;
512
513 return journal_file_move_to_entry_by_offset_for_data(f, dp, after_offset, direction, ret, offset);
514
515 } else if (m->type == MATCH_OR_TERM) {
516 Match *i;
517
518 /* Find the earliest match beyond after_offset */
519
520 LIST_FOREACH(matches, i, m->matches) {
521 uint64_t cp;
522
523 r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
524 if (r < 0)
525 return r;
526 else if (r > 0) {
527 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
528 np = cp;
529 }
530 }
531
532 if (np == 0)
533 return 0;
534
535 } else if (m->type == MATCH_AND_TERM) {
536 Match *i, *last_moved;
537
538 /* Always jump to the next matching entry and repeat
539 * this until we find an offset that matches for all
540 * matches. */
541
542 if (!m->matches)
543 return 0;
544
545 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
546 if (r <= 0)
547 return r;
548
549 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
550 last_moved = m->matches;
551
552 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
553 uint64_t cp;
554
555 r = next_for_match(j, i, f, np, direction, NULL, &cp);
556 if (r <= 0)
557 return r;
558
559 assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
560 if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
561 np = cp;
562 last_moved = i;
563 }
564 }
565 }
566
567 assert(np > 0);
568
569 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
570 if (r < 0)
571 return r;
572
573 if (ret)
574 *ret = n;
575 if (offset)
576 *offset = np;
577
578 return 1;
579 }
580
581 static int find_location_for_match(
582 sd_journal *j,
583 Match *m,
584 JournalFile *f,
585 direction_t direction,
586 Object **ret,
587 uint64_t *offset) {
588
589 int r;
590
591 assert(j);
592 assert(m);
593 assert(f);
594
595 if (m->type == MATCH_DISCRETE) {
596 uint64_t dp;
597
598 r = journal_file_find_data_object_with_hash(f, m->data, m->size, le64toh(m->le_hash), NULL, &dp);
599 if (r <= 0)
600 return r;
601
602 /* FIXME: missing: find by monotonic */
603
604 if (j->current_location.type == LOCATION_HEAD)
605 return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_DOWN, ret, offset);
606 if (j->current_location.type == LOCATION_TAIL)
607 return journal_file_next_entry_for_data(f, NULL, 0, dp, DIRECTION_UP, ret, offset);
608 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
609 return journal_file_move_to_entry_by_seqnum_for_data(f, dp, j->current_location.seqnum, direction, ret, offset);
610 if (j->current_location.monotonic_set) {
611 r = journal_file_move_to_entry_by_monotonic_for_data(f, dp, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
612 if (r != -ENOENT)
613 return r;
614 }
615 if (j->current_location.realtime_set)
616 return journal_file_move_to_entry_by_realtime_for_data(f, dp, j->current_location.realtime, direction, ret, offset);
617
618 return journal_file_next_entry_for_data(f, NULL, 0, dp, direction, ret, offset);
619
620 } else if (m->type == MATCH_OR_TERM) {
621 uint64_t np = 0;
622 Object *n;
623 Match *i;
624
625 /* Find the earliest match */
626
627 LIST_FOREACH(matches, i, m->matches) {
628 uint64_t cp;
629
630 r = find_location_for_match(j, i, f, direction, NULL, &cp);
631 if (r < 0)
632 return r;
633 else if (r > 0) {
634 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
635 np = cp;
636 }
637 }
638
639 if (np == 0)
640 return 0;
641
642 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, &n);
643 if (r < 0)
644 return r;
645
646 if (ret)
647 *ret = n;
648 if (offset)
649 *offset = np;
650
651 return 1;
652
653 } else {
654 Match *i;
655 uint64_t np = 0;
656
657 assert(m->type == MATCH_AND_TERM);
658
659 /* First jump to the last match, and then find the
660 * next one where all matches match */
661
662 if (!m->matches)
663 return 0;
664
665 LIST_FOREACH(matches, i, m->matches) {
666 uint64_t cp;
667
668 r = find_location_for_match(j, i, f, direction, NULL, &cp);
669 if (r <= 0)
670 return r;
671
672 if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
673 np = cp;
674 }
675
676 return next_for_match(j, m, f, np, direction, ret, offset);
677 }
678 }
679
680 static int find_location_with_matches(
681 sd_journal *j,
682 JournalFile *f,
683 direction_t direction,
684 Object **ret,
685 uint64_t *offset) {
686
687 int r;
688
689 assert(j);
690 assert(f);
691 assert(ret);
692 assert(offset);
693
694 if (!j->level0) {
695 /* No matches is simple */
696
697 if (j->current_location.type == LOCATION_HEAD)
698 return journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset);
699 if (j->current_location.type == LOCATION_TAIL)
700 return journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset);
701 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
702 return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
703 if (j->current_location.monotonic_set) {
704 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
705 if (r != -ENOENT)
706 return r;
707 }
708 if (j->current_location.realtime_set)
709 return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
710
711 return journal_file_next_entry(f, 0, direction, ret, offset);
712 } else
713 return find_location_for_match(j, j->level0, f, direction, ret, offset);
714 }
715
716 static int next_with_matches(
717 sd_journal *j,
718 JournalFile *f,
719 direction_t direction,
720 Object **ret,
721 uint64_t *offset) {
722
723 assert(j);
724 assert(f);
725 assert(ret);
726 assert(offset);
727
728 /* No matches is easy. We simple advance the file
729 * pointer by one. */
730 if (!j->level0)
731 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
732
733 /* If we have a match then we look for the next matching entry
734 * with an offset at least one step larger */
735 return next_for_match(j, j->level0, f,
736 direction == DIRECTION_DOWN ? f->current_offset + 1
737 : f->current_offset - 1,
738 direction, ret, offset);
739 }
740
741 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
742 Object *c;
743 uint64_t cp, n_entries;
744 int r;
745
746 assert(j);
747 assert(f);
748
749 n_entries = le64toh(f->header->n_entries);
750
751 /* If we hit EOF before, we don't need to look into this file again
752 * unless direction changed or new entries appeared. */
753 if (f->last_direction == direction && f->location_type == LOCATION_TAIL &&
754 n_entries == f->last_n_entries)
755 return 0;
756
757 f->last_n_entries = n_entries;
758
759 if (f->last_direction == direction && f->current_offset > 0) {
760 /* LOCATION_SEEK here means we did the work in a previous
761 * iteration and the current location already points to a
762 * candidate entry. */
763 if (f->location_type != LOCATION_SEEK) {
764 r = next_with_matches(j, f, direction, &c, &cp);
765 if (r <= 0)
766 return r;
767
768 journal_file_save_location(f, c, cp);
769 }
770 } else {
771 f->last_direction = direction;
772
773 r = find_location_with_matches(j, f, direction, &c, &cp);
774 if (r <= 0)
775 return r;
776
777 journal_file_save_location(f, c, cp);
778 }
779
780 /* OK, we found the spot, now let's advance until an entry
781 * that is actually different from what we were previously
782 * looking at. This is necessary to handle entries which exist
783 * in two (or more) journal files, and which shall all be
784 * suppressed but one. */
785
786 for (;;) {
787 bool found;
788
789 if (j->current_location.type == LOCATION_DISCRETE) {
790 int k;
791
792 k = compare_with_location(f, &j->current_location);
793
794 found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
795 } else
796 found = true;
797
798 if (found)
799 return 1;
800
801 r = next_with_matches(j, f, direction, &c, &cp);
802 if (r <= 0)
803 return r;
804
805 journal_file_save_location(f, c, cp);
806 }
807 }
808
809 static int real_journal_next(sd_journal *j, direction_t direction) {
810 JournalFile *new_file = NULL;
811 unsigned i, n_files;
812 const void **files;
813 Object *o;
814 int r;
815
816 assert_return(j, -EINVAL);
817 assert_return(!journal_pid_changed(j), -ECHILD);
818
819 r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
820 if (r < 0)
821 return r;
822
823 for (i = 0; i < n_files; i++) {
824 JournalFile *f = (JournalFile *)files[i];
825 bool found;
826
827 r = next_beyond_location(j, f, direction);
828 if (r < 0) {
829 log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
830 remove_file_real(j, f);
831 continue;
832 } else if (r == 0) {
833 f->location_type = LOCATION_TAIL;
834 continue;
835 }
836
837 if (!new_file)
838 found = true;
839 else {
840 int k;
841
842 k = journal_file_compare_locations(f, new_file);
843
844 found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
845 }
846
847 if (found)
848 new_file = f;
849 }
850
851 if (!new_file)
852 return 0;
853
854 r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
855 if (r < 0)
856 return r;
857
858 set_location(j, new_file, o);
859
860 return 1;
861 }
862
863 _public_ int sd_journal_next(sd_journal *j) {
864 return real_journal_next(j, DIRECTION_DOWN);
865 }
866
867 _public_ int sd_journal_previous(sd_journal *j) {
868 return real_journal_next(j, DIRECTION_UP);
869 }
870
871 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
872 int c = 0, r;
873
874 assert_return(j, -EINVAL);
875 assert_return(!journal_pid_changed(j), -ECHILD);
876
877 if (skip == 0) {
878 /* If this is not a discrete skip, then at least
879 * resolve the current location */
880 if (j->current_location.type != LOCATION_DISCRETE) {
881 r = real_journal_next(j, direction);
882 if (r < 0)
883 return r;
884 }
885
886 return 0;
887 }
888
889 do {
890 r = real_journal_next(j, direction);
891 if (r < 0)
892 return r;
893
894 if (r == 0)
895 return c;
896
897 skip--;
898 c++;
899 } while (skip > 0);
900
901 return c;
902 }
903
904 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
905 return real_journal_next_skip(j, DIRECTION_DOWN, skip);
906 }
907
908 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
909 return real_journal_next_skip(j, DIRECTION_UP, skip);
910 }
911
912 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
913 Object *o;
914 int r;
915 char bid[33], sid[33];
916
917 assert_return(j, -EINVAL);
918 assert_return(!journal_pid_changed(j), -ECHILD);
919 assert_return(cursor, -EINVAL);
920
921 if (!j->current_file || j->current_file->current_offset <= 0)
922 return -EADDRNOTAVAIL;
923
924 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
925 if (r < 0)
926 return r;
927
928 sd_id128_to_string(j->current_file->header->seqnum_id, sid);
929 sd_id128_to_string(o->entry.boot_id, bid);
930
931 if (asprintf(cursor,
932 "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
933 sid, le64toh(o->entry.seqnum),
934 bid, le64toh(o->entry.monotonic),
935 le64toh(o->entry.realtime),
936 le64toh(o->entry.xor_hash)) < 0)
937 return -ENOMEM;
938
939 return 0;
940 }
941
942 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
943 const char *word, *state;
944 size_t l;
945 unsigned long long seqnum, monotonic, realtime, xor_hash;
946 bool
947 seqnum_id_set = false,
948 seqnum_set = false,
949 boot_id_set = false,
950 monotonic_set = false,
951 realtime_set = false,
952 xor_hash_set = false;
953 sd_id128_t seqnum_id, boot_id;
954
955 assert_return(j, -EINVAL);
956 assert_return(!journal_pid_changed(j), -ECHILD);
957 assert_return(!isempty(cursor), -EINVAL);
958
959 FOREACH_WORD_SEPARATOR(word, l, cursor, ";", state) {
960 char *item;
961 int k = 0;
962
963 if (l < 2 || word[1] != '=')
964 return -EINVAL;
965
966 item = strndup(word, l);
967 if (!item)
968 return -ENOMEM;
969
970 switch (word[0]) {
971
972 case 's':
973 seqnum_id_set = true;
974 k = sd_id128_from_string(item+2, &seqnum_id);
975 break;
976
977 case 'i':
978 seqnum_set = true;
979 if (sscanf(item+2, "%llx", &seqnum) != 1)
980 k = -EINVAL;
981 break;
982
983 case 'b':
984 boot_id_set = true;
985 k = sd_id128_from_string(item+2, &boot_id);
986 break;
987
988 case 'm':
989 monotonic_set = true;
990 if (sscanf(item+2, "%llx", &monotonic) != 1)
991 k = -EINVAL;
992 break;
993
994 case 't':
995 realtime_set = true;
996 if (sscanf(item+2, "%llx", &realtime) != 1)
997 k = -EINVAL;
998 break;
999
1000 case 'x':
1001 xor_hash_set = true;
1002 if (sscanf(item+2, "%llx", &xor_hash) != 1)
1003 k = -EINVAL;
1004 break;
1005 }
1006
1007 free(item);
1008
1009 if (k < 0)
1010 return k;
1011 }
1012
1013 if ((!seqnum_set || !seqnum_id_set) &&
1014 (!monotonic_set || !boot_id_set) &&
1015 !realtime_set)
1016 return -EINVAL;
1017
1018 reset_location(j);
1019
1020 j->current_location.type = LOCATION_SEEK;
1021
1022 if (realtime_set) {
1023 j->current_location.realtime = (uint64_t) realtime;
1024 j->current_location.realtime_set = true;
1025 }
1026
1027 if (seqnum_set && seqnum_id_set) {
1028 j->current_location.seqnum = (uint64_t) seqnum;
1029 j->current_location.seqnum_id = seqnum_id;
1030 j->current_location.seqnum_set = true;
1031 }
1032
1033 if (monotonic_set && boot_id_set) {
1034 j->current_location.monotonic = (uint64_t) monotonic;
1035 j->current_location.boot_id = boot_id;
1036 j->current_location.monotonic_set = true;
1037 }
1038
1039 if (xor_hash_set) {
1040 j->current_location.xor_hash = (uint64_t) xor_hash;
1041 j->current_location.xor_hash_set = true;
1042 }
1043
1044 return 0;
1045 }
1046
1047 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1048 int r;
1049 Object *o;
1050
1051 assert_return(j, -EINVAL);
1052 assert_return(!journal_pid_changed(j), -ECHILD);
1053 assert_return(!isempty(cursor), -EINVAL);
1054
1055 if (!j->current_file || j->current_file->current_offset <= 0)
1056 return -EADDRNOTAVAIL;
1057
1058 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1059 if (r < 0)
1060 return r;
1061
1062 for (;;) {
1063 _cleanup_free_ char *item = NULL;
1064 unsigned long long ll;
1065 sd_id128_t id;
1066 int k = 0;
1067
1068 r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1069 if (r < 0)
1070 return r;
1071
1072 if (r == 0)
1073 break;
1074
1075 if (strlen(item) < 2 || item[1] != '=')
1076 return -EINVAL;
1077
1078 switch (item[0]) {
1079
1080 case 's':
1081 k = sd_id128_from_string(item+2, &id);
1082 if (k < 0)
1083 return k;
1084 if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1085 return 0;
1086 break;
1087
1088 case 'i':
1089 if (sscanf(item+2, "%llx", &ll) != 1)
1090 return -EINVAL;
1091 if (ll != le64toh(o->entry.seqnum))
1092 return 0;
1093 break;
1094
1095 case 'b':
1096 k = sd_id128_from_string(item+2, &id);
1097 if (k < 0)
1098 return k;
1099 if (!sd_id128_equal(id, o->entry.boot_id))
1100 return 0;
1101 break;
1102
1103 case 'm':
1104 if (sscanf(item+2, "%llx", &ll) != 1)
1105 return -EINVAL;
1106 if (ll != le64toh(o->entry.monotonic))
1107 return 0;
1108 break;
1109
1110 case 't':
1111 if (sscanf(item+2, "%llx", &ll) != 1)
1112 return -EINVAL;
1113 if (ll != le64toh(o->entry.realtime))
1114 return 0;
1115 break;
1116
1117 case 'x':
1118 if (sscanf(item+2, "%llx", &ll) != 1)
1119 return -EINVAL;
1120 if (ll != le64toh(o->entry.xor_hash))
1121 return 0;
1122 break;
1123 }
1124 }
1125
1126 return 1;
1127 }
1128
1129 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1130 assert_return(j, -EINVAL);
1131 assert_return(!journal_pid_changed(j), -ECHILD);
1132
1133 reset_location(j);
1134 j->current_location.type = LOCATION_SEEK;
1135 j->current_location.boot_id = boot_id;
1136 j->current_location.monotonic = usec;
1137 j->current_location.monotonic_set = true;
1138
1139 return 0;
1140 }
1141
1142 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1143 assert_return(j, -EINVAL);
1144 assert_return(!journal_pid_changed(j), -ECHILD);
1145
1146 reset_location(j);
1147 j->current_location.type = LOCATION_SEEK;
1148 j->current_location.realtime = usec;
1149 j->current_location.realtime_set = true;
1150
1151 return 0;
1152 }
1153
1154 _public_ int sd_journal_seek_head(sd_journal *j) {
1155 assert_return(j, -EINVAL);
1156 assert_return(!journal_pid_changed(j), -ECHILD);
1157
1158 reset_location(j);
1159 j->current_location.type = LOCATION_HEAD;
1160
1161 return 0;
1162 }
1163
1164 _public_ int sd_journal_seek_tail(sd_journal *j) {
1165 assert_return(j, -EINVAL);
1166 assert_return(!journal_pid_changed(j), -ECHILD);
1167
1168 reset_location(j);
1169 j->current_location.type = LOCATION_TAIL;
1170
1171 return 0;
1172 }
1173
1174 static void check_network(sd_journal *j, int fd) {
1175 assert(j);
1176
1177 if (j->on_network)
1178 return;
1179
1180 j->on_network = fd_is_network_fs(fd);
1181 }
1182
/* Checks whether filename belongs to the journal type denoted by prefix,
 * i.e. whether it is "<prefix>.journal", "<prefix>.journal~", or starts
 * with "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *online, *disposed, *archived;

        /* strjoina() builds these strings on the stack of this function. */
        online = strjoina(prefix, ".journal");
        disposed = strjoina(online, "~");
        archived = strjoina(prefix, "@");

        if (streq(filename, online))
                return true;

        if (streq(filename, disposed))
                return true;

        return !!startswith(filename, archived);
}
1194
1195 static bool file_type_wanted(int flags, const char *filename) {
1196 assert(filename);
1197
1198 if (!endswith(filename, ".journal") && !endswith(filename, ".journal~"))
1199 return false;
1200
1201 /* no flags set → every type is OK */
1202 if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1203 return true;
1204
1205 if (flags & SD_JOURNAL_SYSTEM && file_has_type_prefix("system", filename))
1206 return true;
1207
1208 if (flags & SD_JOURNAL_CURRENT_USER) {
1209 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1210
1211 xsprintf(prefix, "user-"UID_FMT, getuid());
1212
1213 if (file_has_type_prefix(prefix, filename))
1214 return true;
1215 }
1216
1217 return false;
1218 }
1219
1220 static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
1221 assert(j);
1222 assert(path);
1223 assert(prefix);
1224
1225 if (j->toplevel_fd >= 0)
1226 return false;
1227
1228 return path_startswith(path, prefix);
1229 }
1230
1231 static void track_file_disposition(sd_journal *j, JournalFile *f) {
1232 assert(j);
1233 assert(f);
1234
1235 if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
1236 j->has_runtime_files = true;
1237 else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
1238 j->has_persistent_files = true;
1239 }
1240
/* Returns a pointer to the first character of p that is not a '/', i.e.
 * skips all leading slashes. NULL is passed through as NULL. */
static const char *skip_slash(const char *p) {
        const char *cursor;

        if (!p)
                return NULL;

        for (cursor = p; *cursor == '/'; cursor++)
                ;

        return cursor;
}
1251
1252 static int add_any_file(
1253 sd_journal *j,
1254 int fd,
1255 const char *path) {
1256
1257 bool close_fd = false;
1258 JournalFile *f;
1259 struct stat st;
1260 int r, k;
1261
1262 assert(j);
1263 assert(fd >= 0 || path);
1264
1265 if (fd < 0) {
1266 if (j->toplevel_fd >= 0)
1267 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1268 * openat() ignores the first argument. */
1269
1270 fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1271 else
1272 fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1273 if (fd < 0) {
1274 r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
1275 goto finish;
1276 }
1277
1278 close_fd = true;
1279
1280 r = fd_nonblock(fd, false);
1281 if (r < 0) {
1282 r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
1283 goto finish;
1284 }
1285 }
1286
1287 if (fstat(fd, &st) < 0) {
1288 r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path);
1289 goto finish;
1290 }
1291
1292 r = stat_verify_regular(&st);
1293 if (r < 0) {
1294 log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path);
1295 goto finish;
1296 }
1297
1298 f = ordered_hashmap_get(j->files, path);
1299 if (f) {
1300 if (f->last_stat.st_dev == st.st_dev &&
1301 f->last_stat.st_ino == st.st_ino) {
1302
1303 /* We already track this file, under the same path and with the same device/inode numbers, it's
1304 * hence really the same. Mark this file as seen in this generation. This is used to GC old
1305 * files in process_q_overflow() to detect journal files that are still there and discern them
1306 * from those which are gone. */
1307
1308 f->last_seen_generation = j->generation;
1309 r = 0;
1310 goto finish;
1311 }
1312
1313 /* So we tracked a file under this name, but it has a different inode/device. In that case, it got
1314 * replaced (probably due to rotation?), let's drop it hence from our list. */
1315 remove_file_real(j, f);
1316 f = NULL;
1317 }
1318
1319 if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1320 log_debug("Too many open journal files, not adding %s.", path);
1321 r = -ETOOMANYREFS;
1322 goto finish;
1323 }
1324
1325 r = journal_file_open(fd, path, O_RDONLY, 0, false, 0, false, NULL, j->mmap, NULL, NULL, &f);
1326 if (r < 0) {
1327 log_debug_errno(r, "Failed to open journal file %s: %m", path);
1328 goto finish;
1329 }
1330
1331 /* journal_file_dump(f); */
1332
1333 r = ordered_hashmap_put(j->files, f->path, f);
1334 if (r < 0) {
1335 f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */
1336 (void) journal_file_close(f);
1337 goto finish;
1338 }
1339
1340 close_fd = false; /* the fd is now owned by the JournalFile object */
1341
1342 f->last_seen_generation = j->generation;
1343
1344 track_file_disposition(j, f);
1345 check_network(j, f->fd);
1346
1347 j->current_invalidate_counter++;
1348
1349 log_debug("File %s added.", f->path);
1350
1351 r = 0;
1352
1353 finish:
1354 if (close_fd)
1355 safe_close(fd);
1356
1357 if (r < 0) {
1358 k = journal_put_error(j, r, path);
1359 if (k < 0)
1360 return k;
1361 }
1362
1363 return r;
1364 }
1365
1366 static int add_file_by_name(
1367 sd_journal *j,
1368 const char *prefix,
1369 const char *filename) {
1370
1371 const char *path;
1372
1373 assert(j);
1374 assert(prefix);
1375 assert(filename);
1376
1377 if (j->no_new_files)
1378 return 0;
1379
1380 if (!file_type_wanted(j->flags, filename))
1381 return 0;
1382
1383 path = strjoina(prefix, "/", filename);
1384 return add_any_file(j, -1, path);
1385 }
1386
1387 static void remove_file_by_name(
1388 sd_journal *j,
1389 const char *prefix,
1390 const char *filename) {
1391
1392 const char *path;
1393 JournalFile *f;
1394
1395 assert(j);
1396 assert(prefix);
1397 assert(filename);
1398
1399 path = strjoina(prefix, "/", filename);
1400 f = ordered_hashmap_get(j->files, path);
1401 if (!f)
1402 return;
1403
1404 remove_file_real(j, f);
1405 }
1406
1407 static void remove_file_real(sd_journal *j, JournalFile *f) {
1408 assert(j);
1409 assert(f);
1410
1411 (void) ordered_hashmap_remove(j->files, f->path);
1412
1413 log_debug("File %s removed.", f->path);
1414
1415 if (j->current_file == f) {
1416 j->current_file = NULL;
1417 j->current_field = 0;
1418 }
1419
1420 if (j->unique_file == f) {
1421 /* Jump to the next unique_file or NULL if that one was last */
1422 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1423 j->unique_offset = 0;
1424 if (!j->unique_file)
1425 j->unique_file_lost = true;
1426 }
1427
1428 if (j->fields_file == f) {
1429 j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
1430 j->fields_offset = 0;
1431 if (!j->fields_file)
1432 j->fields_file_lost = true;
1433 }
1434
1435 (void) journal_file_close(f);
1436
1437 j->current_invalidate_counter++;
1438 }
1439
1440 static int dirname_is_machine_id(const char *fn) {
1441 sd_id128_t id, machine;
1442 int r;
1443
1444 r = sd_id128_get_machine(&machine);
1445 if (r < 0)
1446 return r;
1447
1448 r = sd_id128_from_string(fn, &id);
1449 if (r < 0)
1450 return r;
1451
1452 return sd_id128_equal(id, machine);
1453 }
1454
1455 static bool dirent_is_journal_file(const struct dirent *de) {
1456 assert(de);
1457
1458 if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
1459 return false;
1460
1461 return endswith(de->d_name, ".journal") ||
1462 endswith(de->d_name, ".journal~");
1463 }
1464
1465 static bool dirent_is_id128_subdir(const struct dirent *de) {
1466 assert(de);
1467
1468 if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
1469 return false;
1470
1471 return id128_is_valid(de->d_name);
1472 }
1473
1474 static int directory_open(sd_journal *j, const char *path, DIR **ret) {
1475 DIR *d;
1476
1477 assert(j);
1478 assert(path);
1479 assert(ret);
1480
1481 if (j->toplevel_fd < 0)
1482 d = opendir(path);
1483 else
1484 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1485 * relative, by dropping the initial slash */
1486 d = xopendirat(j->toplevel_fd, skip_slash(path), 0);
1487 if (!d)
1488 return -errno;
1489
1490 *ret = d;
1491 return 0;
1492 }
1493
1494 static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
1495
1496 static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
1497 struct dirent *de;
1498
1499 assert(j);
1500 assert(m);
1501 assert(d);
1502
1503 FOREACH_DIRENT_ALL(de, d, goto fail) {
1504
1505 if (dirent_is_journal_file(de))
1506 (void) add_file_by_name(j, m->path, de->d_name);
1507
1508 if (m->is_root && dirent_is_id128_subdir(de))
1509 (void) add_directory(j, m->path, de->d_name);
1510 }
1511
1512 return;
1513
1514 fail:
1515 log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
1516 }
1517
1518 static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
1519 int r;
1520
1521 assert(j);
1522 assert(m);
1523 assert(fd >= 0);
1524
1525 /* Watch this directory if that's enabled and if it not being watched yet. */
1526
1527 if (m->wd > 0) /* Already have a watch? */
1528 return;
1529 if (j->inotify_fd < 0) /* Not watching at all? */
1530 return;
1531
1532 m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
1533 if (m->wd < 0) {
1534 log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
1535 return;
1536 }
1537
1538 r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
1539 if (r == -EEXIST)
1540 log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
1541 if (r < 0) {
1542 log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
1543 (void) inotify_rm_watch(j->inotify_fd, m->wd);
1544 m->wd = -1;
1545 }
1546 }
1547
1548 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
1549 _cleanup_free_ char *path = NULL;
1550 _cleanup_closedir_ DIR *d = NULL;
1551 Directory *m;
1552 int r, k;
1553
1554 assert(j);
1555 assert(prefix);
1556
1557 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1558 * and reenumerates directory contents */
1559
1560 if (dirname)
1561 path = strjoin(prefix, "/", dirname);
1562 else
1563 path = strdup(prefix);
1564 if (!path) {
1565 r = -ENOMEM;
1566 goto fail;
1567 }
1568
1569 log_debug("Considering directory '%s'.", path);
1570
1571 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1572 if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1573 !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
1574 return 0;
1575
1576 r = directory_open(j, path, &d);
1577 if (r < 0) {
1578 log_debug_errno(r, "Failed to open directory '%s': %m", path);
1579 goto fail;
1580 }
1581
1582 m = hashmap_get(j->directories_by_path, path);
1583 if (!m) {
1584 m = new0(Directory, 1);
1585 if (!m) {
1586 r = -ENOMEM;
1587 goto fail;
1588 }
1589
1590 m->is_root = false;
1591 m->path = path;
1592
1593 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1594 free(m);
1595 r = -ENOMEM;
1596 goto fail;
1597 }
1598
1599 path = NULL; /* avoid freeing in cleanup */
1600 j->current_invalidate_counter++;
1601
1602 log_debug("Directory %s added.", m->path);
1603
1604 } else if (m->is_root)
1605 return 0; /* Don't 'downgrade' from root directory */
1606
1607 m->last_seen_generation = j->generation;
1608
1609 directory_watch(j, m, dirfd(d),
1610 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1611 IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
1612 IN_ONLYDIR);
1613
1614 if (!j->no_new_files)
1615 directory_enumerate(j, m, d);
1616
1617 check_network(j, dirfd(d));
1618
1619 return 0;
1620
1621 fail:
1622 k = journal_put_error(j, r, path ?: prefix);
1623 if (k < 0)
1624 return k;
1625
1626 return r;
1627 }
1628
1629 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
1630
1631 _cleanup_closedir_ DIR *d = NULL;
1632 Directory *m;
1633 int r, k;
1634
1635 assert(j);
1636
1637 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
1638 * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
1639 * populate the set, as well as to update it later. */
1640
1641 if (p) {
1642 /* If there's a path specified, use it. */
1643
1644 log_debug("Considering root directory '%s'.", p);
1645
1646 if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
1647 !path_has_prefix(j, p, "/run"))
1648 return -EINVAL;
1649
1650 if (j->prefix)
1651 p = strjoina(j->prefix, p);
1652
1653 r = directory_open(j, p, &d);
1654 if (r == -ENOENT && missing_ok)
1655 return 0;
1656 if (r < 0) {
1657 log_debug_errno(r, "Failed to open root directory %s: %m", p);
1658 goto fail;
1659 }
1660 } else {
1661 int dfd;
1662
1663 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
1664 * opendir() will take possession of the fd, and close it, which we don't want. */
1665
1666 p = "."; /* store this as "." in the directories hashmap */
1667
1668 dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
1669 if (dfd < 0) {
1670 r = -errno;
1671 goto fail;
1672 }
1673
1674 d = fdopendir(dfd);
1675 if (!d) {
1676 r = -errno;
1677 safe_close(dfd);
1678 goto fail;
1679 }
1680
1681 rewinddir(d);
1682 }
1683
1684 m = hashmap_get(j->directories_by_path, p);
1685 if (!m) {
1686 m = new0(Directory, 1);
1687 if (!m) {
1688 r = -ENOMEM;
1689 goto fail;
1690 }
1691
1692 m->is_root = true;
1693
1694 m->path = strdup(p);
1695 if (!m->path) {
1696 free(m);
1697 r = -ENOMEM;
1698 goto fail;
1699 }
1700
1701 if (hashmap_put(j->directories_by_path, m->path, m) < 0) {
1702 free(m->path);
1703 free(m);
1704 r = -ENOMEM;
1705 goto fail;
1706 }
1707
1708 j->current_invalidate_counter++;
1709
1710 log_debug("Root directory %s added.", m->path);
1711
1712 } else if (!m->is_root)
1713 return 0;
1714
1715 directory_watch(j, m, dirfd(d),
1716 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
1717 IN_ONLYDIR);
1718
1719 if (!j->no_new_files)
1720 directory_enumerate(j, m, d);
1721
1722 check_network(j, dirfd(d));
1723
1724 return 0;
1725
1726 fail:
1727 k = journal_put_error(j, r, p);
1728 if (k < 0)
1729 return k;
1730
1731 return r;
1732 }
1733
1734 static void remove_directory(sd_journal *j, Directory *d) {
1735 assert(j);
1736
1737 if (d->wd > 0) {
1738 hashmap_remove(j->directories_by_wd, INT_TO_PTR(d->wd));
1739
1740 if (j->inotify_fd >= 0)
1741 inotify_rm_watch(j->inotify_fd, d->wd);
1742 }
1743
1744 hashmap_remove(j->directories_by_path, d->path);
1745
1746 if (d->is_root)
1747 log_debug("Root directory %s removed.", d->path);
1748 else
1749 log_debug("Directory %s removed.", d->path);
1750
1751 free(d->path);
1752 free(d);
1753 }
1754
1755 static int add_search_paths(sd_journal *j) {
1756
1757 static const char search_paths[] =
1758 "/run/log/journal\0"
1759 "/var/log/journal\0";
1760 const char *p;
1761
1762 assert(j);
1763
1764 /* We ignore most errors here, since the idea is to only open
1765 * what's actually accessible, and ignore the rest. */
1766
1767 NULSTR_FOREACH(p, search_paths)
1768 (void) add_root_directory(j, p, true);
1769
1770 if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
1771 (void) add_root_directory(j, "/var/log/journal/remote", true);
1772
1773 return 0;
1774 }
1775
1776 static int add_current_paths(sd_journal *j) {
1777 Iterator i;
1778 JournalFile *f;
1779
1780 assert(j);
1781 assert(j->no_new_files);
1782
1783 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
1784 * treat them as fatal. */
1785
1786 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
1787 _cleanup_free_ char *dir;
1788 int r;
1789
1790 dir = dirname_malloc(f->path);
1791 if (!dir)
1792 return -ENOMEM;
1793
1794 r = add_directory(j, dir, NULL);
1795 if (r < 0)
1796 return r;
1797 }
1798
1799 return 0;
1800 }
1801
1802 static int allocate_inotify(sd_journal *j) {
1803 assert(j);
1804
1805 if (j->inotify_fd < 0) {
1806 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1807 if (j->inotify_fd < 0)
1808 return -errno;
1809 }
1810
1811 return hashmap_ensure_allocated(&j->directories_by_wd, NULL);
1812 }
1813
1814 static sd_journal *journal_new(int flags, const char *path) {
1815 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1816
1817 j = new0(sd_journal, 1);
1818 if (!j)
1819 return NULL;
1820
1821 j->original_pid = getpid_cached();
1822 j->toplevel_fd = -1;
1823 j->inotify_fd = -1;
1824 j->flags = flags;
1825 j->data_threshold = DEFAULT_DATA_THRESHOLD;
1826
1827 if (path) {
1828 char *t;
1829
1830 t = strdup(path);
1831 if (!t)
1832 return NULL;
1833
1834 if (flags & SD_JOURNAL_OS_ROOT)
1835 j->prefix = t;
1836 else
1837 j->path = t;
1838 }
1839
1840 j->files = ordered_hashmap_new(&path_hash_ops);
1841 if (!j->files)
1842 return NULL;
1843
1844 j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
1845 j->directories_by_path = hashmap_new(&path_hash_ops);
1846 j->mmap = mmap_cache_new();
1847 if (!j->files_cache || !j->directories_by_path || !j->mmap)
1848 return NULL;
1849
1850 return TAKE_PTR(j);
1851 }
1852
1853 #define OPEN_ALLOWED_FLAGS \
1854 (SD_JOURNAL_LOCAL_ONLY | \
1855 SD_JOURNAL_RUNTIME_ONLY | \
1856 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
1857
1858 _public_ int sd_journal_open(sd_journal **ret, int flags) {
1859 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1860 int r;
1861
1862 assert_return(ret, -EINVAL);
1863 assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
1864
1865 j = journal_new(flags, NULL);
1866 if (!j)
1867 return -ENOMEM;
1868
1869 r = add_search_paths(j);
1870 if (r < 0)
1871 return r;
1872
1873 *ret = TAKE_PTR(j);
1874 return 0;
1875 }
1876
1877 #define OPEN_CONTAINER_ALLOWED_FLAGS \
1878 (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
1879
1880 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
1881 _cleanup_free_ char *root = NULL, *class = NULL;
1882 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1883 char *p;
1884 int r;
1885
1886 /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
1887 * combination with sd_journal_open_directory_fd(). */
1888
1889 assert_return(machine, -EINVAL);
1890 assert_return(ret, -EINVAL);
1891 assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
1892 assert_return(machine_name_is_valid(machine), -EINVAL);
1893
1894 p = strjoina("/run/systemd/machines/", machine);
1895 r = parse_env_file(NULL, p, NEWLINE, "ROOT", &root, "CLASS", &class, NULL);
1896 if (r == -ENOENT)
1897 return -EHOSTDOWN;
1898 if (r < 0)
1899 return r;
1900 if (!root)
1901 return -ENODATA;
1902
1903 if (!streq_ptr(class, "container"))
1904 return -EIO;
1905
1906 j = journal_new(flags, root);
1907 if (!j)
1908 return -ENOMEM;
1909
1910 r = add_search_paths(j);
1911 if (r < 0)
1912 return r;
1913
1914 *ret = TAKE_PTR(j);
1915 return 0;
1916 }
1917
1918 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
1919 (SD_JOURNAL_OS_ROOT | \
1920 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1921
1922 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
1923 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1924 int r;
1925
1926 assert_return(ret, -EINVAL);
1927 assert_return(path, -EINVAL);
1928 assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
1929
1930 j = journal_new(flags, path);
1931 if (!j)
1932 return -ENOMEM;
1933
1934 if (flags & SD_JOURNAL_OS_ROOT)
1935 r = add_search_paths(j);
1936 else
1937 r = add_root_directory(j, path, false);
1938 if (r < 0)
1939 return r;
1940
1941 *ret = TAKE_PTR(j);
1942 return 0;
1943 }
1944
1945 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
1946 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1947 const char **path;
1948 int r;
1949
1950 assert_return(ret, -EINVAL);
1951 assert_return(flags == 0, -EINVAL);
1952
1953 j = journal_new(flags, NULL);
1954 if (!j)
1955 return -ENOMEM;
1956
1957 STRV_FOREACH(path, paths) {
1958 r = add_any_file(j, -1, *path);
1959 if (r < 0)
1960 return r;
1961 }
1962
1963 j->no_new_files = true;
1964
1965 *ret = TAKE_PTR(j);
1966 return 0;
1967 }
1968
1969 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
1970 (SD_JOURNAL_OS_ROOT | \
1971 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1972
1973 _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
1974 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
1975 struct stat st;
1976 int r;
1977
1978 assert_return(ret, -EINVAL);
1979 assert_return(fd >= 0, -EBADF);
1980 assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
1981
1982 if (fstat(fd, &st) < 0)
1983 return -errno;
1984
1985 if (!S_ISDIR(st.st_mode))
1986 return -EBADFD;
1987
1988 j = journal_new(flags, NULL);
1989 if (!j)
1990 return -ENOMEM;
1991
1992 j->toplevel_fd = fd;
1993
1994 if (flags & SD_JOURNAL_OS_ROOT)
1995 r = add_search_paths(j);
1996 else
1997 r = add_root_directory(j, NULL, false);
1998 if (r < 0)
1999 return r;
2000
2001 *ret = TAKE_PTR(j);
2002 return 0;
2003 }
2004
2005 _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
2006 Iterator iterator;
2007 JournalFile *f;
2008 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2009 unsigned i;
2010 int r;
2011
2012 assert_return(ret, -EINVAL);
2013 assert_return(n_fds > 0, -EBADF);
2014 assert_return(flags == 0, -EINVAL);
2015
2016 j = journal_new(flags, NULL);
2017 if (!j)
2018 return -ENOMEM;
2019
2020 for (i = 0; i < n_fds; i++) {
2021 struct stat st;
2022
2023 if (fds[i] < 0) {
2024 r = -EBADF;
2025 goto fail;
2026 }
2027
2028 if (fstat(fds[i], &st) < 0) {
2029 r = -errno;
2030 goto fail;
2031 }
2032
2033 r = stat_verify_regular(&st);
2034 if (r < 0)
2035 goto fail;
2036
2037 r = add_any_file(j, fds[i], NULL);
2038 if (r < 0)
2039 goto fail;
2040 }
2041
2042 j->no_new_files = true;
2043 j->no_inotify = true;
2044
2045 *ret = TAKE_PTR(j);
2046 return 0;
2047
2048 fail:
2049 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2050 * remain open */
2051 ORDERED_HASHMAP_FOREACH(f, j->files, iterator)
2052 f->close_fd = false;
2053
2054 return r;
2055 }
2056
2057 _public_ void sd_journal_close(sd_journal *j) {
2058 Directory *d;
2059
2060 if (!j)
2061 return;
2062
2063 sd_journal_flush_matches(j);
2064
2065 ordered_hashmap_free_with_destructor(j->files, journal_file_close);
2066 iterated_cache_free(j->files_cache);
2067
2068 while ((d = hashmap_first(j->directories_by_path)))
2069 remove_directory(j, d);
2070
2071 while ((d = hashmap_first(j->directories_by_wd)))
2072 remove_directory(j, d);
2073
2074 hashmap_free(j->directories_by_path);
2075 hashmap_free(j->directories_by_wd);
2076
2077 safe_close(j->inotify_fd);
2078
2079 if (j->mmap) {
2080 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j->mmap), mmap_cache_get_missed(j->mmap));
2081 mmap_cache_unref(j->mmap);
2082 }
2083
2084 hashmap_free_free(j->errors);
2085
2086 free(j->path);
2087 free(j->prefix);
2088 free(j->unique_field);
2089 free(j->fields_buffer);
2090 free(j);
2091 }
2092
2093 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
2094 Object *o;
2095 JournalFile *f;
2096 int r;
2097
2098 assert_return(j, -EINVAL);
2099 assert_return(!journal_pid_changed(j), -ECHILD);
2100 assert_return(ret, -EINVAL);
2101
2102 f = j->current_file;
2103 if (!f)
2104 return -EADDRNOTAVAIL;
2105
2106 if (f->current_offset <= 0)
2107 return -EADDRNOTAVAIL;
2108
2109 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2110 if (r < 0)
2111 return r;
2112
2113 *ret = le64toh(o->entry.realtime);
2114 return 0;
2115 }
2116
2117 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
2118 Object *o;
2119 JournalFile *f;
2120 int r;
2121 sd_id128_t id;
2122
2123 assert_return(j, -EINVAL);
2124 assert_return(!journal_pid_changed(j), -ECHILD);
2125
2126 f = j->current_file;
2127 if (!f)
2128 return -EADDRNOTAVAIL;
2129
2130 if (f->current_offset <= 0)
2131 return -EADDRNOTAVAIL;
2132
2133 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2134 if (r < 0)
2135 return r;
2136
2137 if (ret_boot_id)
2138 *ret_boot_id = o->entry.boot_id;
2139 else {
2140 r = sd_id128_get_boot(&id);
2141 if (r < 0)
2142 return r;
2143
2144 if (!sd_id128_equal(id, o->entry.boot_id))
2145 return -ESTALE;
2146 }
2147
2148 if (ret)
2149 *ret = le64toh(o->entry.monotonic);
2150
2151 return 0;
2152 }
2153
2154 static bool field_is_valid(const char *field) {
2155 const char *p;
2156
2157 assert(field);
2158
2159 if (isempty(field))
2160 return false;
2161
2162 if (startswith(field, "__"))
2163 return false;
2164
2165 for (p = field; *p; p++) {
2166
2167 if (*p == '_')
2168 continue;
2169
2170 if (*p >= 'A' && *p <= 'Z')
2171 continue;
2172
2173 if (*p >= '0' && *p <= '9')
2174 continue;
2175
2176 return false;
2177 }
2178
2179 return true;
2180 }
2181
/* Looks up the data object of the current entry whose payload starts with "<field>=" and returns a pointer to
 * the payload (including the "<field>=" prefix) in *data and its size in *size.
 *
 * For uncompressed objects *data points at the object's payload; for compressed objects it points at the
 * journal file's decompression buffer.
 *
 * Returns 0 if the field was found, -ENOENT if no item of the entry matches, -EADDRNOTAVAIL if there is no
 * current file or no current entry in it, -EINVAL/-ECHILD on invalid use, -EBADMSG if an item's hash does not
 * match the hash stored in the data object it refers to, -E2BIG if a matching uncompressed payload does not
 * fit into size_t, -EPROTONOSUPPORT if a compressed object is encountered but no compression support is
 * compiled in, or another negative error from locating objects or decompressing. */
_public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
        JournalFile *f;
        uint64_t i, n;
        size_t field_length;
        int r;
        Object *o;

        assert_return(j, -EINVAL);
        assert_return(!journal_pid_changed(j), -ECHILD);
        assert_return(field, -EINVAL);
        assert_return(data, -EINVAL);
        assert_return(size, -EINVAL);
        assert_return(field_is_valid(field), -EINVAL);

        f = j->current_file;
        if (!f)
                return -EADDRNOTAVAIL;

        if (f->current_offset <= 0)
                return -EADDRNOTAVAIL;

        r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
        if (r < 0)
                return r;

        field_length = strlen(field);

        /* Walk through all items of the entry, o refers to the entry object at the top of each iteration */
        n = journal_file_entry_n_items(o);
        for (i = 0; i < n; i++) {
                uint64_t p, l;
                le64_t le_hash;
                size_t t;
                int compression;

                /* Read offset and hash from the entry before o is repointed to the data object */
                p = le64toh(o->entry.items[i].object_offset);
                le_hash = o->entry.items[i].hash;
                r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
                if (r < 0)
                        return r;

                /* Both values are in on-disk byte order, hence can be compared without conversion */
                if (le_hash != o->data.hash)
                        return -EBADMSG;

                /* Size of the payload, i.e. object size minus the header preceding the payload */
                l = le64toh(o->object.size) - offsetof(Object, data.payload);

                compression = o->object.flags & OBJECT_COMPRESSION_MASK;
                if (compression) {
#if HAVE_XZ || HAVE_LZ4
                        /* Check the "<field>=" prefix first, and only decompress the whole object if it
                         * matches */
                        r = decompress_startswith(compression,
                                                  o->data.payload, l,
                                                  &f->compress_buffer, &f->compress_buffer_size,
                                                  field, field_length, '=');
                        if (r < 0)
                                /* Only logged, we continue with the next item */
                                log_debug_errno(r, "Cannot decompress %s object of length %"PRIu64" at offset "OFSfmt": %m",
                                                object_compressed_to_string(compression), l, p);
                        else if (r > 0) {

                                size_t rsize;

                                r = decompress_blob(compression,
                                                    o->data.payload, l,
                                                    &f->compress_buffer, &f->compress_buffer_size, &rsize,
                                                    j->data_threshold);
                                if (r < 0)
                                        return r;

                                *data = f->compress_buffer;
                                *size = (size_t) rsize;

                                return 0;
                        }
#else
                        return -EPROTONOSUPPORT;
#endif
                } else if (l >= field_length+1 &&
                           memcmp(o->data.payload, field, field_length) == 0 &&
                           o->data.payload[field_length] == '=') {

                        t = (size_t) l;

                        /* Refuse payloads whose size does not survive the conversion to size_t */
                        if ((uint64_t) t != l)
                                return -E2BIG;

                        *data = o->data.payload;
                        *size = t;

                        return 0;
                }

                /* o refers to the data object now, go back to the entry object for the next item */
                r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
                if (r < 0)
                        return r;
        }

        return -ENOENT;
}
2278
2279 static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
2280 size_t t;
2281 uint64_t l;
2282 int compression;
2283
2284 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2285 t = (size_t) l;
2286
2287 /* We can't read objects larger than 4G on a 32bit machine */
2288 if ((uint64_t) t != l)
2289 return -E2BIG;
2290
2291 compression = o->object.flags & OBJECT_COMPRESSION_MASK;
2292 if (compression) {
2293 #if HAVE_XZ || HAVE_LZ4
2294 size_t rsize;
2295 int r;
2296
2297 r = decompress_blob(compression,
2298 o->data.payload, l, &f->compress_buffer,
2299 &f->compress_buffer_size, &rsize, j->data_threshold);
2300 if (r < 0)
2301 return r;
2302
2303 *data = f->compress_buffer;
2304 *size = (size_t) rsize;
2305 #else
2306 return -EPROTONOSUPPORT;
2307 #endif
2308 } else {
2309 *data = o->data.payload;
2310 *size = t;
2311 }
2312
2313 return 0;
2314 }
2315
2316 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2317 JournalFile *f;
2318 uint64_t p, n;
2319 le64_t le_hash;
2320 int r;
2321 Object *o;
2322
2323 assert_return(j, -EINVAL);
2324 assert_return(!journal_pid_changed(j), -ECHILD);
2325 assert_return(data, -EINVAL);
2326 assert_return(size, -EINVAL);
2327
2328 f = j->current_file;
2329 if (!f)
2330 return -EADDRNOTAVAIL;
2331
2332 if (f->current_offset <= 0)
2333 return -EADDRNOTAVAIL;
2334
2335 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2336 if (r < 0)
2337 return r;
2338
2339 n = journal_file_entry_n_items(o);
2340 if (j->current_field >= n)
2341 return 0;
2342
2343 p = le64toh(o->entry.items[j->current_field].object_offset);
2344 le_hash = o->entry.items[j->current_field].hash;
2345 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2346 if (r < 0)
2347 return r;
2348
2349 if (le_hash != o->data.hash)
2350 return -EBADMSG;
2351
2352 r = return_data(j, f, o, data, size);
2353 if (r < 0)
2354 return r;
2355
2356 j->current_field++;
2357
2358 return 1;
2359 }
2360
2361 _public_ void sd_journal_restart_data(sd_journal *j) {
2362 if (!j)
2363 return;
2364
2365 j->current_field = 0;
2366 }
2367
2368 static int reiterate_all_paths(sd_journal *j) {
2369 assert(j);
2370
2371 if (j->no_new_files)
2372 return add_current_paths(j);
2373
2374 if (j->flags & SD_JOURNAL_OS_ROOT)
2375 return add_search_paths(j);
2376
2377 if (j->toplevel_fd >= 0)
2378 return add_root_directory(j, NULL, false);
2379
2380 if (j->path)
2381 return add_root_directory(j, j->path, true);
2382
2383 return add_search_paths(j);
2384 }
2385
2386 _public_ int sd_journal_get_fd(sd_journal *j) {
2387 int r;
2388
2389 assert_return(j, -EINVAL);
2390 assert_return(!journal_pid_changed(j), -ECHILD);
2391
2392 if (j->no_inotify)
2393 return -EMEDIUMTYPE;
2394
2395 if (j->inotify_fd >= 0)
2396 return j->inotify_fd;
2397
2398 r = allocate_inotify(j);
2399 if (r < 0)
2400 return r;
2401
2402 log_debug("Reiterating files to get inotify watches established.");
2403
2404 /* Iterate through all dirs again, to add them to the inotify */
2405 r = reiterate_all_paths(j);
2406 if (r < 0)
2407 return r;
2408
2409 return j->inotify_fd;
2410 }
2411
2412 _public_ int sd_journal_get_events(sd_journal *j) {
2413 int fd;
2414
2415 assert_return(j, -EINVAL);
2416 assert_return(!journal_pid_changed(j), -ECHILD);
2417
2418 fd = sd_journal_get_fd(j);
2419 if (fd < 0)
2420 return fd;
2421
2422 return POLLIN;
2423 }
2424
2425 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2426 int fd;
2427
2428 assert_return(j, -EINVAL);
2429 assert_return(!journal_pid_changed(j), -ECHILD);
2430 assert_return(timeout_usec, -EINVAL);
2431
2432 fd = sd_journal_get_fd(j);
2433 if (fd < 0)
2434 return fd;
2435
2436 if (!j->on_network) {
2437 *timeout_usec = (uint64_t) -1;
2438 return 0;
2439 }
2440
2441 /* If we are on the network we need to regularly check for
2442 * changes manually */
2443
2444 *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2445 return 1;
2446 }
2447
2448 static void process_q_overflow(sd_journal *j) {
2449 JournalFile *f;
2450 Directory *m;
2451 Iterator i;
2452
2453 assert(j);
2454
2455 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2456 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2457 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2458 * are subject for unloading. */
2459
2460 log_debug("Inotify queue overrun, reiterating everything.");
2461
2462 j->generation++;
2463 (void) reiterate_all_paths(j);
2464
2465 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2466
2467 if (f->last_seen_generation == j->generation)
2468 continue;
2469
2470 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
2471 remove_file_real(j, f);
2472 }
2473
2474 HASHMAP_FOREACH(m, j->directories_by_path, i) {
2475
2476 if (m->last_seen_generation == j->generation)
2477 continue;
2478
2479 if (m->is_root) /* Never GC root directories */
2480 continue;
2481
2482 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
2483 remove_directory(j, m);
2484 }
2485
2486 log_debug("Reiteration complete.");
2487 }
2488
2489 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
2490 Directory *d;
2491
2492 assert(j);
2493 assert(e);
2494
2495 if (e->mask & IN_Q_OVERFLOW) {
2496 process_q_overflow(j);
2497 return;
2498 }
2499
2500 /* Is this a subdirectory we watch? */
2501 d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2502 if (d) {
2503 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2504 (endswith(e->name, ".journal") ||
2505 endswith(e->name, ".journal~"))) {
2506
2507 /* Event for a journal file */
2508
2509 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2510 (void) add_file_by_name(j, d->path, e->name);
2511 else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
2512 remove_file_by_name(j, d->path, e->name);
2513
2514 } else if (!d->is_root && e->len == 0) {
2515
2516 /* Event for a subdirectory */
2517
2518 if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
2519 remove_directory(j, d);
2520
2521 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
2522
2523 /* Event for root directory */
2524
2525 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2526 (void) add_directory(j, d->path, e->name);
2527 }
2528
2529 return;
2530 }
2531
2532 if (e->mask & IN_IGNORED)
2533 return;
2534
2535 log_debug("Unexpected inotify event.");
2536 }
2537
2538 static int determine_change(sd_journal *j) {
2539 bool b;
2540
2541 assert(j);
2542
2543 b = j->current_invalidate_counter != j->last_invalidate_counter;
2544 j->last_invalidate_counter = j->current_invalidate_counter;
2545
2546 return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
2547 }
2548
2549 _public_ int sd_journal_process(sd_journal *j) {
2550 bool got_something = false;
2551
2552 assert_return(j, -EINVAL);
2553 assert_return(!journal_pid_changed(j), -ECHILD);
2554
2555 if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
2556 return 0;
2557
2558 j->last_process_usec = now(CLOCK_MONOTONIC);
2559 j->last_invalidate_counter = j->current_invalidate_counter;
2560
2561 for (;;) {
2562 union inotify_event_buffer buffer;
2563 struct inotify_event *e;
2564 ssize_t l;
2565
2566 l = read(j->inotify_fd, &buffer, sizeof(buffer));
2567 if (l < 0) {
2568 if (IN_SET(errno, EAGAIN, EINTR))
2569 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
2570
2571 return -errno;
2572 }
2573
2574 got_something = true;
2575
2576 FOREACH_INOTIFY_EVENT(e, buffer, l)
2577 process_inotify_event(j, e);
2578 }
2579 }
2580
2581 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
2582 int r;
2583 uint64_t t;
2584
2585 assert_return(j, -EINVAL);
2586 assert_return(!journal_pid_changed(j), -ECHILD);
2587
2588 if (j->inotify_fd < 0) {
2589
2590 /* This is the first invocation, hence create the
2591 * inotify watch */
2592 r = sd_journal_get_fd(j);
2593 if (r < 0)
2594 return r;
2595
2596 /* The journal might have changed since the context
2597 * object was created and we weren't watching before,
2598 * hence don't wait for anything, and return
2599 * immediately. */
2600 return determine_change(j);
2601 }
2602
2603 r = sd_journal_get_timeout(j, &t);
2604 if (r < 0)
2605 return r;
2606
2607 if (t != (uint64_t) -1) {
2608 usec_t n;
2609
2610 n = now(CLOCK_MONOTONIC);
2611 t = t > n ? t - n : 0;
2612
2613 if (timeout_usec == (uint64_t) -1 || timeout_usec > t)
2614 timeout_usec = t;
2615 }
2616
2617 do {
2618 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
2619 } while (r == -EINTR);
2620
2621 if (r < 0)
2622 return r;
2623
2624 return sd_journal_process(j);
2625 }
2626
2627 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
2628 Iterator i;
2629 JournalFile *f;
2630 bool first = true;
2631 uint64_t fmin = 0, tmax = 0;
2632 int r;
2633
2634 assert_return(j, -EINVAL);
2635 assert_return(!journal_pid_changed(j), -ECHILD);
2636 assert_return(from || to, -EINVAL);
2637 assert_return(from != to, -EINVAL);
2638
2639 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2640 usec_t fr, t;
2641
2642 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
2643 if (r == -ENOENT)
2644 continue;
2645 if (r < 0)
2646 return r;
2647 if (r == 0)
2648 continue;
2649
2650 if (first) {
2651 fmin = fr;
2652 tmax = t;
2653 first = false;
2654 } else {
2655 fmin = MIN(fr, fmin);
2656 tmax = MAX(t, tmax);
2657 }
2658 }
2659
2660 if (from)
2661 *from = fmin;
2662 if (to)
2663 *to = tmax;
2664
2665 return first ? 0 : 1;
2666 }
2667
2668 _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t *from, uint64_t *to) {
2669 Iterator i;
2670 JournalFile *f;
2671 bool found = false;
2672 int r;
2673
2674 assert_return(j, -EINVAL);
2675 assert_return(!journal_pid_changed(j), -ECHILD);
2676 assert_return(from || to, -EINVAL);
2677 assert_return(from != to, -EINVAL);
2678
2679 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2680 usec_t fr, t;
2681
2682 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &fr, &t);
2683 if (r == -ENOENT)
2684 continue;
2685 if (r < 0)
2686 return r;
2687 if (r == 0)
2688 continue;
2689
2690 if (found) {
2691 if (from)
2692 *from = MIN(fr, *from);
2693 if (to)
2694 *to = MAX(t, *to);
2695 } else {
2696 if (from)
2697 *from = fr;
2698 if (to)
2699 *to = t;
2700 found = true;
2701 }
2702 }
2703
2704 return found;
2705 }
2706
2707 void journal_print_header(sd_journal *j) {
2708 Iterator i;
2709 JournalFile *f;
2710 bool newline = false;
2711
2712 assert(j);
2713
2714 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2715 if (newline)
2716 putchar('\n');
2717 else
2718 newline = true;
2719
2720 journal_file_print_header(f);
2721 }
2722 }
2723
2724 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *bytes) {
2725 Iterator i;
2726 JournalFile *f;
2727 uint64_t sum = 0;
2728
2729 assert_return(j, -EINVAL);
2730 assert_return(!journal_pid_changed(j), -ECHILD);
2731 assert_return(bytes, -EINVAL);
2732
2733 ORDERED_HASHMAP_FOREACH(f, j->files, i) {
2734 struct stat st;
2735
2736 if (fstat(f->fd, &st) < 0)
2737 return -errno;
2738
2739 sum += (uint64_t) st.st_blocks * 512ULL;
2740 }
2741
2742 *bytes = sum;
2743 return 0;
2744 }
2745
2746 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
2747 char *f;
2748
2749 assert_return(j, -EINVAL);
2750 assert_return(!journal_pid_changed(j), -ECHILD);
2751 assert_return(!isempty(field), -EINVAL);
2752 assert_return(field_is_valid(field), -EINVAL);
2753
2754 f = strdup(field);
2755 if (!f)
2756 return -ENOMEM;
2757
2758 free(j->unique_field);
2759 j->unique_field = f;
2760 j->unique_file = NULL;
2761 j->unique_offset = 0;
2762 j->unique_file_lost = false;
2763
2764 return 0;
2765 }
2766
2767 _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_t *l) {
2768 size_t k;
2769
2770 assert_return(j, -EINVAL);
2771 assert_return(!journal_pid_changed(j), -ECHILD);
2772 assert_return(data, -EINVAL);
2773 assert_return(l, -EINVAL);
2774 assert_return(j->unique_field, -EINVAL);
2775
2776 k = strlen(j->unique_field);
2777
2778 if (!j->unique_file) {
2779 if (j->unique_file_lost)
2780 return 0;
2781
2782 j->unique_file = ordered_hashmap_first(j->files);
2783 if (!j->unique_file)
2784 return 0;
2785
2786 j->unique_offset = 0;
2787 }
2788
2789 for (;;) {
2790 JournalFile *of;
2791 Iterator i;
2792 Object *o;
2793 const void *odata;
2794 size_t ol;
2795 bool found;
2796 int r;
2797
2798 /* Proceed to next data object in the field's linked list */
2799 if (j->unique_offset == 0) {
2800 r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
2801 if (r < 0)
2802 return r;
2803
2804 j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
2805 } else {
2806 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
2807 if (r < 0)
2808 return r;
2809
2810 j->unique_offset = le64toh(o->data.next_field_offset);
2811 }
2812
2813 /* We reached the end of the list? Then start again, with the next file */
2814 if (j->unique_offset == 0) {
2815 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
2816 if (!j->unique_file)
2817 return 0;
2818
2819 continue;
2820 }
2821
2822 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2823 * instead, so that we can look at this data object at the same
2824 * time as one on another file */
2825 r = journal_file_move_to_object(j->unique_file, OBJECT_UNUSED, j->unique_offset, &o);
2826 if (r < 0)
2827 return r;
2828
2829 /* Let's do the type check by hand, since we used 0 context above. */
2830 if (o->object.type != OBJECT_DATA) {
2831 log_debug("%s:offset " OFSfmt ": object has type %d, expected %d",
2832 j->unique_file->path, j->unique_offset,
2833 o->object.type, OBJECT_DATA);
2834 return -EBADMSG;
2835 }
2836
2837 r = return_data(j, j->unique_file, o, &odata, &ol);
2838 if (r < 0)
2839 return r;
2840
2841 /* Check if we have at least the field name and "=". */
2842 if (ol <= k) {
2843 log_debug("%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
2844 j->unique_file->path, j->unique_offset,
2845 ol, k + 1);
2846 return -EBADMSG;
2847 }
2848
2849 if (memcmp(odata, j->unique_field, k) || ((const char*) odata)[k] != '=') {
2850 log_debug("%s:offset " OFSfmt ": object does not start with \"%s=\"",
2851 j->unique_file->path, j->unique_offset,
2852 j->unique_field);
2853 return -EBADMSG;
2854 }
2855
2856 /* OK, now let's see if we already returned this data
2857 * object by checking if it exists in the earlier
2858 * traversed files. */
2859 found = false;
2860 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2861 if (of == j->unique_file)
2862 break;
2863
2864 /* Skip this file it didn't have any fields indexed */
2865 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
2866 continue;
2867
2868 r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
2869 if (r < 0)
2870 return r;
2871 if (r > 0) {
2872 found = true;
2873 break;
2874 }
2875 }
2876
2877 if (found)
2878 continue;
2879
2880 r = return_data(j, j->unique_file, o, data, l);
2881 if (r < 0)
2882 return r;
2883
2884 return 1;
2885 }
2886 }
2887
2888 _public_ void sd_journal_restart_unique(sd_journal *j) {
2889 if (!j)
2890 return;
2891
2892 j->unique_file = NULL;
2893 j->unique_offset = 0;
2894 j->unique_file_lost = false;
2895 }
2896
2897 _public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
2898 int r;
2899
2900 assert_return(j, -EINVAL);
2901 assert_return(!journal_pid_changed(j), -ECHILD);
2902 assert_return(field, -EINVAL);
2903
2904 if (!j->fields_file) {
2905 if (j->fields_file_lost)
2906 return 0;
2907
2908 j->fields_file = ordered_hashmap_first(j->files);
2909 if (!j->fields_file)
2910 return 0;
2911
2912 j->fields_hash_table_index = 0;
2913 j->fields_offset = 0;
2914 }
2915
2916 for (;;) {
2917 JournalFile *f, *of;
2918 Iterator i;
2919 uint64_t m;
2920 Object *o;
2921 size_t sz;
2922 bool found;
2923
2924 f = j->fields_file;
2925
2926 if (j->fields_offset == 0) {
2927 bool eof = false;
2928
2929 /* We are not yet positioned at any field. Let's pick the first one */
2930 r = journal_file_map_field_hash_table(f);
2931 if (r < 0)
2932 return r;
2933
2934 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
2935 for (;;) {
2936 if (j->fields_hash_table_index >= m) {
2937 /* Reached the end of the hash table, go to the next file. */
2938 eof = true;
2939 break;
2940 }
2941
2942 j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
2943
2944 if (j->fields_offset != 0)
2945 break;
2946
2947 /* Empty hash table bucket, go to next one */
2948 j->fields_hash_table_index++;
2949 }
2950
2951 if (eof) {
2952 /* Proceed with next file */
2953 j->fields_file = ordered_hashmap_next(j->files, f->path);
2954 if (!j->fields_file) {
2955 *field = NULL;
2956 return 0;
2957 }
2958
2959 j->fields_offset = 0;
2960 j->fields_hash_table_index = 0;
2961 continue;
2962 }
2963
2964 } else {
2965 /* We are already positioned at a field. If so, let's figure out the next field from it */
2966
2967 r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
2968 if (r < 0)
2969 return r;
2970
2971 j->fields_offset = le64toh(o->field.next_hash_offset);
2972 if (j->fields_offset == 0) {
2973 /* Reached the end of the hash table chain */
2974 j->fields_hash_table_index++;
2975 continue;
2976 }
2977 }
2978
2979 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
2980 r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
2981 if (r < 0)
2982 return r;
2983
2984 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
2985 if (o->object.type != OBJECT_FIELD) {
2986 log_debug("%s:offset " OFSfmt ": object has type %i, expected %i", f->path, j->fields_offset, o->object.type, OBJECT_FIELD);
2987 return -EBADMSG;
2988 }
2989
2990 sz = le64toh(o->object.size) - offsetof(Object, field.payload);
2991
2992 /* Let's see if we already returned this field name before. */
2993 found = false;
2994 ORDERED_HASHMAP_FOREACH(of, j->files, i) {
2995 if (of == f)
2996 break;
2997
2998 /* Skip this file it didn't have any fields indexed */
2999 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3000 continue;
3001
3002 r = journal_file_find_field_object_with_hash(of, o->field.payload, sz, le64toh(o->field.hash), NULL, NULL);
3003 if (r < 0)
3004 return r;
3005 if (r > 0) {
3006 found = true;
3007 break;
3008 }
3009 }
3010
3011 if (found)
3012 continue;
3013
3014 /* Check if this is really a valid string containing no NUL byte */
3015 if (memchr(o->field.payload, 0, sz))
3016 return -EBADMSG;
3017
3018 if (sz > j->data_threshold)
3019 sz = j->data_threshold;
3020
3021 if (!GREEDY_REALLOC(j->fields_buffer, j->fields_buffer_allocated, sz + 1))
3022 return -ENOMEM;
3023
3024 memcpy(j->fields_buffer, o->field.payload, sz);
3025 j->fields_buffer[sz] = 0;
3026
3027 if (!field_is_valid(j->fields_buffer))
3028 return -EBADMSG;
3029
3030 *field = j->fields_buffer;
3031 return 1;
3032 }
3033 }
3034
3035 _public_ void sd_journal_restart_fields(sd_journal *j) {
3036 if (!j)
3037 return;
3038
3039 j->fields_file = NULL;
3040 j->fields_hash_table_index = 0;
3041 j->fields_offset = 0;
3042 j->fields_file_lost = false;
3043 }
3044
3045 _public_ int sd_journal_reliable_fd(sd_journal *j) {
3046 assert_return(j, -EINVAL);
3047 assert_return(!journal_pid_changed(j), -ECHILD);
3048
3049 return !j->on_network;
3050 }
3051
3052 static char *lookup_field(const char *field, void *userdata) {
3053 sd_journal *j = userdata;
3054 const void *data;
3055 size_t size, d;
3056 int r;
3057
3058 assert(field);
3059 assert(j);
3060
3061 r = sd_journal_get_data(j, field, &data, &size);
3062 if (r < 0 ||
3063 size > REPLACE_VAR_MAX)
3064 return strdup(field);
3065
3066 d = strlen(field) + 1;
3067
3068 return strndup((const char*) data + d, size - d);
3069 }
3070
3071 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
3072 const void *data;
3073 size_t size;
3074 sd_id128_t id;
3075 _cleanup_free_ char *text = NULL, *cid = NULL;
3076 char *t;
3077 int r;
3078
3079 assert_return(j, -EINVAL);
3080 assert_return(!journal_pid_changed(j), -ECHILD);
3081 assert_return(ret, -EINVAL);
3082
3083 r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
3084 if (r < 0)
3085 return r;
3086
3087 cid = strndup((const char*) data + 11, size - 11);
3088 if (!cid)
3089 return -ENOMEM;
3090
3091 r = sd_id128_from_string(cid, &id);
3092 if (r < 0)
3093 return r;
3094
3095 r = catalog_get(CATALOG_DATABASE, id, &text);
3096 if (r < 0)
3097 return r;
3098
3099 t = replace_var(text, lookup_field, j);
3100 if (!t)
3101 return -ENOMEM;
3102
3103 *ret = t;
3104 return 0;
3105 }
3106
3107 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
3108 assert_return(ret, -EINVAL);
3109
3110 return catalog_get(CATALOG_DATABASE, id, ret);
3111 }
3112
3113 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
3114 assert_return(j, -EINVAL);
3115 assert_return(!journal_pid_changed(j), -ECHILD);
3116
3117 j->data_threshold = sz;
3118 return 0;
3119 }
3120
3121 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
3122 assert_return(j, -EINVAL);
3123 assert_return(!journal_pid_changed(j), -ECHILD);
3124 assert_return(sz, -EINVAL);
3125
3126 *sz = j->data_threshold;
3127 return 0;
3128 }
3129
3130 _public_ int sd_journal_has_runtime_files(sd_journal *j) {
3131 assert_return(j, -EINVAL);
3132
3133 return j->has_runtime_files;
3134 }
3135
3136 _public_ int sd_journal_has_persistent_files(sd_journal *j) {
3137 assert_return(j, -EINVAL);
3138
3139 return j->has_persistent_files;
3140 }