1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include <linux/magic.h>
9 #include <sys/inotify.h>
13 #include "sd-journal.h"
15 #include "alloc-util.h"
18 #include "dirent-util.h"
23 #include "format-util.h"
26 #include "hostname-util.h"
27 #include "id128-util.h"
28 #include "inotify-util.h"
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-internal.h"
35 #include "nulstr-util.h"
36 #include "origin-id.h"
37 #include "path-util.h"
39 #include "process-util.h"
40 #include "replace-var.h"
41 #include "sort-util.h"
42 #include "stat-util.h"
43 #include "stdio-util.h"
44 #include "string-util.h"
46 #include "syslog-util.h"
47 #include "uid-classification.h"
49 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
51 /* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as
52 * we want to be able to show all officially valid paths at least */
53 #define REPLACE_VAR_MAX PATH_MAX
55 #define DEFAULT_DATA_THRESHOLD (64*1024)
57 DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_journal
, journal
);
59 static void remove_file_real(sd_journal
*j
, JournalFile
*f
);
60 static int journal_file_read_tail_timestamp(sd_journal
*j
, JournalFile
*f
);
61 static void journal_file_unlink_newest_by_boot_id(sd_journal
*j
, JournalFile
*f
);
63 static int journal_put_error(sd_journal
*j
, int r
, const char *path
) {
64 _cleanup_free_
char *copy
= NULL
;
67 /* Memorize an error we encountered, and store which
68 * file/directory it was generated from. Note that we store
69 * only *one* path per error code, as the error code is the
70 * key into the hashmap, and the path is the value. This means
71 * we keep track only of all error kinds, but not of all error
72 * locations. This has the benefit that the hashmap cannot
75 * We return an error here only if we didn't manage to
76 * memorize the real error. */
87 k
= hashmap_ensure_put(&j
->errors
, NULL
, INT_TO_PTR(r
), copy
);
99 static void detach_location(sd_journal
*j
) {
104 j
->current_file
= NULL
;
105 j
->current_field
= 0;
107 ORDERED_HASHMAP_FOREACH(f
, j
->files
)
108 journal_file_reset_location(f
);
111 static void init_location(Location
*l
, LocationType type
, JournalFile
*f
, Object
*o
) {
113 assert(IN_SET(type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
118 .seqnum
= le64toh(o
->entry
.seqnum
),
119 .seqnum_id
= f
->header
->seqnum_id
,
120 .realtime
= le64toh(o
->entry
.realtime
),
121 .monotonic
= le64toh(o
->entry
.monotonic
),
122 .boot_id
= o
->entry
.boot_id
,
123 .xor_hash
= le64toh(o
->entry
.xor_hash
),
125 .realtime_set
= true,
126 .monotonic_set
= true,
127 .xor_hash_set
= true,
131 static void set_location(sd_journal
*j
, JournalFile
*f
, Object
*o
) {
136 init_location(&j
->current_location
, LOCATION_DISCRETE
, f
, o
);
139 j
->current_field
= 0;
141 /* Let f know its candidate entry was picked. */
142 assert(f
->location_type
== LOCATION_SEEK
);
143 f
->location_type
= LOCATION_DISCRETE
;
146 static int match_is_valid(const void *data
, size_t size
) {
147 const char *b
= ASSERT_PTR(data
);
152 if (((char*) data
)[0] == '_' && ((char*) data
)[1] == '_')
155 for (const char *p
= b
; p
< b
+ size
; p
++) {
163 if (*p
>= 'A' && *p
<= 'Z')
166 if (ascii_isdigit(*p
))
175 static bool same_field(const void *_a
, size_t s
, const void *_b
, size_t t
) {
176 const uint8_t *a
= _a
, *b
= _b
;
178 for (size_t j
= 0; j
< s
&& j
< t
; j
++) {
187 assert_not_reached();
190 static Match
*match_new(Match
*p
, MatchType t
) {
203 LIST_PREPEND(matches
, p
->matches
, m
);
208 static Match
*match_free(Match
*m
) {
212 match_free(m
->matches
);
215 LIST_REMOVE(matches
, m
->parent
->matches
, m
);
221 static Match
*match_free_if_empty(Match
*m
) {
222 if (!m
|| m
->matches
)
225 return match_free(m
);
228 _public_
int sd_journal_add_match(sd_journal
*j
, const void *data
, size_t size
) {
229 Match
*add_here
= NULL
, *m
= NULL
;
232 assert_return(j
, -EINVAL
);
233 assert_return(!journal_origin_changed(j
), -ECHILD
);
234 assert_return(data
, -EINVAL
);
236 /* If the size is unspecified, assume it's a string. Note: 0 is the public value we document for
237 * this, for historical reasons. Internally, we pretty widely started using SIZE_MAX for this in
238 * similar cases however, hence accept that too. And internally we actually prefer it, to make things
239 * less surprising. */
240 if (IN_SET(size
, 0, SIZE_MAX
))
243 if (!match_is_valid(data
, size
))
250 * level 4: concrete matches */
253 j
->level0
= match_new(NULL
, MATCH_AND_TERM
);
259 j
->level1
= match_new(j
->level0
, MATCH_OR_TERM
);
265 j
->level2
= match_new(j
->level1
, MATCH_AND_TERM
);
270 assert(j
->level0
->type
== MATCH_AND_TERM
);
271 assert(j
->level1
->type
== MATCH_OR_TERM
);
272 assert(j
->level2
->type
== MATCH_AND_TERM
);
274 /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
275 * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
276 hash
= jenkins_hash64(data
, size
);
278 LIST_FOREACH(matches
, l3
, j
->level2
->matches
) {
279 assert(l3
->type
== MATCH_OR_TERM
);
281 LIST_FOREACH(matches
, l4
, l3
->matches
) {
282 assert(l4
->type
== MATCH_DISCRETE
);
284 /* Exactly the same match already? Then ignore
286 if (l4
->hash
== hash
&&
288 memcmp(l4
->data
, data
, size
) == 0)
291 /* Same field? Then let's add this to this OR term */
292 if (same_field(data
, size
, l4
->data
, l4
->size
)) {
303 add_here
= match_new(j
->level2
, MATCH_OR_TERM
);
308 m
= match_new(add_here
, MATCH_DISCRETE
);
314 m
->data
= memdup(data
, size
);
324 match_free_if_empty(add_here
);
325 j
->level2
= match_free_if_empty(j
->level2
);
326 j
->level1
= match_free_if_empty(j
->level1
);
327 j
->level0
= match_free_if_empty(j
->level0
);
332 int journal_add_match_pair(sd_journal
*j
, const char *field
, const char *value
) {
333 _cleanup_free_
char *s
= NULL
;
339 s
= strjoin(field
, "=", value
);
343 return sd_journal_add_match(j
, s
, SIZE_MAX
);
346 int journal_add_matchf(sd_journal
*j
, const char *format
, ...) {
347 _cleanup_free_
char *s
= NULL
;
354 va_start(ap
, format
);
355 r
= vasprintf(&s
, format
, ap
);
360 return sd_journal_add_match(j
, s
, SIZE_MAX
);
363 _public_
int sd_journal_add_conjunction(sd_journal
*j
) {
364 assert_return(j
, -EINVAL
);
365 assert_return(!journal_origin_changed(j
), -ECHILD
);
373 if (!j
->level1
->matches
)
382 _public_
int sd_journal_add_disjunction(sd_journal
*j
) {
383 assert_return(j
, -EINVAL
);
384 assert_return(!journal_origin_changed(j
), -ECHILD
);
395 if (!j
->level2
->matches
)
402 static char *match_make_string(Match
*m
) {
403 _cleanup_free_
char *p
= NULL
;
404 bool enclose
= false;
407 return strdup("none");
409 if (m
->type
== MATCH_DISCRETE
)
410 return cescape_length(m
->data
, m
->size
);
412 LIST_FOREACH(matches
, i
, m
->matches
) {
413 _cleanup_free_
char *t
= NULL
;
415 t
= match_make_string(i
);
420 if (!strextend(&p
, m
->type
== MATCH_OR_TERM
? " OR " : " AND ", t
))
429 return strjoin("(", p
, ")");
434 char *journal_make_match_string(sd_journal
*j
) {
437 return match_make_string(j
->level0
);
440 _public_
void sd_journal_flush_matches(sd_journal
*j
) {
441 if (!j
|| journal_origin_changed(j
))
445 match_free(j
->level0
);
447 j
->level0
= j
->level1
= j
->level2
= NULL
;
452 static int newest_by_boot_id_compare(const NewestByBootId
*a
, const NewestByBootId
*b
) {
453 return id128_compare_func(&a
->boot_id
, &b
->boot_id
);
456 static void journal_file_unlink_newest_by_boot_id(sd_journal
*j
, JournalFile
*f
) {
457 NewestByBootId
*found
;
462 if (f
->newest_boot_id_prioq_idx
== PRIOQ_IDX_NULL
) /* not linked currently, hence this is a NOP */
465 found
= typesafe_bsearch(&(NewestByBootId
) { .boot_id
= f
->newest_boot_id
},
466 j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
469 assert_se(prioq_remove(found
->prioq
, f
, &f
->newest_boot_id_prioq_idx
) > 0);
470 f
->newest_boot_id_prioq_idx
= PRIOQ_IDX_NULL
;
472 /* The prioq may be empty, but that should not cause any issue. Let's keep it. */
475 static void journal_clear_newest_by_boot_id(sd_journal
*j
) {
476 FOREACH_ARRAY(i
, j
->newest_by_boot_id
, j
->n_newest_by_boot_id
) {
479 while ((f
= prioq_peek(i
->prioq
)))
480 journal_file_unlink_newest_by_boot_id(j
, f
);
482 prioq_free(i
->prioq
);
485 j
->newest_by_boot_id
= mfree(j
->newest_by_boot_id
);
486 j
->n_newest_by_boot_id
= 0;
489 static int journal_file_newest_monotonic_compare(const void *a
, const void *b
) {
490 const JournalFile
*x
= a
, *y
= b
;
492 return -CMP(x
->newest_monotonic_usec
, y
->newest_monotonic_usec
); /* Invert order, we want newest first! */
495 static int journal_file_reshuffle_newest_by_boot_id(sd_journal
*j
, JournalFile
*f
) {
496 NewestByBootId
*found
;
502 found
= typesafe_bsearch(&(NewestByBootId
) { .boot_id
= f
->newest_boot_id
},
503 j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
505 /* There's already a priority queue for this boot ID */
507 if (f
->newest_boot_id_prioq_idx
== PRIOQ_IDX_NULL
) {
508 r
= prioq_put(found
->prioq
, f
, &f
->newest_boot_id_prioq_idx
); /* Insert if we aren't in there yet */
512 prioq_reshuffle(found
->prioq
, f
, &f
->newest_boot_id_prioq_idx
); /* Reshuffle otherwise */
515 _cleanup_(prioq_freep
) Prioq
*q
= NULL
;
517 /* No priority queue yet, then allocate one */
519 assert(f
->newest_boot_id_prioq_idx
== PRIOQ_IDX_NULL
); /* we can't be a member either */
521 q
= prioq_new(journal_file_newest_monotonic_compare
);
525 r
= prioq_put(q
, f
, &f
->newest_boot_id_prioq_idx
);
529 if (!GREEDY_REALLOC(j
->newest_by_boot_id
, j
->n_newest_by_boot_id
+ 1)) {
530 f
->newest_boot_id_prioq_idx
= PRIOQ_IDX_NULL
;
534 j
->newest_by_boot_id
[j
->n_newest_by_boot_id
++] = (NewestByBootId
) {
535 .boot_id
= f
->newest_boot_id
,
536 .prioq
= TAKE_PTR(q
),
539 typesafe_qsort(j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
545 static int journal_file_find_newest_for_boot_id(
550 JournalFile
*prev
= NULL
;
556 /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq
557 * accordingly. We do this only a bunch of times, to not be caught in some update loop. */
558 for (unsigned n_tries
= 0;; n_tries
++) {
559 NewestByBootId
*found
;
562 found
= typesafe_bsearch(&(NewestByBootId
) { .boot_id
= id
},
563 j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
565 f
= found
? prioq_peek(found
->prioq
) : NULL
;
567 return log_debug_errno(SYNTHETIC_ERRNO(ENODATA
),
568 "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id
));
570 if (f
== prev
|| n_tries
>= 5) {
571 /* This was already the best answer in the previous run, or we tried too often, use it */
578 /* Let's read the journal file's current timestamp once, before we return it, maybe it has changed. */
579 r
= journal_file_read_tail_timestamp(j
, f
);
581 return log_debug_errno(r
, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id
));
583 /* No new entry found. */
588 /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the
589 * prioq again and only use the information once we reached an equilibrium or hit a limit */
593 static int compare_boot_ids(sd_journal
*j
, sd_id128_t a
, sd_id128_t b
) {
598 /* Try to find the newest open journal file for the two boot ids */
599 if (journal_file_find_newest_for_boot_id(j
, a
, &x
) < 0 ||
600 journal_file_find_newest_for_boot_id(j
, b
, &y
) < 0)
603 /* Only compare the boot id timestamps if they originate from the same machine. If they are from
604 * different machines, then the timestamps of the boot ids might be as off as the timestamps on the
605 * entries and hence not useful for comparing. */
606 if (!sd_id128_equal(x
->newest_machine_id
, y
->newest_machine_id
))
609 return CMP(x
->newest_realtime_usec
, y
->newest_realtime_usec
);
612 static int compare_with_location(
614 const JournalFile
*f
,
616 const JournalFile
*current_file
) {
622 assert(f
->location_type
== LOCATION_SEEK
);
623 assert(IN_SET(l
->type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
625 if (l
->monotonic_set
&&
626 sd_id128_equal(f
->current_boot_id
, l
->boot_id
) &&
628 f
->current_realtime
== l
->realtime
&&
630 f
->current_xor_hash
== l
->xor_hash
&&
632 sd_id128_equal(f
->header
->seqnum_id
, l
->seqnum_id
) &&
633 f
->current_seqnum
== l
->seqnum
&&
638 sd_id128_equal(f
->header
->seqnum_id
, l
->seqnum_id
)) {
639 r
= CMP(f
->current_seqnum
, l
->seqnum
);
644 if (l
->monotonic_set
) {
645 /* If both arguments have the same boot ID, then we can compare the monotonic timestamps. If
646 * they are distinct, then we might be able to look up the timestamps of those boot IDs (if they
647 * are from the same machine) and order by that. */
648 if (sd_id128_equal(f
->current_boot_id
, l
->boot_id
))
649 r
= CMP(f
->current_monotonic
, l
->monotonic
);
651 r
= compare_boot_ids(j
, f
->current_boot_id
, l
->boot_id
);
656 if (l
->realtime_set
) {
657 r
= CMP(f
->current_realtime
, l
->realtime
);
662 if (l
->xor_hash_set
) {
663 r
= CMP(f
->current_xor_hash
, l
->xor_hash
);
671 static int next_for_match(
675 uint64_t after_offset
,
676 direction_t direction
,
687 if (m
->type
== MATCH_DISCRETE
) {
691 /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
692 * we can use what we pre-calculated. */
693 if (JOURNAL_HEADER_KEYED_HASH(f
->header
))
694 hash
= journal_file_hash_data(f
, m
->data
, m
->size
);
698 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, hash
, &d
, NULL
);
702 return journal_file_move_to_entry_by_offset_for_data(f
, d
, after_offset
, direction
, ret
, offset
);
704 } else if (m
->type
== MATCH_OR_TERM
) {
706 /* Find the earliest match beyond after_offset */
708 LIST_FOREACH(matches
, i
, m
->matches
) {
711 r
= next_for_match(j
, i
, f
, after_offset
, direction
, NULL
, &cp
);
715 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
< np
: cp
> np
))
723 } else if (m
->type
== MATCH_AND_TERM
) {
726 /* Always jump to the next matching entry and repeat
727 * this until we find an offset that matches for all
733 r
= next_for_match(j
, m
->matches
, f
, after_offset
, direction
, NULL
, &np
);
737 assert(direction
== DIRECTION_DOWN
? np
>= after_offset
: np
<= after_offset
);
738 last_moved
= m
->matches
;
740 LIST_LOOP_BUT_ONE(matches
, i
, m
->matches
, last_moved
) {
743 r
= next_for_match(j
, i
, f
, np
, direction
, NULL
, &cp
);
747 assert(direction
== DIRECTION_DOWN
? cp
>= np
: cp
<= np
);
748 if (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
) {
758 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, ret
);
769 static int find_location_for_match(
773 direction_t direction
,
783 if (m
->type
== MATCH_DISCRETE
) {
787 if (JOURNAL_HEADER_KEYED_HASH(f
->header
))
788 hash
= journal_file_hash_data(f
, m
->data
, m
->size
);
792 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, hash
, &d
, &dp
);
796 /* FIXME: missing: find by monotonic */
798 if (j
->current_location
.type
== LOCATION_HEAD
)
799 return direction
== DIRECTION_DOWN
? journal_file_move_to_entry_for_data(f
, d
, DIRECTION_DOWN
, ret
, offset
) : 0;
800 if (j
->current_location
.type
== LOCATION_TAIL
)
801 return direction
== DIRECTION_UP
? journal_file_move_to_entry_for_data(f
, d
, DIRECTION_UP
, ret
, offset
) : 0;
802 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
803 return journal_file_move_to_entry_by_seqnum_for_data(f
, d
, j
->current_location
.seqnum
, direction
, ret
, offset
);
804 if (j
->current_location
.monotonic_set
) {
805 r
= journal_file_move_to_entry_by_monotonic_for_data(f
, d
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, offset
);
809 /* The data object might have been invalidated. */
810 r
= journal_file_move_to_object(f
, OBJECT_DATA
, dp
, &d
);
814 if (j
->current_location
.realtime_set
)
815 return journal_file_move_to_entry_by_realtime_for_data(f
, d
, j
->current_location
.realtime
, direction
, ret
, offset
);
817 return journal_file_move_to_entry_for_data(f
, d
, direction
, ret
, offset
);
819 } else if (m
->type
== MATCH_OR_TERM
) {
822 /* Find the earliest match */
824 LIST_FOREACH(matches
, i
, m
->matches
) {
827 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
831 if (np
== 0 || (direction
== DIRECTION_DOWN
? np
> cp
: np
< cp
))
840 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, ret
);
853 assert(m
->type
== MATCH_AND_TERM
);
855 /* First jump to the last match, and then find the
856 * next one where all matches match */
861 LIST_FOREACH(matches
, i
, m
->matches
) {
864 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
868 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
))
872 return next_for_match(j
, m
, f
, np
, direction
, ret
, offset
);
876 static int find_location_with_matches(
879 direction_t direction
,
891 /* No matches is simple */
893 if (j
->current_location
.type
== LOCATION_HEAD
)
894 return direction
== DIRECTION_DOWN
? journal_file_next_entry(f
, 0, DIRECTION_DOWN
, ret
, offset
) : 0;
895 if (j
->current_location
.type
== LOCATION_TAIL
)
896 return direction
== DIRECTION_UP
? journal_file_next_entry(f
, 0, DIRECTION_UP
, ret
, offset
) : 0;
897 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
898 return journal_file_move_to_entry_by_seqnum(f
, j
->current_location
.seqnum
, direction
, ret
, offset
);
899 if (j
->current_location
.monotonic_set
) {
900 r
= journal_file_move_to_entry_by_monotonic(f
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, offset
);
904 if (j
->current_location
.realtime_set
)
905 return journal_file_move_to_entry_by_realtime(f
, j
->current_location
.realtime
, direction
, ret
, offset
);
907 return journal_file_next_entry(f
, 0, direction
, ret
, offset
);
909 return find_location_for_match(j
, j
->level0
, f
, direction
, ret
, offset
);
912 static int next_with_matches(
915 direction_t direction
,
924 /* No matches is easy. We simply advance the file
927 return journal_file_next_entry(f
, f
->current_offset
, direction
, ret
, offset
);
929 /* If we have a match then we look for the next matching entry
930 * with an offset at least one step larger */
931 return next_for_match(j
, j
->level0
, f
,
932 direction
== DIRECTION_DOWN
? f
->current_offset
+ 1
933 : f
->current_offset
- 1,
934 direction
, ret
, offset
);
937 static int next_beyond_location(sd_journal
*j
, JournalFile
*f
, direction_t direction
) {
939 uint64_t cp
, n_entries
;
945 (void) journal_file_read_tail_timestamp(j
, f
);
947 n_entries
= le64toh(f
->header
->n_entries
);
949 /* If we hit EOF before, we don't need to look into this file again
950 * unless direction changed or new entries appeared. */
951 if (f
->last_direction
== direction
&&
952 f
->location_type
== (direction
== DIRECTION_DOWN
? LOCATION_TAIL
: LOCATION_HEAD
) &&
953 n_entries
== f
->last_n_entries
)
956 f
->last_n_entries
= n_entries
;
958 if (f
->last_direction
== direction
&& f
->current_offset
> 0) {
959 /* LOCATION_SEEK here means we did the work in a previous
960 * iteration and the current location already points to a
961 * candidate entry. */
962 if (f
->location_type
!= LOCATION_SEEK
) {
963 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
967 journal_file_save_location(f
, c
, cp
);
970 f
->last_direction
= direction
;
972 r
= find_location_with_matches(j
, f
, direction
, &c
, &cp
);
976 journal_file_save_location(f
, c
, cp
);
979 /* OK, we found the spot, now let's advance until an entry
980 * that is actually different from what we were previously
981 * looking at. This is necessary to handle entries which exist
982 * in two (or more) journal files, and which shall all be
983 * suppressed but one. */
988 if (j
->current_location
.type
== LOCATION_DISCRETE
) {
991 k
= compare_with_location(j
, f
, &j
->current_location
, j
->current_file
);
993 found
= direction
== DIRECTION_DOWN
? k
> 0 : k
< 0;
1000 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
1004 journal_file_save_location(f
, c
, cp
);
1008 static int compare_locations(sd_journal
*j
, JournalFile
*af
, JournalFile
*bf
) {
1016 assert(af
->location_type
== LOCATION_SEEK
);
1017 assert(bf
->location_type
== LOCATION_SEEK
);
1019 /* If contents, timestamps and seqnum match, these entries are identical. */
1020 if (sd_id128_equal(af
->current_boot_id
, bf
->current_boot_id
) &&
1021 af
->current_monotonic
== bf
->current_monotonic
&&
1022 af
->current_realtime
== bf
->current_realtime
&&
1023 af
->current_xor_hash
== bf
->current_xor_hash
&&
1024 sd_id128_equal(af
->header
->seqnum_id
, bf
->header
->seqnum_id
) &&
1025 af
->current_seqnum
== bf
->current_seqnum
)
1028 if (sd_id128_equal(af
->header
->seqnum_id
, bf
->header
->seqnum_id
)) {
1029 /* If this is from the same seqnum source, compare seqnums */
1030 r
= CMP(af
->current_seqnum
, bf
->current_seqnum
);
1034 /* Wow! This is weird, different data but the same seqnums? Something is borked, but let's
1035 * make the best of it and compare by time. */
1038 if (sd_id128_equal(af
->current_boot_id
, bf
->current_boot_id
))
1039 /* If the boot id matches, compare monotonic time */
1040 r
= CMP(af
->current_monotonic
, bf
->current_monotonic
);
1042 /* If they don't match try to compare boot IDs */
1043 r
= compare_boot_ids(j
, af
->current_boot_id
, bf
->current_boot_id
);
1047 /* Otherwise, compare UTC time */
1048 r
= CMP(af
->current_realtime
, bf
->current_realtime
);
1052 /* Finally, compare by contents */
1053 return CMP(af
->current_xor_hash
, bf
->current_xor_hash
);
1056 static int real_journal_next(sd_journal
*j
, direction_t direction
) {
1057 JournalFile
*new_file
= NULL
;
1063 assert_return(j
, -EINVAL
);
1064 assert_return(!journal_origin_changed(j
), -ECHILD
);
1066 r
= iterated_cache_get(j
->files_cache
, NULL
, &files
, &n_files
);
1070 FOREACH_ARRAY(_f
, files
, n_files
) {
1071 JournalFile
*f
= (JournalFile
*) *_f
;
1074 r
= next_beyond_location(j
, f
, direction
);
1076 log_debug_errno(r
, "Can't iterate through %s, ignoring: %m", f
->path
);
1077 remove_file_real(j
, f
);
1079 } else if (r
== 0) {
1080 f
->location_type
= direction
== DIRECTION_DOWN
? LOCATION_TAIL
: LOCATION_HEAD
;
1089 k
= compare_locations(j
, f
, new_file
);
1091 found
= direction
== DIRECTION_DOWN
? k
< 0 : k
> 0;
1101 r
= journal_file_move_to_object(new_file
, OBJECT_ENTRY
, new_file
->current_offset
, &o
);
1105 set_location(j
, new_file
, o
);
1110 _public_
int sd_journal_next(sd_journal
*j
) {
1111 return real_journal_next(j
, DIRECTION_DOWN
);
1114 _public_
int sd_journal_previous(sd_journal
*j
) {
1115 return real_journal_next(j
, DIRECTION_UP
);
1118 _public_
int sd_journal_step_one(sd_journal
*j
, int advanced
) {
1119 assert_return(j
, -EINVAL
);
1121 if (j
->current_location
.type
== LOCATION_HEAD
)
1122 return sd_journal_next(j
);
1123 if (j
->current_location
.type
== LOCATION_TAIL
)
1124 return sd_journal_previous(j
);
1125 return real_journal_next(j
, advanced
? DIRECTION_DOWN
: DIRECTION_UP
);
1128 static int real_journal_next_skip(sd_journal
*j
, direction_t direction
, uint64_t skip
) {
1131 assert_return(j
, -EINVAL
);
1132 assert_return(!journal_origin_changed(j
), -ECHILD
);
1133 assert_return(skip
<= INT_MAX
, -ERANGE
);
1136 /* If this is not a discrete skip, then at least
1137 * resolve the current location */
1138 if (j
->current_location
.type
!= LOCATION_DISCRETE
) {
1139 r
= real_journal_next(j
, direction
);
1148 r
= real_journal_next(j
, direction
);
1162 _public_
int sd_journal_next_skip(sd_journal
*j
, uint64_t skip
) {
1163 return real_journal_next_skip(j
, DIRECTION_DOWN
, skip
);
1166 _public_
int sd_journal_previous_skip(sd_journal
*j
, uint64_t skip
) {
1167 return real_journal_next_skip(j
, DIRECTION_UP
, skip
);
1170 _public_
int sd_journal_get_cursor(sd_journal
*j
, char **cursor
) {
1174 assert_return(j
, -EINVAL
);
1175 assert_return(!journal_origin_changed(j
), -ECHILD
);
1176 assert_return(cursor
, -EINVAL
);
1178 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
1179 return -EADDRNOTAVAIL
;
1181 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
1185 if (asprintf(cursor
,
1186 "s=%s;i=%"PRIx64
";b=%s;m=%"PRIx64
";t=%"PRIx64
";x=%"PRIx64
,
1187 SD_ID128_TO_STRING(j
->current_file
->header
->seqnum_id
), le64toh(o
->entry
.seqnum
),
1188 SD_ID128_TO_STRING(o
->entry
.boot_id
), le64toh(o
->entry
.monotonic
),
1189 le64toh(o
->entry
.realtime
),
1190 le64toh(o
->entry
.xor_hash
)) < 0)
1196 _public_
int sd_journal_seek_cursor(sd_journal
*j
, const char *cursor
) {
1197 unsigned long long seqnum
, monotonic
, realtime
, xor_hash
;
1198 bool seqnum_id_set
= false,
1200 boot_id_set
= false,
1201 monotonic_set
= false,
1202 realtime_set
= false,
1203 xor_hash_set
= false;
1204 sd_id128_t seqnum_id
, boot_id
;
1207 assert_return(j
, -EINVAL
);
1208 assert_return(!journal_origin_changed(j
), -ECHILD
);
1209 assert_return(!isempty(cursor
), -EINVAL
);
1211 for (const char *p
= cursor
;;) {
1212 _cleanup_free_
char *word
= NULL
;
1214 r
= extract_first_word(&p
, &word
, ";", EXTRACT_DONT_COALESCE_SEPARATORS
);
1220 if (word
[0] == '\0' || word
[1] != '=')
1225 seqnum_id_set
= true;
1226 r
= sd_id128_from_string(word
+ 2, &seqnum_id
);
1233 if (sscanf(word
+ 2, "%llx", &seqnum
) != 1)
1239 r
= sd_id128_from_string(word
+ 2, &boot_id
);
1245 monotonic_set
= true;
1246 if (sscanf(word
+ 2, "%llx", &monotonic
) != 1)
1251 realtime_set
= true;
1252 if (sscanf(word
+ 2, "%llx", &realtime
) != 1)
1257 xor_hash_set
= true;
1258 if (sscanf(word
+ 2, "%llx", &xor_hash
) != 1)
1264 if ((!seqnum_set
|| !seqnum_id_set
) &&
1265 (!monotonic_set
|| !boot_id_set
) &&
1270 j
->current_location
= (Location
) {
1271 .type
= LOCATION_SEEK
,
1275 j
->current_location
.realtime
= (uint64_t) realtime
;
1276 j
->current_location
.realtime_set
= true;
1279 if (seqnum_set
&& seqnum_id_set
) {
1280 j
->current_location
.seqnum
= (uint64_t) seqnum
;
1281 j
->current_location
.seqnum_id
= seqnum_id
;
1282 j
->current_location
.seqnum_set
= true;
1285 if (monotonic_set
&& boot_id_set
) {
1286 j
->current_location
.monotonic
= (uint64_t) monotonic
;
1287 j
->current_location
.boot_id
= boot_id
;
1288 j
->current_location
.monotonic_set
= true;
1292 j
->current_location
.xor_hash
= (uint64_t) xor_hash
;
1293 j
->current_location
.xor_hash_set
= true;
1299 _public_
int sd_journal_test_cursor(sd_journal
*j
, const char *cursor
) {
1303 assert_return(j
, -EINVAL
);
1304 assert_return(!journal_origin_changed(j
), -ECHILD
);
1305 assert_return(!isempty(cursor
), -EINVAL
);
1307 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
1308 return -EADDRNOTAVAIL
;
1310 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
1315 _cleanup_free_
char *item
= NULL
;
1316 unsigned long long ll
;
1320 r
= extract_first_word(&cursor
, &item
, ";", EXTRACT_DONT_COALESCE_SEPARATORS
);
1327 if (strlen(item
) < 2 || item
[1] != '=')
1333 k
= sd_id128_from_string(item
+2, &id
);
1336 if (!sd_id128_equal(id
, j
->current_file
->header
->seqnum_id
))
1341 if (sscanf(item
+2, "%llx", &ll
) != 1)
1343 if (ll
!= le64toh(o
->entry
.seqnum
))
1348 k
= sd_id128_from_string(item
+2, &id
);
1351 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
1356 if (sscanf(item
+2, "%llx", &ll
) != 1)
1358 if (ll
!= le64toh(o
->entry
.monotonic
))
1363 if (sscanf(item
+2, "%llx", &ll
) != 1)
1365 if (ll
!= le64toh(o
->entry
.realtime
))
1370 if (sscanf(item
+2, "%llx", &ll
) != 1)
1372 if (ll
!= le64toh(o
->entry
.xor_hash
))
1381 _public_
int sd_journal_seek_monotonic_usec(sd_journal
*j
, sd_id128_t boot_id
, uint64_t usec
) {
1382 assert_return(j
, -EINVAL
);
1383 assert_return(!journal_origin_changed(j
), -ECHILD
);
1387 j
->current_location
= (Location
) {
1388 .type
= LOCATION_SEEK
,
1391 .monotonic_set
= true,
1397 _public_
int sd_journal_seek_realtime_usec(sd_journal
*j
, uint64_t usec
) {
1398 assert_return(j
, -EINVAL
);
1399 assert_return(!journal_origin_changed(j
), -ECHILD
);
1403 j
->current_location
= (Location
) {
1404 .type
= LOCATION_SEEK
,
1406 .realtime_set
= true,
1412 _public_
int sd_journal_seek_head(sd_journal
*j
) {
1413 assert_return(j
, -EINVAL
);
1414 assert_return(!journal_origin_changed(j
), -ECHILD
);
1418 j
->current_location
= (Location
) {
1419 .type
= LOCATION_HEAD
,
1425 _public_
int sd_journal_seek_tail(sd_journal
*j
) {
1426 assert_return(j
, -EINVAL
);
1427 assert_return(!journal_origin_changed(j
), -ECHILD
);
1431 j
->current_location
= (Location
) {
1432 .type
= LOCATION_TAIL
,
1438 static void check_network(sd_journal
*j
, int fd
) {
1444 j
->on_network
= fd_is_network_fs(fd
);
1447 static bool file_has_type_prefix(const char *prefix
, const char *filename
) {
1448 const char *full
, *tilded
, *atted
;
1450 full
= strjoina(prefix
, ".journal");
1451 tilded
= strjoina(full
, "~");
1452 atted
= strjoina(prefix
, "@");
1454 return STR_IN_SET(filename
, full
, tilded
) ||
1455 startswith(filename
, atted
);
1458 static bool file_type_wanted(int flags
, const char *filename
) {
1461 if (!ENDSWITH_SET(filename
, ".journal", ".journal~"))
1464 /* no flags set → every type is OK */
1465 if (!(flags
& (SD_JOURNAL_SYSTEM
| SD_JOURNAL_CURRENT_USER
)))
1468 if (FLAGS_SET(flags
, SD_JOURNAL_CURRENT_USER
)) {
1469 char prefix
[5 + DECIMAL_STR_MAX(uid_t
) + 1];
1471 xsprintf(prefix
, "user-" UID_FMT
, getuid());
1473 if (file_has_type_prefix(prefix
, filename
))
1476 /* If SD_JOURNAL_CURRENT_USER is specified and we are invoked under a system UID, then
1477 * automatically enable SD_JOURNAL_SYSTEM too, because journald will actually put system user
1478 * data into the system journal. */
1480 if (uid_for_system_journal(getuid()))
1481 flags
|= SD_JOURNAL_SYSTEM
;
1484 if (FLAGS_SET(flags
, SD_JOURNAL_SYSTEM
) && file_has_type_prefix("system", filename
))
1490 static bool path_has_prefix(sd_journal
*j
, const char *path
, const char *prefix
) {
1495 if (j
->toplevel_fd
>= 0)
1498 return path_startswith(path
, prefix
);
1501 static void track_file_disposition(sd_journal
*j
, JournalFile
*f
) {
1505 if (!j
->has_runtime_files
&& path_has_prefix(j
, f
->path
, "/run"))
1506 j
->has_runtime_files
= true;
1507 else if (!j
->has_persistent_files
&& path_has_prefix(j
, f
->path
, "/var"))
1508 j
->has_persistent_files
= true;
1511 static int add_any_file(
1516 _cleanup_close_
int our_fd
= -EBADF
;
1522 assert(fd
>= 0 || path
);
1525 assert(path
); /* For gcc. */
1526 if (j
->toplevel_fd
>= 0)
1527 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1528 * openat() ignores the first argument. */
1530 fd
= our_fd
= openat(j
->toplevel_fd
, skip_leading_slash(path
), O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
);
1532 fd
= our_fd
= open(path
, O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
);
1534 r
= log_debug_errno(errno
, "Failed to open journal file %s: %m", path
);
1538 r
= fd_nonblock(fd
, false);
1540 r
= log_debug_errno(errno
, "Failed to turn off O_NONBLOCK for %s: %m", path
);
1545 if (fstat(fd
, &st
) < 0) {
1546 r
= log_debug_errno(errno
, "Failed to fstat %s: %m", path
?: "fd");
1550 r
= stat_verify_regular(&st
);
1552 log_debug_errno(r
, "Refusing to open %s: %m", path
?: "fd");
1557 f
= ordered_hashmap_get(j
->files
, path
);
1559 if (stat_inode_same(&f
->last_stat
, &st
)) {
1560 /* We already track this file, under the same path and with the same
1561 * device/inode numbers, it's hence really the same. Mark this file as seen
1562 * in this generation. This is used to GC old files in process_q_overflow()
1563 * to detect journal files that are still there and discern them from those
1564 * which are gone. */
1566 f
->last_seen_generation
= j
->generation
;
1567 (void) journal_file_read_tail_timestamp(j
, f
);
1571 /* So we tracked a file under this name, but it has a different inode/device. In that
1572 * case, it got replaced (probably due to rotation?), let's drop it hence from our
1574 remove_file_real(j
, f
);
1579 if (ordered_hashmap_size(j
->files
) >= JOURNAL_FILES_MAX
) {
1580 r
= log_debug_errno(SYNTHETIC_ERRNO(ETOOMANYREFS
),
1581 "Too many open journal files, not adding %s.", path
?: "fd");
1585 r
= journal_file_open(fd
, path
, O_RDONLY
, 0, 0, 0, NULL
, j
->mmap
, NULL
, &f
);
1587 log_debug_errno(r
, "Failed to open journal file %s: %m", path
?: "from fd");
1591 /* journal_file_dump(f); */
1593 /* journal_file_open() generates a replacement fname if necessary, so we can use f->path. */
1594 r
= ordered_hashmap_put(j
->files
, f
->path
, f
);
1596 f
->close_fd
= false; /* Make sure journal_file_close() doesn't close the caller's fd
1597 * (or our own). The caller or we will do that ourselves. */
1598 (void) journal_file_close(f
);
1602 TAKE_FD(our_fd
); /* the fd is now owned by the JournalFile object */
1604 f
->last_seen_generation
= j
->generation
;
1606 track_file_disposition(j
, f
);
1607 check_network(j
, f
->fd
);
1608 (void) journal_file_read_tail_timestamp(j
, f
);
1610 j
->current_invalidate_counter
++;
1612 log_debug("File %s added.", f
->path
);
1617 (void) journal_put_error(j
, r
, path
); /* path==NULL is OK. */
/* Builds a string list of the directories that contain the files tracked in j->files and hands it to the
 * caller through *ret (ownership of the list is passed on via TAKE_PTR()). */
1621 int journal_get_directories(sd_journal
*j
, char ***ret
) {
1622 _cleanup_strv_free_
char **paths
= NULL
;
1625 size_t n
= SIZE_MAX
;
1631 /* This returns parent directories of opened journal files. */
1633 ORDERED_HASHMAP_FOREACH_KEY(f
, p
, j
->files
) {
1634 _cleanup_free_
char *d
= NULL
;
1636 /* Ignore paths generated from fd. */
1637 if (path_startswith(p
, "/proc/"))
/* Reduce the file path to its parent directory. */
1640 r
= path_extract_directory(p
, &d
);
/* Check whether this directory is already in the list (presumably to avoid listing it twice). */
1644 if (path_strv_contains(paths
, d
))
1647 r
= strv_extend_with_size(&paths
, &n
, d
);
1652 *ret
= TAKE_PTR(paths
);
/* Adds the journal file called filename that lives in the directory prefix: the two are joined into a
 * path which is passed to add_any_file() without an fd (-1). Before that, j->no_new_files and
 * file_type_wanted() for the journal's flags are consulted. */
1656 static int add_file_by_name(
1659 const char *filename
) {
1661 _cleanup_free_
char *path
= NULL
;
1667 if (j
->no_new_files
)
1670 if (!file_type_wanted(j
->flags
, filename
))
1673 path
= path_join(prefix
, filename
);
1677 return add_any_file(j
, -1, path
);
/* Counterpart of add_file_by_name(): joins prefix and filename into a path, looks up the JournalFile
 * tracked under that path in j->files and passes it to remove_file_real(). */
1680 static int remove_file_by_name(
1683 const char *filename
) {
1685 _cleanup_free_
char *path
= NULL
;
1692 path
= path_join(prefix
, filename
);
1696 f
= ordered_hashmap_get(j
->files
, path
);
1700 remove_file_real(j
, f
);
/* Stops tracking the journal file f: removes it from j->files, detaches every cursor of the journal
 * context that still points at it (current_file, unique_file, fields_file), unlinks it from the
 * newest-by-boot-id bookkeeping, closes it and bumps j->current_invalidate_counter. */
1704 static void remove_file_real(sd_journal
*j
, JournalFile
*f
) {
1708 (void) ordered_hashmap_remove(j
->files
, f
->path
);
1710 log_debug("File %s removed.", f
->path
);
/* The read position was inside this file: forget the file and restart field enumeration. */
1712 if (j
->current_file
== f
) {
1713 j
->current_file
= NULL
;
1714 j
->current_field
= 0;
1717 if (j
->unique_file
== f
) {
1718 /* Jump to the next unique_file or NULL if that one was last */
/* NOTE(review): f was already removed from j->files above, so this lookup is keyed on a path that is
 * no longer in the map; confirm ordered_hashmap_next() handles that as intended. */
1719 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
1720 j
->unique_offset
= 0;
1721 if (!j
->unique_file
)
1722 j
->unique_file_lost
= true;
/* Same handling for the field-name enumeration cursor. */
1725 if (j
->fields_file
== f
) {
1726 j
->fields_file
= ordered_hashmap_next(j
->files
, j
->fields_file
->path
);
1727 j
->fields_offset
= 0;
1728 if (!j
->fields_file
)
1729 j
->fields_file_lost
= true;
1732 journal_file_unlink_newest_by_boot_id(j
, f
);
1733 (void) journal_file_close(f
);
1735 j
->current_invalidate_counter
++;
/* Compares the directory name fn with the local machine ID obtained from sd_id128_get_machine(). fn may
 * carry a '.'-separated suffix; in that case the suffix is validated with log_namespace_name_valid() and
 * only the part before the dot is parsed as the ID. The final result is sd_id128_equal() of both IDs. */
1738 static int dirname_is_machine_id(const char *fn
) {
1739 sd_id128_t id
, machine
;
1743 /* Returns true if the specified directory name matches the local machine ID */
1745 r
= sd_id128_get_machine(&machine
);
1749 e
= strchr(fn
, '.');
1753 /* Looks like it has a namespace suffix. Verify that. */
1754 if (!log_namespace_name_valid(e
+ 1))
/* Parse only the part in front of the dot. */
1757 k
= strndupa_safe(fn
, e
- fn
);
1758 r
= sd_id128_from_string(k
, &id
);
/* No suffix: the whole name is parsed as the ID. */
1760 r
= sd_id128_from_string(fn
, &id
);
1764 return sd_id128_equal(id
, machine
);
/* Checks the directory name fn against a journal namespace. The name is split at the first '.'; the part
 * after the dot is compared with the namespace argument via streq(), and the ID part (or the whole name)
 * is validated with id128_is_valid(). */
1767 static int dirname_has_namespace(const char *fn
, const char *namespace) {
1770 /* Returns true if the specified directory name matches the specified namespace */
1772 e
= strchr(fn
, '.');
1779 if (!streq(e
+ 1, namespace))
1782 k
= strndupa_safe(fn
, e
- fn
);
1783 return id128_is_valid(k
);
1789 return id128_is_valid(fn
);
/* Filter for directory entries: the entry type is tested against regular file, symlink and unknown, and
 * the name must end in ".journal" or ".journal~". */
1792 static bool dirent_is_journal_file(const struct dirent
*de
) {
1795 /* Returns true if the specified directory entry looks like a journal file we might be interested in */
1797 if (!IN_SET(de
->d_type
, DT_REG
, DT_LNK
, DT_UNKNOWN
))
1800 return endswith(de
->d_name
, ".journal") ||
1801 endswith(de
->d_name
, ".journal~");
1804 static bool dirent_is_journal_subdir(const struct dirent
*de
) {
1808 /* returns true if the specified directory entry looks like a directory that might contain journal
1809 * files we might be interested in, i.e. is either a 128-bit ID or a 128-bit ID suffixed by a
1812 if (!IN_SET(de
->d_type
, DT_DIR
, DT_LNK
, DT_UNKNOWN
))
1815 e
= strchr(de
->d_name
, '.');
1817 return id128_is_valid(de
->d_name
); /* No namespace */
1819 n
= strndupa_safe(de
->d_name
, e
- de
->d_name
);
1820 if (!id128_is_valid(n
))
1823 return log_namespace_name_valid(e
+ 1);
/* Opens the directory path for enumeration and hands the DIR object out through *ret. When the journal
 * has a top-level fd, the directory is opened relative to it with xopendirat(); the case without a
 * top-level fd is tested for first. */
1826 static int directory_open(sd_journal
*j
, const char *path
, DIR **ret
) {
1833 if (j
->toplevel_fd
< 0)
1836 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1837 * relative, by dropping the initial slash */
1838 d
= xopendirat(j
->toplevel_fd
, skip_leading_slash(path
), 0);
/* Releases a Directory object: it is dropped from its journal's directories_by_wd map (and, if the
 * journal has an inotify fd, the corresponding watch is removed), dropped from directories_by_path, and
 * the removal is logged at debug level, with a separate message for root directories. */
1846 static Directory
* directory_free(Directory
*d
) {
/* Only remove the kernel watch if the entry really was registered under this watch descriptor. */
1852 hashmap_remove_value(d
->journal
->directories_by_wd
, INT_TO_PTR(d
->wd
), d
) &&
1853 d
->journal
->inotify_fd
>= 0)
1854 (void) inotify_rm_watch(d
->journal
->inotify_fd
, d
->wd
);
1857 hashmap_remove_value(d
->journal
->directories_by_path
, d
->path
, d
);
1862 log_debug("Root directory %s removed.", d
->path
);
1864 log_debug("Directory %s removed.", d
->path
);
1872 DEFINE_TRIVIAL_CLEANUP_FUNC(Directory
*, directory_free
);
1874 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1875 directories_by_path_hash_ops
,
1882 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1883 directories_by_wd_hash_ops
,
1886 trivial_compare_func
,
1890 static int add_directory_impl(sd_journal
*j
, const char *path
, bool is_root
, Directory
**ret
) {
1891 _cleanup_(directory_freep
) Directory
*m
= NULL
;
1892 Directory
*existing
;
1899 existing
= hashmap_get(j
->directories_by_path
, path
);
1901 if (existing
->is_root
!= is_root
) {
1902 /* Don't 'downgrade' from root directory */
1911 m
= new(Directory
, 1);
1918 .path
= strdup(path
),
1925 r
= hashmap_ensure_put(&j
->directories_by_path
, &directories_by_path_hash_ops
, m
->path
, m
);
1929 j
->current_invalidate_counter
++;
1932 log_debug("Root directory %s added.", m
->path
);
1934 log_debug("Directory %s added.", m
->path
);
1940 static int add_directory(sd_journal
*j
, const char *prefix
, const char *dirname
);
/* Walks all entries of the already opened directory d that belongs to the Directory object m. Entries
 * that look like journal files are added with add_file_by_name(); if m is a root directory, entries that
 * look like journal subdirectories are added with add_directory(). Failures of the individual additions
 * are ignored (cast to void), and a failed enumeration is only logged at debug level. */
1942 static void directory_enumerate(sd_journal
*j
, Directory
*m
, DIR *d
) {
1947 FOREACH_DIRENT_ALL(de
, d
, goto fail
) {
1948 if (dirent_is_journal_file(de
))
1949 (void) add_file_by_name(j
, m
->path
, de
->d_name
);
1951 if (m
->is_root
&& dirent_is_journal_subdir(de
))
1952 (void) add_directory(j
, m
->path
, de
->d_name
);
1957 log_debug_errno(errno
, "Failed to enumerate directory %s, ignoring: %m", m
->path
);
/* Establishes an inotify watch with the given event mask on the directory referred to by fd and
 * registers m under the resulting watch descriptor in j->directories_by_wd. All failures are logged at
 * debug level and otherwise ignored; if the registration in the hashmap fails, the watch is removed
 * again. */
1960 static void directory_watch(sd_journal
*j
, Directory
*m
, int fd
, uint32_t mask
) {
1967 /* Watch this directory if that's enabled and if it is not being watched yet. */
1969 if (m
->wd
> 0) /* Already have a watch? */
1971 if (j
->inotify_fd
< 0) /* Not watching at all? */
1974 m
->wd
= inotify_add_watch_fd(j
->inotify_fd
, fd
, mask
);
1976 log_debug_errno(m
->wd
, "Failed to watch journal directory '%s', ignoring: %m", m
->path
);
/* Make the Directory findable by watch descriptor when inotify events arrive. */
1980 r
= hashmap_ensure_put(&j
->directories_by_wd
, &directories_by_wd_hash_ops
, INT_TO_PTR(m
->wd
), m
);
1983 log_debug_errno(r
, "Directory '%s' already being watched under a different path, ignoring: %m", m
->path
);
1985 log_debug_errno(r
, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m
->path
);
1986 (void) inotify_rm_watch(j
->inotify_fd
, m
->wd
);
1992 static int add_directory(
1995 const char *dirname
) {
1997 _cleanup_free_
char *path
= NULL
;
1998 _cleanup_closedir_
DIR *d
= NULL
;
2005 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
2006 * and reenumerates directory contents */
2008 path
= path_join(prefix
, dirname
);
2014 log_debug("Considering directory '%s'.", path
);
2016 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
2017 if ((j
->flags
& SD_JOURNAL_LOCAL_ONLY
) &&
2018 !((dirname
&& dirname_is_machine_id(dirname
) > 0) || path_has_prefix(j
, path
, "/run")))
2022 (!(FLAGS_SET(j
->flags
, SD_JOURNAL_ALL_NAMESPACES
) ||
2023 dirname_has_namespace(dirname
, j
->namespace) > 0 ||
2024 (FLAGS_SET(j
->flags
, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE
) && dirname_has_namespace(dirname
, NULL
) > 0))))
2027 r
= directory_open(j
, path
, &d
);
2029 log_debug_errno(r
, "Failed to open directory '%s': %m", path
);
2033 r
= add_directory_impl(j
, path
, /* is_root = */ false, &m
);
2039 m
->last_seen_generation
= j
->generation
;
2041 directory_watch(j
, m
, dirfd(d
),
2042 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
2043 IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
|IN_MOVED_FROM
|
2046 if (!j
->no_new_files
)
2047 directory_enumerate(j
, m
, d
);
2049 check_network(j
, dirfd(d
));
2054 k
= journal_put_error(j
, r
, path
?: prefix
);
2061 static int add_root_directory(sd_journal
*j
, const char *p
, bool missing_ok
) {
2063 _cleanup_closedir_
DIR *d
= NULL
;
2069 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
2070 * update the inotify logic, and re-enumerate the directory entries. This call may hence be called to initially
2071 * populate the set, as well as to update it later. */
2074 /* If there's a path specified, use it. */
2076 log_debug("Considering root directory '%s'.", p
);
2078 if ((j
->flags
& SD_JOURNAL_RUNTIME_ONLY
) &&
2079 !path_has_prefix(j
, p
, "/run"))
2083 p
= strjoina(j
->prefix
, p
);
2085 r
= directory_open(j
, p
, &d
);
2086 if (r
== -ENOENT
&& missing_ok
)
2089 log_debug_errno(r
, "Failed to open root directory %s: %m", p
);
2093 _cleanup_close_
int dfd
= -EBADF
;
2095 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
2096 * opendir() will take possession of the fd, and close it, which we don't want. */
2098 p
= "."; /* store this as "." in the directories hashmap */
2100 dfd
= fcntl(j
->toplevel_fd
, F_DUPFD_CLOEXEC
, 3);
2106 d
= take_fdopendir(&dfd
);
2115 r
= add_directory_impl(j
, p
, /* is_root = */ true, &m
);
2121 directory_watch(j
, m
, dirfd(d
),
2122 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
2125 if (!j
->no_new_files
)
2126 directory_enumerate(j
, m
, d
);
2128 check_network(j
, dirfd(d
));
2133 k
= journal_put_error(j
, r
, p
);
/* Adds the default journal root directories to the journal context: /run/log/journal and
 * /var/log/journal always, and /var/log/journal/remote unless SD_JOURNAL_LOCAL_ONLY is set. Each
 * directory is added with missing_ok = true and the individual results are discarded. */
2140 static int add_search_paths(sd_journal
*j
) {
2142 static const char search_paths
[] =
2143 "/run/log/journal\0"
2144 "/var/log/journal\0";
2148 /* We ignore most errors here, since the idea is to only open
2149 * what's actually accessible, and ignore the rest. */
2151 NULSTR_FOREACH(p
, search_paths
)
2152 (void) add_root_directory(j
, p
, true);
2154 if (!(j
->flags
& SD_JOURNAL_LOCAL_ONLY
))
2155 (void) add_root_directory(j
, "/var/log/journal/remote", true);
/* For journals with a fixed file set (j->no_new_files, asserted below): derives the parent directory of
 * every file in j->files and registers it through add_directory(), passing the directory as prefix and
 * no dirname. */
2160 static int add_current_paths(sd_journal
*j
) {
2164 assert(j
->no_new_files
);
2166 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
2167 * treat them as fatal. */
2169 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
2170 _cleanup_free_
char *dir
= NULL
;
2173 r
= path_extract_directory(f
->path
, &dir
);
2177 r
= add_directory(j
, dir
, NULL
);
/* Creates the journal's inotify fd on demand: if j->inotify_fd is not valid yet, a new non-blocking,
 * close-on-exec inotify instance is created and stored there. */
2185 static int allocate_inotify(sd_journal
*j
) {
2188 if (j
->inotify_fd
< 0) {
2189 j
->inotify_fd
= inotify_init1(IN_NONBLOCK
|IN_CLOEXEC
);
2190 if (j
->inotify_fd
< 0)
2197 static sd_journal
*journal_new(int flags
, const char *path
, const char *namespace) {
2198 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2200 j
= new(sd_journal
, 1);
2205 .origin_id
= origin_id_query(),
2206 .toplevel_fd
= -EBADF
,
2207 .inotify_fd
= -EBADF
,
2209 .data_threshold
= DEFAULT_DATA_THRESHOLD
,
2219 if (flags
& SD_JOURNAL_OS_ROOT
)
2226 j
->namespace = strdup(namespace);
2231 j
->files
= ordered_hashmap_new(&path_hash_ops
);
2235 j
->files_cache
= ordered_hashmap_iterated_cache_new(j
->files
);
2236 j
->mmap
= mmap_cache_new();
2237 if (!j
->files_cache
|| !j
->mmap
)
2243 #define OPEN_ALLOWED_FLAGS \
2244 (SD_JOURNAL_LOCAL_ONLY | \
2245 SD_JOURNAL_RUNTIME_ONLY | \
2246 SD_JOURNAL_SYSTEM | \
2247 SD_JOURNAL_CURRENT_USER | \
2248 SD_JOURNAL_ALL_NAMESPACES | \
2249 SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE | \
2250 SD_JOURNAL_ASSUME_IMMUTABLE)
2252 _public_
int sd_journal_open_namespace(sd_journal
**ret
, const char *namespace, int flags
) {
2253 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2256 assert_return(ret
, -EINVAL
);
2257 assert_return((flags
& ~OPEN_ALLOWED_FLAGS
) == 0, -EINVAL
);
2259 j
= journal_new(flags
, NULL
, namespace);
2263 r
= add_search_paths(j
);
/* Public entry point: opens the journal without an explicit namespace. This is a thin wrapper that
 * forwards ret and flags to sd_journal_open_namespace() with a NULL namespace and returns its result. */
2271 _public_
int sd_journal_open(sd_journal
**ret
, int flags
) {
2272 return sd_journal_open_namespace(ret
, NULL
, flags
);
2275 #define OPEN_CONTAINER_ALLOWED_FLAGS \
2276 (SD_JOURNAL_LOCAL_ONLY | \
2277 SD_JOURNAL_SYSTEM | \
2278 SD_JOURNAL_ASSUME_IMMUTABLE)
2280 _public_
int sd_journal_open_container(sd_journal
**ret
, const char *machine
, int flags
) {
2281 _cleanup_free_
char *root
= NULL
, *class = NULL
;
2282 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2286 /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in
2287 * combination with sd_journal_open_directory_fd(). */
2289 assert_return(machine
, -EINVAL
);
2290 assert_return(ret
, -EINVAL
);
2291 assert_return((flags
& ~OPEN_CONTAINER_ALLOWED_FLAGS
) == 0, -EINVAL
);
2292 assert_return(hostname_is_valid(machine
, 0), -EINVAL
);
2294 p
= strjoina("/run/systemd/machines/", machine
);
2295 r
= parse_env_file(NULL
, p
,
2305 if (!streq_ptr(class, "container"))
2308 j
= journal_new(flags
, root
, NULL
);
2312 r
= add_search_paths(j
);
2320 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
2321 (SD_JOURNAL_OS_ROOT | \
2322 SD_JOURNAL_SYSTEM | \
2323 SD_JOURNAL_CURRENT_USER | \
2324 SD_JOURNAL_ASSUME_IMMUTABLE)
2326 _public_
int sd_journal_open_directory(sd_journal
**ret
, const char *path
, int flags
) {
2327 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2330 assert_return(ret
, -EINVAL
);
2331 assert_return(path
, -EINVAL
);
2332 assert_return((flags
& ~OPEN_DIRECTORY_ALLOWED_FLAGS
) == 0, -EINVAL
);
2334 j
= journal_new(flags
, path
, NULL
);
2338 if (flags
& SD_JOURNAL_OS_ROOT
)
2339 r
= add_search_paths(j
);
2341 r
= add_root_directory(j
, path
, false);
2349 #define OPEN_FILES_ALLOWED_FLAGS \
2350 (SD_JOURNAL_ASSUME_IMMUTABLE)
2352 _public_
int sd_journal_open_files(sd_journal
**ret
, const char **paths
, int flags
) {
2353 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2356 assert_return(ret
, -EINVAL
);
2357 assert_return((flags
& ~OPEN_FILES_ALLOWED_FLAGS
) == 0, -EINVAL
);
2359 j
= journal_new(flags
, NULL
, NULL
);
2363 STRV_FOREACH(path
, paths
) {
2364 r
= add_any_file(j
, -1, *path
);
2369 j
->no_new_files
= true;
2375 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
2376 (SD_JOURNAL_OS_ROOT | \
2377 SD_JOURNAL_SYSTEM | \
2378 SD_JOURNAL_CURRENT_USER | \
2379 SD_JOURNAL_TAKE_DIRECTORY_FD | \
2380 SD_JOURNAL_ASSUME_IMMUTABLE)
2382 _public_
int sd_journal_open_directory_fd(sd_journal
**ret
, int fd
, int flags
) {
2383 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2388 assert_return(ret
, -EINVAL
);
2389 assert_return(fd
>= 0, -EBADF
);
2390 assert_return((flags
& ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS
) == 0, -EINVAL
);
2392 if (fstat(fd
, &st
) < 0)
2395 if (!S_ISDIR(st
.st_mode
))
2398 take_fd
= FLAGS_SET(flags
, SD_JOURNAL_TAKE_DIRECTORY_FD
);
2399 j
= journal_new(flags
& ~SD_JOURNAL_TAKE_DIRECTORY_FD
, NULL
, NULL
);
2403 j
->toplevel_fd
= fd
;
2405 if (flags
& SD_JOURNAL_OS_ROOT
)
2406 r
= add_search_paths(j
);
2408 r
= add_root_directory(j
, NULL
, false);
2412 SET_FLAG(j
->flags
, SD_JOURNAL_TAKE_DIRECTORY_FD
, take_fd
);
2418 #define OPEN_FILES_FD_ALLOWED_FLAGS \
2419 (SD_JOURNAL_ASSUME_IMMUTABLE)
2421 _public_
int sd_journal_open_files_fd(sd_journal
**ret
, int fds
[], unsigned n_fds
, int flags
) {
2423 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2426 assert_return(ret
, -EINVAL
);
2427 assert_return(n_fds
> 0, -EBADF
);
2428 assert_return((flags
& ~OPEN_FILES_FD_ALLOWED_FLAGS
) == 0, -EINVAL
);
2430 j
= journal_new(flags
, NULL
, NULL
);
2434 for (unsigned i
= 0; i
< n_fds
; i
++) {
2442 if (fstat(fds
[i
], &st
) < 0) {
2447 r
= stat_verify_regular(&st
);
2451 r
= add_any_file(j
, fds
[i
], NULL
);
2456 j
->no_new_files
= true;
2457 j
->no_inotify
= true;
2463 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2465 ORDERED_HASHMAP_FOREACH(f
, j
->files
)
2466 f
->close_fd
= false;
2471 _public_
void sd_journal_close(sd_journal
*j
) {
2472 if (!j
|| journal_origin_changed(j
))
2475 journal_clear_newest_by_boot_id(j
);
2477 sd_journal_flush_matches(j
);
2479 ordered_hashmap_free_with_destructor(j
->files
, journal_file_close
);
2480 iterated_cache_free(j
->files_cache
);
2482 hashmap_free(j
->directories_by_path
);
2483 hashmap_free(j
->directories_by_wd
);
2485 if (FLAGS_SET(j
->flags
, SD_JOURNAL_TAKE_DIRECTORY_FD
))
2486 safe_close(j
->toplevel_fd
);
2488 safe_close(j
->inotify_fd
);
2491 mmap_cache_stats_log_debug(j
->mmap
);
2492 mmap_cache_unref(j
->mmap
);
2495 hashmap_free_free(j
->errors
);
2497 set_free(j
->exclude_syslog_identifiers
);
2502 free(j
->unique_field
);
2503 free(j
->fields_buffer
);
2507 static int journal_file_read_tail_timestamp(sd_journal
*j
, JournalFile
*f
) {
2508 uint64_t offset
, mo
, rt
;
2518 /* Tries to read the timestamp of the most recently written entry. */
2520 if (FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
) && f
->newest_entry_offset
!= 0)
2521 return 0; /* We have already read the file, and we assume that the file is immutable. */
2523 if (f
->header
->state
== f
->newest_state
&&
2524 f
->header
->state
== STATE_ARCHIVED
&&
2525 f
->newest_entry_offset
!= 0)
2526 return 0; /* We have already read archived file. */
2528 if (JOURNAL_HEADER_CONTAINS(f
->header
, tail_entry_offset
)) {
2529 offset
= le64toh(READ_NOW(f
->header
->tail_entry_offset
));
2530 type
= OBJECT_ENTRY
;
2532 offset
= le64toh(READ_NOW(f
->header
->tail_object_offset
));
2533 type
= OBJECT_UNUSED
;
2536 return -ENODATA
; /* not a single object/entry, hence no tail timestamp */
2537 if (offset
== f
->newest_entry_offset
)
2538 return 0; /* No new entry is added after we read last time. */
2540 /* Move to the last object in the journal file, in the hope it is an entry (which it usually will
2541 * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED
2542 * here, since we cannot be sure what the last object will be, and want no noisy logging if it isn't
2543 * an entry. We instead check after figuring out the pointer. */
2544 r
= journal_file_move_to_object(f
, type
, offset
, &o
);
2546 log_debug_errno(r
, "Failed to move to last object in journal file, ignoring: %m");
2550 if (o
&& o
->object
.type
== OBJECT_ENTRY
) {
2551 /* Yay, last object is an entry, let's use the data. */
2552 id
= o
->entry
.boot_id
;
2553 mo
= le64toh(o
->entry
.monotonic
);
2554 rt
= le64toh(o
->entry
.realtime
);
2556 /* So the object is not an entry or we couldn't access it? In that case, let's read the most
2557 * recent entry timestamps from the header. It's equally good. Unfortunately though, in old
2558 * versions of the journal the boot ID in the header doesn't have to match the monotonic
2559 * timestamp of the header. Let's check the header flag that indicates whether this strictly
2560 * matches first hence, before using the data. */
2562 if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f
->header
) && f
->header
->state
== STATE_ARCHIVED
) {
2563 mo
= le64toh(f
->header
->tail_entry_monotonic
);
2564 rt
= le64toh(f
->header
->tail_entry_realtime
);
2565 id
= f
->header
->tail_entry_boot_id
;
2566 offset
= UINT64_MAX
;
2568 /* Otherwise let's find the last entry manually (this possibly means traversing the
2569 * chain of entry arrays, till the end */
2570 r
= journal_file_next_entry(f
, 0, DIRECTION_UP
, &o
, offset
== 0 ? &offset
: NULL
);
2576 id
= o
->entry
.boot_id
;
2577 mo
= le64toh(o
->entry
.monotonic
);
2578 rt
= le64toh(o
->entry
.realtime
);
2582 if (mo
> rt
) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */
2585 if (offset
== f
->newest_entry_offset
) {
2586 /* Cached data and the current one should be equivalent. */
2587 if (!sd_id128_equal(f
->newest_machine_id
, f
->header
->machine_id
) ||
2588 !sd_id128_equal(f
->newest_boot_id
, id
) ||
2589 f
->newest_monotonic_usec
!= mo
||
2590 f
->newest_realtime_usec
!= rt
)
2593 return 0; /* No new entry is added after we read last time. */
2596 if (!sd_id128_equal(f
->newest_boot_id
, id
))
2597 journal_file_unlink_newest_by_boot_id(j
, f
);
2599 f
->newest_boot_id
= id
;
2600 f
->newest_monotonic_usec
= mo
;
2601 f
->newest_realtime_usec
= rt
;
2602 f
->newest_machine_id
= f
->header
->machine_id
;
2603 f
->newest_entry_offset
= offset
;
2604 f
->newest_state
= f
->header
->state
;
2606 r
= journal_file_reshuffle_newest_by_boot_id(j
, f
);
2610 return 1; /* Updated. */
2613 _public_
int sd_journal_get_realtime_usec(sd_journal
*j
, uint64_t *ret
) {
2618 assert_return(j
, -EINVAL
);
2619 assert_return(!journal_origin_changed(j
), -ECHILD
);
2621 f
= j
->current_file
;
2623 return -EADDRNOTAVAIL
;
2624 if (f
->current_offset
<= 0)
2625 return -EADDRNOTAVAIL
;
2627 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2631 uint64_t t
= le64toh(o
->entry
.realtime
);
2632 if (!VALID_REALTIME(t
))
2641 _public_
int sd_journal_get_monotonic_usec(sd_journal
*j
, uint64_t *ret
, sd_id128_t
*ret_boot_id
) {
2646 assert_return(j
, -EINVAL
);
2647 assert_return(!journal_origin_changed(j
), -ECHILD
);
2649 f
= j
->current_file
;
2651 return -EADDRNOTAVAIL
;
2652 if (f
->current_offset
<= 0)
2653 return -EADDRNOTAVAIL
;
2655 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2662 r
= sd_id128_get_boot(&id
);
2666 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
2670 uint64_t t
= le64toh(o
->entry
.monotonic
);
2671 if (!VALID_MONOTONIC(t
))
2677 *ret_boot_id
= o
->entry
.boot_id
;
2682 _public_
int sd_journal_get_seqnum(
2684 uint64_t *ret_seqnum
,
2685 sd_id128_t
*ret_seqnum_id
) {
2691 assert_return(j
, -EINVAL
);
2692 assert_return(!journal_origin_changed(j
), -ECHILD
);
2694 f
= j
->current_file
;
2696 return -EADDRNOTAVAIL
;
2698 if (f
->current_offset
<= 0)
2699 return -EADDRNOTAVAIL
;
2701 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2706 *ret_seqnum_id
= f
->header
->seqnum_id
;
2708 *ret_seqnum
= le64toh(o
->entry
.seqnum
);
/* Validates a journal field name. Names starting with "__" are tested for up front, then the name is
 * scanned character by character; uppercase ASCII letters and ASCII digits are among the character
 * classes the scan distinguishes. */
2713 static bool field_is_valid(const char *field
) {
2719 if (startswith(field
, "__"))
2722 for (const char *p
= field
; *p
; p
++) {
2727 if (*p
>= 'A' && *p
<= 'Z')
2730 if (ascii_isdigit(*p
))
2739 _public_
int sd_journal_get_data(sd_journal
*j
, const char *field
, const void **data
, size_t *size
) {
2741 size_t field_length
;
2745 assert_return(j
, -EINVAL
);
2746 assert_return(!journal_origin_changed(j
), -ECHILD
);
2747 assert_return(field
, -EINVAL
);
2748 assert_return(data
, -EINVAL
);
2749 assert_return(size
, -EINVAL
);
2750 assert_return(field_is_valid(field
), -EINVAL
);
2752 f
= j
->current_file
;
2754 return -EADDRNOTAVAIL
;
2756 if (f
->current_offset
<= 0)
2757 return -EADDRNOTAVAIL
;
2759 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2763 field_length
= strlen(field
);
2765 uint64_t n
= journal_file_entry_n_items(f
, o
);
2766 for (uint64_t i
= 0; i
< n
; i
++) {
2771 p
= journal_file_entry_item_object_offset(f
, o
, i
);
2772 r
= journal_file_data_payload(f
, NULL
, p
, field
, field_length
, j
->data_threshold
, &d
, &l
);
2775 if (IN_SET(r
, -EADDRNOTAVAIL
, -EBADMSG
)) {
2776 log_debug_errno(r
, "Entry item %"PRIu64
" data object is bad, skipping over it: %m", i
);
2791 _public_
int sd_journal_enumerate_data(sd_journal
*j
, const void **data
, size_t *size
) {
2796 assert_return(j
, -EINVAL
);
2797 assert_return(!journal_origin_changed(j
), -ECHILD
);
2798 assert_return(data
, -EINVAL
);
2799 assert_return(size
, -EINVAL
);
2801 f
= j
->current_file
;
2803 return -EADDRNOTAVAIL
;
2805 if (f
->current_offset
<= 0)
2806 return -EADDRNOTAVAIL
;
2808 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2812 for (uint64_t n
= journal_file_entry_n_items(f
, o
); j
->current_field
< n
; j
->current_field
++) {
2817 p
= journal_file_entry_item_object_offset(f
, o
, j
->current_field
);
2818 r
= journal_file_data_payload(f
, NULL
, p
, NULL
, 0, j
->data_threshold
, &d
, &l
);
2819 if (IN_SET(r
, -EADDRNOTAVAIL
, -EBADMSG
)) {
2820 log_debug_errno(r
, "Entry item %"PRIu64
" data object is bad, skipping over it: %m", j
->current_field
);
/* Variant of sd_journal_enumerate_data() that steps over fields which cannot be provided: the result of
 * sd_journal_enumerate_data() is classified with JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(), and for such
 * results j->current_field is advanced so that the next field is tried. */
2838 _public_
int sd_journal_enumerate_available_data(sd_journal
*j
, const void **data
, size_t *size
) {
2842 r
= sd_journal_enumerate_data(j
, data
, size
);
2845 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r
))
2847 j
->current_field
++; /* Try with the next field */
/* Rewinds field enumeration for the current entry by resetting j->current_field to 0. A NULL journal or
 * one whose origin changed is tested for first. */
2851 _public_
void sd_journal_restart_data(sd_journal
*j
) {
2852 if (!j
|| journal_origin_changed(j
))
2855 j
->current_field
= 0;
/* Re-runs the directory discovery that matches how the journal was opened: a fixed file set
 * (j->no_new_files) goes through add_current_paths(), SD_JOURNAL_OS_ROOT through add_search_paths(), a
 * top-level fd through add_root_directory() without a path, an explicit j->path through
 * add_root_directory() with that path, and everything else through add_search_paths(). */
2858 static int reiterate_all_paths(sd_journal
*j
) {
2861 if (j
->no_new_files
)
2862 return add_current_paths(j
);
2864 if (j
->flags
& SD_JOURNAL_OS_ROOT
)
2865 return add_search_paths(j
);
2867 if (j
->toplevel_fd
>= 0)
2868 return add_root_directory(j
, NULL
, false);
2871 return add_root_directory(j
, j
->path
, true);
2873 return add_search_paths(j
);
/* Returns the inotify fd of the journal context. If one already exists it is returned directly;
 * otherwise it is allocated with allocate_inotify() and all paths are re-iterated so that the watches
 * get established. Fails with -EINVAL for a NULL journal, -ECHILD after an origin change and -EUNATCH
 * for journals opened with SD_JOURNAL_ASSUME_IMMUTABLE; -EMEDIUMTYPE is returned for a further case
 * whose condition is checked right before it. */
2876 _public_
int sd_journal_get_fd(sd_journal
*j
) {
2879 assert_return(j
, -EINVAL
);
2880 assert_return(!journal_origin_changed(j
), -ECHILD
);
2881 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
2884 return -EMEDIUMTYPE
;
2886 if (j
->inotify_fd
>= 0)
2887 return j
->inotify_fd
;
2889 r
= allocate_inotify(j
);
2893 log_debug("Reiterating files to get inotify watches established.");
2895 /* Iterate through all dirs again, to add them to the inotify */
2896 r
= reiterate_all_paths(j
);
2900 return j
->inotify_fd
;
2903 _public_
int sd_journal_get_events(sd_journal
*j
) {
2906 assert_return(j
, -EINVAL
);
2907 assert_return(!journal_origin_changed(j
), -ECHILD
);
2908 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
2910 fd
= sd_journal_get_fd(j
);
/* Reports through *timeout_usec when the caller should next process the journal even without an inotify
 * event. For journals that are not on a network file system this is UINT64_MAX; otherwise it is the
 * time of the last processing (j->last_process_usec) plus JOURNAL_FILES_RECHECK_USEC. The inotify fd is
 * obtained through sd_journal_get_fd() first. */
2917 _public_
int sd_journal_get_timeout(sd_journal
*j
, uint64_t *timeout_usec
) {
2920 assert_return(j
, -EINVAL
);
2921 assert_return(!journal_origin_changed(j
), -ECHILD
);
2922 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
2923 assert_return(timeout_usec
, -EINVAL
);
2925 fd
= sd_journal_get_fd(j
);
2929 if (!j
->on_network
) {
2930 *timeout_usec
= UINT64_MAX
;
2934 /* If we are on the network we need to regularly check for
2935 * changes manually */
2937 *timeout_usec
= j
->last_process_usec
+ JOURNAL_FILES_RECHECK_USEC
;
/* Resynchronizes the journal context with the file system after the inotify queue overflowed: all paths
 * are re-iterated, and afterwards every tracked file and every non-root directory whose
 * last_seen_generation differs from j->generation is reported as no longer present. Stale files are
 * handed to remove_file_real(). */
2941 static void process_q_overflow(sd_journal
*j
) {
2947 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2948 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2949 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2950 * are subject for unloading. */
2952 log_debug("Inotify queue overrun, reiterating everything.");
2955 (void) reiterate_all_paths(j
);
2957 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
2959 if (f
->last_seen_generation
== j
->generation
)
2962 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f
->path
);
2963 remove_file_real(j
, f
);
2966 HASHMAP_FOREACH(m
, j
->directories_by_path
) {
2968 if (m
->last_seen_generation
== j
->generation
)
2971 if (m
->is_root
) /* Never GC root directories */
/* Log the directory being visited (m). The file iterator f belongs to the loop above and does not
 * refer to a valid object at this point. */
2974 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", m
->path
);
2978 log_debug("Reiteration complete.");
2981 static void process_inotify_event(sd_journal
*j
, const struct inotify_event
*e
) {
2987 if (e
->mask
& IN_Q_OVERFLOW
) {
2988 process_q_overflow(j
);
2992 /* Is this a subdirectory we watch? */
2993 d
= hashmap_get(j
->directories_by_wd
, INT_TO_PTR(e
->wd
));
2995 if (!(e
->mask
& IN_ISDIR
) && e
->len
> 0 &&
2996 (endswith(e
->name
, ".journal") ||
2997 endswith(e
->name
, ".journal~"))) {
2999 /* Event for a journal file */
3001 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
3002 (void) add_file_by_name(j
, d
->path
, e
->name
);
3003 else if (e
->mask
& (IN_DELETE
|IN_MOVED_FROM
|IN_UNMOUNT
))
3004 (void) remove_file_by_name(j
, d
->path
, e
->name
);
3006 } else if (!d
->is_root
&& e
->len
== 0) {
3008 /* Event for a subdirectory */
3010 if (e
->mask
& (IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
))
3013 } else if (d
->is_root
&& (e
->mask
& IN_ISDIR
) && e
->len
> 0 && id128_is_valid(e
->name
)) {
3015 /* Event for root directory */
3017 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
3018 (void) add_directory(j
, d
->path
, e
->name
);
3024 if (e
->mask
& IN_IGNORED
)
3027 log_debug("Unexpected inotify event.");
/* Classifies what happened since the last check: returns SD_JOURNAL_INVALIDATE if
 * j->current_invalidate_counter moved away from j->last_invalidate_counter (files were added or
 * removed), SD_JOURNAL_APPEND otherwise. The last-seen counter is brought up to date as a side effect. */
3030 static int determine_change(sd_journal
*j
) {
3035 b
= j
->current_invalidate_counter
!= j
->last_invalidate_counter
;
3036 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
3038 return b
? SD_JOURNAL_INVALIDATE
: SD_JOURNAL_APPEND
;
/* Drains the journal's inotify fd and feeds every event to process_inotify_event(). The time of the call
 * is recorded in j->last_process_usec and the invalidate counters are synchronized before reading. When
 * the read reports a transient error, the result is determine_change() if at least one batch of events
 * was handled and SD_JOURNAL_NOP otherwise. Fails with -EINVAL for a NULL journal, -ECHILD after an
 * origin change and -EUNATCH for journals opened with SD_JOURNAL_ASSUME_IMMUTABLE. */
3041 _public_
int sd_journal_process(sd_journal
*j
) {
3042 bool got_something
= false;
3044 assert_return(j
, -EINVAL
);
3045 assert_return(!journal_origin_changed(j
), -ECHILD
);
3047 if (j
->inotify_fd
< 0) /* We have no inotify fd yet? Then there's nothing to process. */
3050 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
3052 j
->last_process_usec
= now(CLOCK_MONOTONIC
);
3053 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
3056 union inotify_event_buffer buffer
;
3059 l
= read(j
->inotify_fd
, &buffer
, sizeof(buffer
));
/* A transient error ends the processing; report what was seen so far. */
3061 if (ERRNO_IS_TRANSIENT(errno
))
3062 return got_something
? determine_change(j
) : SD_JOURNAL_NOP
;
3067 got_something
= true;
3069 FOREACH_INOTIFY_EVENT(e
, buffer
, l
)
3070 process_inotify_event(j
, e
);
3074 _public_
int sd_journal_wait(sd_journal
*j
, uint64_t timeout_usec
) {
3078 assert_return(j
, -EINVAL
);
3079 assert_return(!journal_origin_changed(j
), -ECHILD
);
3080 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
3082 if (j
->inotify_fd
< 0) {
3085 /* This is the first invocation, hence create the inotify watch */
3086 r
= sd_journal_get_fd(j
);
3090 /* Server might have done some vacuuming while we weren't watching. Get rid of the deleted
3091 * files now so they don't stay around indefinitely. */
3092 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3093 r
= journal_file_fstat(f
);
3095 remove_file_real(j
, f
);
3097 log_debug_errno(r
, "Failed to fstat() journal file '%s', ignoring: %m", f
->path
);
3100 /* The journal might have changed since the context object was created and we weren't
3101 * watching before, hence don't wait for anything, and return immediately. */
3102 return determine_change(j
);
3105 r
= sd_journal_get_timeout(j
, &t
);
3109 if (t
!= UINT64_MAX
) {
3110 t
= usec_sub_unsigned(t
, now(CLOCK_MONOTONIC
));
3112 if (timeout_usec
== UINT64_MAX
|| timeout_usec
> t
)
3117 r
= fd_wait_for_event(j
->inotify_fd
, POLLIN
, timeout_usec
);
3118 } while (r
== -EINTR
);
3123 return sd_journal_process(j
);
3126 _public_
int sd_journal_get_cutoff_realtime_usec(sd_journal
*j
, uint64_t *from
, uint64_t *to
) {
3129 uint64_t fmin
= 0, tmax
= 0;
3132 assert_return(j
, -EINVAL
);
3133 assert_return(!journal_origin_changed(j
), -ECHILD
);
3134 assert_return(from
|| to
, -EINVAL
);
3135 assert_return(from
!= to
, -EINVAL
);
3137 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3140 r
= journal_file_get_cutoff_realtime_usec(f
, &fr
, &t
);
3153 fmin
= MIN(fr
, fmin
);
3154 tmax
= MAX(t
, tmax
);
3163 return first
? 0 : 1;
3166 _public_
int sd_journal_get_cutoff_monotonic_usec(
3172 uint64_t from
= UINT64_MAX
, to
= UINT64_MAX
;
3177 assert_return(j
, -EINVAL
);
3178 assert_return(!journal_origin_changed(j
), -ECHILD
);
3179 assert_return(ret_from
!= ret_to
, -EINVAL
);
3181 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3184 r
= journal_file_get_cutoff_monotonic_usec(f
, boot_id
, &ff
, &tt
);
3193 from
= MIN(ff
, from
);
3210 void journal_print_header(sd_journal
*j
) {
3212 bool newline
= false;
3216 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3222 journal_file_print_header(f
);
3226 _public_
int sd_journal_get_usage(sd_journal
*j
, uint64_t *ret
) {
3230 assert_return(j
, -EINVAL
);
3231 assert_return(!journal_origin_changed(j
), -ECHILD
);
3232 assert_return(ret
, -EINVAL
);
3234 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3238 if (fstat(f
->fd
, &st
) < 0)
3241 b
= (uint64_t) st
.st_blocks
;
3242 if (b
> UINT64_MAX
/ 512)
3246 if (sum
> UINT64_MAX
- b
)
3255 _public_
int sd_journal_query_unique(sd_journal
*j
, const char *field
) {
3258 assert_return(j
, -EINVAL
);
3259 assert_return(!journal_origin_changed(j
), -ECHILD
);
3261 if (!field_is_valid(field
))
3264 r
= free_and_strdup(&j
->unique_field
, field
);
3268 j
->unique_file
= NULL
;
3269 j
->unique_offset
= 0;
3270 j
->unique_file_lost
= false;
3275 _public_
int sd_journal_enumerate_unique(
3277 const void **ret_data
,
3282 assert_return(j
, -EINVAL
);
3283 assert_return(!journal_origin_changed(j
), -ECHILD
);
3284 assert_return(j
->unique_field
, -EINVAL
);
3286 k
= strlen(j
->unique_field
);
3288 if (!j
->unique_file
) {
3289 if (j
->unique_file_lost
)
3292 j
->unique_file
= ordered_hashmap_first(j
->files
);
3293 if (!j
->unique_file
)
3296 j
->unique_offset
= 0;
3307 /* Proceed to next data object in the field's linked list */
3308 if (j
->unique_offset
== 0) {
3309 r
= journal_file_find_field_object(j
->unique_file
, j
->unique_field
, k
, &o
, NULL
);
3313 j
->unique_offset
= r
> 0 ? le64toh(o
->field
.head_data_offset
) : 0;
3315 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_DATA
, j
->unique_offset
, &o
);
3319 j
->unique_offset
= le64toh(o
->data
.next_field_offset
);
3322 /* We reached the end of the list? Then start again, with the next file */
3323 if (j
->unique_offset
== 0) {
3324 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
3325 if (!j
->unique_file
)
3331 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_DATA
, j
->unique_offset
, &o
);
3335 /* Let's pin the data object, so we can look at it at the same time as one on another file. */
3336 r
= journal_file_pin_object(j
->unique_file
, o
);
3340 r
= journal_file_data_payload(j
->unique_file
, o
, j
->unique_offset
, NULL
, 0,
3341 j
->data_threshold
, &odata
, &ol
);
3345 /* Check if we have at least the field name and "=". */
3347 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG
),
3348 "%s:offset " OFSfmt
": object has size %zu, expected at least %zu",
3349 j
->unique_file
->path
,
3350 j
->unique_offset
, ol
, k
+ 1);
3352 if (memcmp(odata
, j
->unique_field
, k
) != 0 || ((const char*) odata
)[k
] != '=')
3353 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG
),
3354 "%s:offset " OFSfmt
": object does not start with \"%s=\"",
3355 j
->unique_file
->path
,
3359 /* OK, now let's see if we already returned this data object by checking if it exists in the
3360 * earlier traversed files. */
3362 ORDERED_HASHMAP_FOREACH(of
, j
->files
) {
3363 if (of
== j
->unique_file
)
3366 /* Skip this file it didn't have any fields indexed */
3367 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
3370 /* We can reuse the hash from our current file only on old-style journal files
3371 * without keyed hashes. On new-style files we have to calculate the hash anew, to
3372 * take the per-file hash seed into consideration. */
3373 if (!JOURNAL_HEADER_KEYED_HASH(j
->unique_file
->header
) && !JOURNAL_HEADER_KEYED_HASH(of
->header
))
3374 r
= journal_file_find_data_object_with_hash(of
, odata
, ol
, le64toh(o
->data
.hash
), NULL
, NULL
);
3376 r
= journal_file_find_data_object(of
, odata
, ol
, NULL
, NULL
);
3395 _public_
int sd_journal_enumerate_available_unique(sd_journal
*j
, const void **data
, size_t *size
) {
3399 r
= sd_journal_enumerate_unique(j
, data
, size
);
3402 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r
))
3404 /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try
3405 * we will access the next field. */
3409 _public_
void sd_journal_restart_unique(sd_journal
*j
) {
3410 if (!j
|| journal_origin_changed(j
))
3413 j
->unique_file
= NULL
;
3414 j
->unique_offset
= 0;
3415 j
->unique_file_lost
= false;
3418 _public_
int sd_journal_enumerate_fields(sd_journal
*j
, const char **field
) {
3421 assert_return(j
, -EINVAL
);
3422 assert_return(!journal_origin_changed(j
), -ECHILD
);
3423 assert_return(field
, -EINVAL
);
3425 if (!j
->fields_file
) {
3426 if (j
->fields_file_lost
)
3429 j
->fields_file
= ordered_hashmap_first(j
->files
);
3430 if (!j
->fields_file
)
3433 j
->fields_hash_table_index
= 0;
3434 j
->fields_offset
= 0;
3438 JournalFile
*f
, *of
;
3446 if (j
->fields_offset
== 0) {
3449 /* We are not yet positioned at any field. Let's pick the first one */
3450 r
= journal_file_map_field_hash_table(f
);
3454 m
= le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
);
3456 if (j
->fields_hash_table_index
>= m
) {
3457 /* Reached the end of the hash table, go to the next file. */
3462 j
->fields_offset
= le64toh(f
->field_hash_table
[j
->fields_hash_table_index
].head_hash_offset
);
3464 if (j
->fields_offset
!= 0)
3467 /* Empty hash table bucket, go to next one */
3468 j
->fields_hash_table_index
++;
3472 /* Proceed with next file */
3473 j
->fields_file
= ordered_hashmap_next(j
->files
, f
->path
);
3474 if (!j
->fields_file
) {
3479 j
->fields_offset
= 0;
3480 j
->fields_hash_table_index
= 0;
3485 /* We are already positioned at a field. If so, let's figure out the next field from it */
3487 r
= journal_file_move_to_object(f
, OBJECT_FIELD
, j
->fields_offset
, &o
);
3491 j
->fields_offset
= le64toh(o
->field
.next_hash_offset
);
3492 if (j
->fields_offset
== 0) {
3493 /* Reached the end of the hash table chain */
3494 j
->fields_hash_table_index
++;
3499 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
3500 r
= journal_file_move_to_object(f
, OBJECT_UNUSED
, j
->fields_offset
, &o
);
3504 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
3505 if (o
->object
.type
!= OBJECT_FIELD
)
3506 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG
),
3507 "%s:offset " OFSfmt
": object has type %i, expected %i",
3508 f
->path
, j
->fields_offset
,
3509 o
->object
.type
, OBJECT_FIELD
);
3511 sz
= le64toh(o
->object
.size
) - offsetof(Object
, field
.payload
);
3513 /* Let's see if we already returned this field name before. */
3515 ORDERED_HASHMAP_FOREACH(of
, j
->files
) {
3519 /* Skip this file it didn't have any fields indexed */
3520 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
3523 if (!JOURNAL_HEADER_KEYED_HASH(f
->header
) && !JOURNAL_HEADER_KEYED_HASH(of
->header
))
3524 r
= journal_file_find_field_object_with_hash(of
, o
->field
.payload
, sz
,
3525 le64toh(o
->field
.hash
), NULL
, NULL
);
3527 r
= journal_file_find_field_object(of
, o
->field
.payload
, sz
, NULL
, NULL
);
3539 /* Check if this is really a valid string containing no NUL byte */
3540 if (memchr(o
->field
.payload
, 0, sz
))
3543 if (j
->data_threshold
> 0 && sz
> j
->data_threshold
)
3544 sz
= j
->data_threshold
;
3546 if (!GREEDY_REALLOC(j
->fields_buffer
, sz
+ 1))
3549 memcpy(j
->fields_buffer
, o
->field
.payload
, sz
);
3550 j
->fields_buffer
[sz
] = 0;
3552 if (!field_is_valid(j
->fields_buffer
))
3555 *field
= j
->fields_buffer
;
3560 _public_
void sd_journal_restart_fields(sd_journal
*j
) {
3561 if (!j
|| journal_origin_changed(j
))
3564 j
->fields_file
= NULL
;
3565 j
->fields_hash_table_index
= 0;
3566 j
->fields_offset
= 0;
3567 j
->fields_file_lost
= false;
3570 _public_
int sd_journal_reliable_fd(sd_journal
*j
) {
3571 assert_return(j
, -EINVAL
);
3572 assert_return(!journal_origin_changed(j
), -ECHILD
);
3574 return !j
->on_network
;
3577 static char *lookup_field(const char *field
, void *userdata
) {
3578 sd_journal
*j
= ASSERT_PTR(userdata
);
3585 r
= sd_journal_get_data(j
, field
, &data
, &size
);
3587 size
> REPLACE_VAR_MAX
)
3588 return strdup(field
);
3590 d
= strlen(field
) + 1;
3592 return strndup((const char*) data
+ d
, size
- d
);
3595 _public_
int sd_journal_get_catalog(sd_journal
*j
, char **ret
) {
3599 _cleanup_free_
char *text
= NULL
, *cid
= NULL
;
3603 assert_return(j
, -EINVAL
);
3604 assert_return(!journal_origin_changed(j
), -ECHILD
);
3605 assert_return(ret
, -EINVAL
);
3607 r
= sd_journal_get_data(j
, "MESSAGE_ID", &data
, &size
);
3611 cid
= strndup((const char*) data
+ 11, size
- 11);
3615 r
= sd_id128_from_string(cid
, &id
);
3619 r
= catalog_get(secure_getenv("SYSTEMD_CATALOG") ?: CATALOG_DATABASE
, id
, &text
);
3623 t
= replace_var(text
, lookup_field
, j
);
3631 _public_
int sd_journal_get_catalog_for_message_id(sd_id128_t id
, char **ret
) {
3632 assert_return(ret
, -EINVAL
);
3634 return catalog_get(CATALOG_DATABASE
, id
, ret
);
3637 _public_
int sd_journal_set_data_threshold(sd_journal
*j
, size_t sz
) {
3638 assert_return(j
, -EINVAL
);
3639 assert_return(!journal_origin_changed(j
), -ECHILD
);
3641 j
->data_threshold
= sz
;
3645 _public_
int sd_journal_get_data_threshold(sd_journal
*j
, size_t *sz
) {
3646 assert_return(j
, -EINVAL
);
3647 assert_return(!journal_origin_changed(j
), -ECHILD
);
3648 assert_return(sz
, -EINVAL
);
3650 *sz
= j
->data_threshold
;
3654 _public_
int sd_journal_has_runtime_files(sd_journal
*j
) {
3655 assert_return(j
, -EINVAL
);
3657 return j
->has_runtime_files
;
3660 _public_
int sd_journal_has_persistent_files(sd_journal
*j
) {
3661 assert_return(j
, -EINVAL
);
3663 return j
->has_persistent_files
;