1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include <sys/inotify.h>
10 #include "sd-journal.h"
12 #include "alloc-util.h"
14 #include "dirent-util.h"
17 #include "extract-word.h"
20 #include "format-util.h"
22 #include "hostname-util.h"
23 #include "id128-util.h"
24 #include "inotify-util.h"
26 #include "journal-def.h"
27 #include "journal-file.h"
28 #include "journal-internal.h"
32 #include "nulstr-util.h"
33 #include "origin-id.h"
34 #include "path-util.h"
36 #include "replace-var.h"
38 #include "sort-util.h"
39 #include "stat-util.h"
40 #include "stdio-util.h"
41 #include "string-util.h"
43 #include "syslog-util.h"
44 #include "time-util.h"
45 #include "uid-classification.h"
/* Two seconds, expressed in microseconds. NOTE(review): presumably the interval after which the set of
 * journal files is rechecked; no user of this constant is nearby - confirm. */
47 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
49 /* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as
50 * we want to be able to show all officially valid paths at least */
51 #define REPLACE_VAR_MAX PATH_MAX
/* 64 KiB. NOTE(review): presumably the default per-journal data threshold - confirm against its user. */
53 #define DEFAULT_DATA_THRESHOLD (64*1024)
/* Instantiates the private origin-ID helpers for sd_journal (NOTE(review): presumably the source of the
 * journal_origin_changed() checks used by the public entry points in this file - confirm in origin-id.h),
 * followed by forward declarations of three static helpers. */
55 DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_journal
, journal
);
57 static void remove_file_real(sd_journal
*j
, JournalFile
*f
);
58 static int journal_file_read_tail_timestamp(sd_journal
*j
, JournalFile
*f
);
59 static void journal_file_unlink_newest_by_boot_id(sd_journal
*j
, JournalFile
*f
);
/* Remembers error code r in j->errors so that it can be reported later. The error code is the hashmap
 * key and a heap-allocated string (copy) is the value; 'path' names the file or directory the error
 * was generated from. */
61 static int journal_put_error(sd_journal
*j
, int r
, const char *path
) {
62 _cleanup_free_
char *copy
= NULL
;
64 /* Memorize an error we encountered, and store which
65 * file/directory it was generated from. Note that we store
66 * only *one* path per error code, as the error code is the
67 * key into the hashmap, and the path is the value. This means
68 * we keep track only of all error kinds, but not of all error
69 * locations. This has the benefit that the hashmap cannot
 * grow indefinitely.
 *
72 * We return an error here only if we didn't manage to
73 * memorize the real error. */
84 r
= hashmap_ensure_put(&j
->errors
, &trivial_hash_ops_value_free
, INT_TO_PTR(r
), copy
);
/* Drops the notion of a current entry: clears j->current_file and j->current_field, and resets the
 * location of every file in j->files via journal_file_reset_location(). */
94 static void detach_location(sd_journal
*j
) {
99 j
->current_file
= NULL
;
100 j
->current_field
= 0;
102 ORDERED_HASHMAP_FOREACH(f
, j
->files
)
103 journal_file_reset_location(f
);
/* Initializes Location *l from entry object o of file f. The location type must be LOCATION_DISCRETE or
 * LOCATION_SEEK. Sequence number, realtime, monotonic and xor_hash are converted from their little-endian
 * representation with le64toh(); the seqnum ID is taken from the file header and the boot ID from the
 * entry itself. */
106 static void init_location(Location
*l
, LocationType type
, JournalFile
*f
, Object
*o
) {
108 assert(IN_SET(type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
113 .seqnum
= le64toh(o
->entry
.seqnum
),
114 .seqnum_id
= f
->header
->seqnum_id
,
115 .realtime
= le64toh(o
->entry
.realtime
),
116 .monotonic
= le64toh(o
->entry
.monotonic
),
117 .boot_id
= o
->entry
.boot_id
,
118 .xor_hash
= le64toh(o
->entry
.xor_hash
),
120 .realtime_set
= true,
121 .monotonic_set
= true,
122 .xor_hash_set
= true,
/* Makes entry o of file f the current location of the journal: stores it in j->current_location as a
 * LOCATION_DISCRETE location, resets j->current_field, and flips f's own location type from
 * LOCATION_SEEK (asserted) to LOCATION_DISCRETE. */
126 static void set_location(sd_journal
*j
, JournalFile
*f
, Object
*o
) {
131 init_location(&j
->current_location
, LOCATION_DISCRETE
, f
, o
);
134 j
->current_field
= 0;
136 /* Let f know its candidate entry was picked. */
137 assert(f
->location_type
== LOCATION_SEEK
);
138 f
->location_type
= LOCATION_DISCRETE
;
/* Validates a match expression (size bytes at data, which must not be NULL). The first two bytes are
 * tested for a double underscore, and the bytes are then scanned one by one; upper-case ASCII letters
 * and ASCII digits are among the characters the scan recognizes. */
141 static int match_is_valid(const void *data
, size_t size
) {
142 const char *b
= ASSERT_PTR(data
);
147 if (((char*) data
)[0] == '_' && ((char*) data
)[1] == '_')
150 for (const char *p
= b
; p
< b
+ size
; p
++) {
158 if (*p
>= 'A' && *p
<= 'Z')
161 if (ascii_isdigit(*p
))
/* Decides whether two match expressions (_a of s bytes, _b of t bytes) refer to the same field by
 * walking both buffers byte by byte over their common length. The function ends in
 * assert_not_reached(), i.e. the loop is expected to return for every input it is given. */
170 static bool same_field(const void *_a
, size_t s
, const void *_b
, size_t t
) {
171 const uint8_t *a
= _a
, *b
= _b
;
173 for (size_t j
= 0; j
< s
&& j
< t
; j
++) {
182 assert_not_reached();
/* Creates a new Match node of type t and returns it. The node is prepended to the child list of
 * parent p (p->matches); callers in this file also pass p == NULL for the root node. */
185 static Match
*match_new(Match
*p
, MatchType t
) {
198 LIST_PREPEND(matches
, p
->matches
, m
);
/* Releases Match node m: its children (m->matches) are freed recursively and m is unlinked from its
 * parent's child list. Returns a Match pointer so that callers can assign the result back. */
203 static Match
*match_free(Match
*m
) {
207 match_free(m
->matches
);
210 LIST_REMOVE(matches
, m
->parent
->matches
, m
);
/* Frees m via match_free() unless m is NULL or still has children (m->matches). */
216 static Match
*match_free_if_empty(Match
*m
) {
217 if (!m
|| m
->matches
)
220 return match_free(m
);
/* Public API: adds a match expression (size bytes at data) to the journal's filter.
 *
 * Returns -EINVAL if j or data is NULL, and -ECHILD if journal_origin_changed(j). A size of 0 or
 * SIZE_MAX means data is a string whose length is to be determined here. The expression is checked
 * with match_is_valid().
 *
 * Matches are kept in a tree: j->level0 (AND term) -> j->level1 (OR term) -> j->level2 (AND term),
 * below which sit OR terms holding the MATCH_DISCRETE leaves. The existing leaves below level2 are
 * walked to detect an identical expression (Jenkins hash plus memcmp()) or an OR term for the same
 * field (same_field()); if no OR term fits, a new one is created below level2. The data is copied
 * with memdup(). At the end of the function, terms that ended up empty are released again with
 * match_free_if_empty() (NOTE(review): presumably the failure path - confirm). */
223 _public_
int sd_journal_add_match(sd_journal
*j
, const void *data
, size_t size
) {
224 Match
*add_here
= NULL
, *m
= NULL
;
227 assert_return(j
, -EINVAL
);
228 assert_return(!journal_origin_changed(j
), -ECHILD
);
229 assert_return(data
, -EINVAL
);
231 /* If the size is unspecified, assume it's a string. Note: 0 is the public value we document for
232 * this, for historical reasons. Internally, we pretty widely started using SIZE_MAX for this in
233 * similar cases however, hence accept that too. And internally we actually prefer it, to make things
234 * less surprising. */
235 if (IN_SET(size
, 0, SIZE_MAX
))
238 if (!match_is_valid(data
, size
))
245 * level 4: concrete matches */
248 j
->level0
= match_new(NULL
, MATCH_AND_TERM
);
254 j
->level1
= match_new(j
->level0
, MATCH_OR_TERM
);
260 j
->level2
= match_new(j
->level1
, MATCH_AND_TERM
);
265 assert(j
->level0
->type
== MATCH_AND_TERM
);
266 assert(j
->level1
->type
== MATCH_OR_TERM
);
267 assert(j
->level2
->type
== MATCH_AND_TERM
);
269 /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
270 * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
271 hash
= jenkins_hash64(data
, size
);
273 LIST_FOREACH(matches
, l3
, j
->level2
->matches
) {
274 assert(l3
->type
== MATCH_OR_TERM
);
276 LIST_FOREACH(matches
, l4
, l3
->matches
) {
277 assert(l4
->type
== MATCH_DISCRETE
);
279 /* Exactly the same match already? Then ignore
281 if (l4
->hash
== hash
&&
283 memcmp(l4
->data
, data
, size
) == 0)
286 /* Same field? Then let's add this to this OR term */
287 if (same_field(data
, size
, l4
->data
, l4
->size
)) {
298 add_here
= match_new(j
->level2
, MATCH_OR_TERM
);
303 m
= match_new(add_here
, MATCH_DISCRETE
);
309 m
->data
= memdup(data
, size
);
319 match_free_if_empty(add_here
);
320 j
->level2
= match_free_if_empty(j
->level2
);
321 j
->level1
= match_free_if_empty(j
->level1
);
322 j
->level0
= match_free_if_empty(j
->level0
);
/* Convenience wrapper: joins field and value into "field=value" and hands the result to
 * sd_journal_add_match() with size SIZE_MAX (i.e. as a string). The temporary string is freed
 * automatically on return. */
327 int journal_add_match_pair(sd_journal
*j
, const char *field
, const char *value
) {
328 _cleanup_free_
char *s
= NULL
;
334 s
= strjoin(field
, "=", value
);
338 return sd_journal_add_match(j
, s
, SIZE_MAX
);
/* printf-style variant of sd_journal_add_match(): formats the match expression with vasprintf()
 * and adds the result with size SIZE_MAX (i.e. as a string). The temporary string is freed
 * automatically on return. */
341 int journal_add_matchf(sd_journal
*j
, const char *format
, ...) {
342 _cleanup_free_
char *s
= NULL
;
349 va_start(ap
, format
);
350 r
= vasprintf(&s
, format
, ap
);
355 return sd_journal_add_match(j
, s
, SIZE_MAX
);
/* Public API: inserts a conjunction (logical AND) into the match expression. Returns -EINVAL if j is
 * NULL and -ECHILD if journal_origin_changed(j). Whether j->level1 currently has any children
 * (j->level1->matches) is consulted before acting. */
358 _public_
int sd_journal_add_conjunction(sd_journal
*j
) {
359 assert_return(j
, -EINVAL
);
360 assert_return(!journal_origin_changed(j
), -ECHILD
);
368 if (!j
->level1
->matches
)
/* Public API: inserts a disjunction (logical OR) into the match expression. Returns -EINVAL if j is
 * NULL and -ECHILD if journal_origin_changed(j). Whether j->level2 currently has any children
 * (j->level2->matches) is consulted before acting. */
377 _public_
int sd_journal_add_disjunction(sd_journal
*j
) {
378 assert_return(j
, -EINVAL
);
379 assert_return(!journal_origin_changed(j
), -ECHILD
);
390 if (!j
->level2
->matches
)
/* Renders the match tree rooted at m as a newly allocated string. A MATCH_DISCRETE leaf is rendered
 * as its C-escaped data (cescape_length()); for a term, the children are rendered recursively and
 * joined with " OR " or " AND " depending on m->type, and the result can be wrapped in parentheses
 * (see 'enclose'). The literal "none" is returned in the remaining case (NOTE(review): presumably
 * for m == NULL - confirm). */
397 static char *match_make_string(Match
*m
) {
398 _cleanup_free_
char *p
= NULL
;
399 bool enclose
= false;
402 return strdup("none");
404 if (m
->type
== MATCH_DISCRETE
)
405 return cescape_length(m
->data
, m
->size
);
407 LIST_FOREACH(matches
, i
, m
->matches
) {
408 _cleanup_free_
char *t
= NULL
;
410 t
= match_make_string(i
);
415 if (!strextend(&p
, m
->type
== MATCH_OR_TERM
? " OR " : " AND ", t
))
424 return strjoin("(", p
, ")");
/* Returns a human-readable rendering of the journal's whole match tree (rooted at j->level0), as
 * produced by match_make_string(). */
429 char* journal_make_match_string(sd_journal
*j
) {
432 return match_make_string(j
->level0
);
/* Public API: drops all matches. The check at the top covers a NULL j and journal_origin_changed(j).
 * The whole tree is freed starting at j->level0, and the level0/level1/level2 pointers are reset
 * to NULL. */
435 _public_
void sd_journal_flush_matches(sd_journal
*j
) {
436 if (!j
|| journal_origin_changed(j
))
440 match_free(j
->level0
);
442 j
->level0
= j
->level1
= j
->level2
= NULL
;
/* Comparator that orders NewestByBootId elements by their boot ID. Used in this file for both
 * typesafe_qsort() and typesafe_bsearch() on j->newest_by_boot_id. */
447 static int newest_by_boot_id_compare(const NewestByBootId
*a
, const NewestByBootId
*b
) {
448 return id128_compare_func(&a
->boot_id
, &b
->boot_id
);
/* Removes file f from the per-boot-ID priority queue it is linked into; a NOP if f is not linked
 * (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL). The queue is looked up by f->newest_boot_id in
 * the j->newest_by_boot_id array with a binary search; afterwards f's queue index is reset to
 * PRIOQ_IDX_NULL. */
451 static void journal_file_unlink_newest_by_boot_id(sd_journal
*j
, JournalFile
*f
) {
452 NewestByBootId
*found
;
457 if (f
->newest_boot_id_prioq_idx
== PRIOQ_IDX_NULL
) /* not linked currently, hence this is a NOP */
460 found
= typesafe_bsearch(&(NewestByBootId
) { .boot_id
= f
->newest_boot_id
},
461 j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
464 assert_se(prioq_remove(found
->prioq
, f
, &f
->newest_boot_id_prioq_idx
) > 0);
465 f
->newest_boot_id_prioq_idx
= PRIOQ_IDX_NULL
;
467 /* The prioq may be empty, but that should not cause any issue. Let's keep it. */
/* Tears down the whole boot-ID index: for every element of j->newest_by_boot_id, all files are
 * unlinked from its priority queue and the queue is freed; then the array itself is freed and its
 * element count reset to 0. */
470 static void journal_clear_newest_by_boot_id(sd_journal
*j
) {
471 FOREACH_ARRAY(i
, j
->newest_by_boot_id
, j
->n_newest_by_boot_id
) {
474 while ((f
= prioq_peek(i
->prioq
)))
475 journal_file_unlink_newest_by_boot_id(j
, f
);
477 prioq_free(i
->prioq
);
480 j
->newest_by_boot_id
= mfree(j
->newest_by_boot_id
);
481 j
->n_newest_by_boot_id
= 0;
/* Priority queue comparator for JournalFile objects: orders by newest_monotonic_usec with the sign
 * inverted, so that the file with the newest timestamp sorts first. */
484 static int journal_file_newest_monotonic_compare(const void *a
, const void *b
) {
485 const JournalFile
*x
= a
, *y
= b
;
487 return -CMP(x
->newest_monotonic_usec
, y
->newest_monotonic_usec
); /* Invert order, we want newest first! */
/* (Re)positions file f in the priority queue that belongs to its newest boot ID (f->newest_boot_id).
 *
 * If a queue for that boot ID already exists, f is inserted into it (when not yet a member) or
 * reshuffled within it. Otherwise a new queue is allocated, f is put into it, and a NewestByBootId
 * element owning the queue is appended to j->newest_by_boot_id, which is then sorted again so that
 * binary search keeps working. If growing the array fails, f's queue index is reset, as the
 * temporary queue is released by the cleanup handler on return. */
490 static int journal_file_reshuffle_newest_by_boot_id(sd_journal
*j
, JournalFile
*f
) {
491 NewestByBootId
*found
;
497 found
= typesafe_bsearch(&(NewestByBootId
) { .boot_id
= f
->newest_boot_id
},
498 j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
500 /* There's already a priority queue for this boot ID */
502 if (f
->newest_boot_id_prioq_idx
== PRIOQ_IDX_NULL
) {
503 r
= prioq_put(found
->prioq
, f
, &f
->newest_boot_id_prioq_idx
); /* Insert if we aren't in there yet */
507 prioq_reshuffle(found
->prioq
, f
, &f
->newest_boot_id_prioq_idx
); /* Reshuffle otherwise */
510 _cleanup_(prioq_freep
) Prioq
*q
= NULL
;
512 /* No priority queue yet, then allocate one */
514 assert(f
->newest_boot_id_prioq_idx
== PRIOQ_IDX_NULL
); /* we can't be a member either */
516 q
= prioq_new(journal_file_newest_monotonic_compare
);
520 r
= prioq_put(q
, f
, &f
->newest_boot_id_prioq_idx
);
524 if (!GREEDY_REALLOC(j
->newest_by_boot_id
, j
->n_newest_by_boot_id
+ 1)) {
525 f
->newest_boot_id_prioq_idx
= PRIOQ_IDX_NULL
;
529 j
->newest_by_boot_id
[j
->n_newest_by_boot_id
++] = (NewestByBootId
) {
530 .boot_id
= f
->newest_boot_id
,
531 .prioq
= TAKE_PTR(q
),
534 typesafe_qsort(j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
/* Finds the journal file holding the newest data for boot ID 'id'.
 *
 * The candidate is the head of that boot ID's priority queue. Before it is used, its tail timestamp
 * is refreshed with journal_file_read_tail_timestamp(), which may reorder the queue; the lookup is
 * therefore repeated until the same file comes out twice in a row or the retry limit (5) is hit.
 * Fails with -ENODATA if nothing is known about the boot ID; a failure to read the tail timestamp
 * is logged at debug level and propagated. */
540 static int journal_file_find_newest_for_boot_id(
545 JournalFile
*prev
= NULL
;
551 /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq
552 * accordingly. We do this only a bunch of times, to not be caught in some update loop. */
553 for (unsigned n_tries
= 0;; n_tries
++) {
554 NewestByBootId
*found
;
557 found
= typesafe_bsearch(&(NewestByBootId
) { .boot_id
= id
},
558 j
->newest_by_boot_id
, j
->n_newest_by_boot_id
, newest_by_boot_id_compare
);
560 f
= found
? prioq_peek(found
->prioq
) : NULL
;
562 return log_debug_errno(SYNTHETIC_ERRNO(ENODATA
),
563 "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id
));
565 if (f
== prev
|| n_tries
>= 5) {
566 /* This was already the best answer in the previous run, or we tried too often, use it */
573 /* Let's read the journal file's current timestamp once, before we return it, maybe it has changed. */
574 r
= journal_file_read_tail_timestamp(j
, f
);
576 return log_debug_errno(r
, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id
));
578 /* No new entry found. */
583 /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the
584 * prioq again and only use the information once we reached an equilibrium or hit a limit */
/* Orders two boot IDs relative to each other. For each boot ID the newest open journal file is
 * looked up; if both files report the same newest machine ID, the result is the comparison of their
 * newest realtime timestamps. */
588 static int compare_boot_ids(sd_journal
*j
, sd_id128_t a
, sd_id128_t b
) {
593 /* Try to find the newest open journal file for the two boot ids */
594 if (journal_file_find_newest_for_boot_id(j
, a
, &x
) < 0 ||
595 journal_file_find_newest_for_boot_id(j
, b
, &y
) < 0)
598 /* Only compare the boot id timestamps if they originate from the same machine. If they are from
599 * different machines, then the timestamps of the boot ids might be as off as the timestamps on the
600 * entries and hence not useful for comparing. */
601 if (!sd_id128_equal(x
->newest_machine_id
, y
->newest_machine_id
))
604 return CMP(x
->newest_realtime_usec
, y
->newest_realtime_usec
);
/* Compares the current entry of file f (the f->current_* fields, f must be at LOCATION_SEEK) with
 * location l (LOCATION_DISCRETE or LOCATION_SEEK).
 *
 * First a full-equality test over boot ID, realtime, xor_hash, seqnum ID and seqnum is made. After
 * that the entry is ordered by: sequence number when both share the seqnum ID; monotonic time when
 * l->monotonic_set and both share the boot ID, or else by compare_boot_ids(); realtime when
 * l->realtime_set; and xor_hash when l->xor_hash_set. */
607 static int compare_with_location(
609 const JournalFile
*f
,
611 const JournalFile
*current_file
) {
617 assert(f
->location_type
== LOCATION_SEEK
);
618 assert(IN_SET(l
->type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
620 if (l
->monotonic_set
&&
621 sd_id128_equal(f
->current_boot_id
, l
->boot_id
) &&
623 f
->current_realtime
== l
->realtime
&&
625 f
->current_xor_hash
== l
->xor_hash
&&
627 sd_id128_equal(f
->header
->seqnum_id
, l
->seqnum_id
) &&
628 f
->current_seqnum
== l
->seqnum
&&
633 sd_id128_equal(f
->header
->seqnum_id
, l
->seqnum_id
)) {
634 r
= CMP(f
->current_seqnum
, l
->seqnum
);
639 if (l
->monotonic_set
) {
640 /* If both arguments have the same boot ID, then we can compare the monotonic timestamps. If
641 * they are distinct, then we might be able to look up the timestamps of those boot IDs (if they
642 * are from the same machine) and order by that. */
643 if (sd_id128_equal(f
->current_boot_id
, l
->boot_id
))
644 r
= CMP(f
->current_monotonic
, l
->monotonic
);
646 r
= compare_boot_ids(j
, f
->current_boot_id
, l
->boot_id
);
651 if (l
->realtime_set
) {
652 r
= CMP(f
->current_realtime
, l
->realtime
);
657 if (l
->xor_hash_set
) {
658 r
= CMP(f
->current_xor_hash
, l
->xor_hash
);
/* Finds, in file f, the next entry relative to after_offset (in 'direction') that satisfies match m.
 *
 * MATCH_DISCRETE: the data object for m is looked up (with a per-file hash when the file uses keyed
 * hashing, otherwise with the pre-calculated one) and the entry is located via
 * journal_file_move_to_entry_by_offset_for_data().
 * MATCH_OR_TERM: every child is evaluated from after_offset and the nearest result in 'direction'
 * is kept.
 * MATCH_AND_TERM: starting from the first child's result, the other children are evaluated from the
 * current candidate offset; whenever one of them lands beyond the candidate, the candidate moves.
 * The entry at the final candidate offset is then loaded with journal_file_move_to_object(). */
666 static int next_for_match(
670 uint64_t after_offset
,
671 direction_t direction
,
673 uint64_t *ret_offset
) {
682 if (m
->type
== MATCH_DISCRETE
) {
686 /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
687 * we can use what we pre-calculated. */
688 if (JOURNAL_HEADER_KEYED_HASH(f
->header
))
689 hash
= journal_file_hash_data(f
, m
->data
, m
->size
);
693 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, hash
, &d
, NULL
);
697 return journal_file_move_to_entry_by_offset_for_data(f
, d
, after_offset
, direction
, ret
, ret_offset
);
699 } else if (m
->type
== MATCH_OR_TERM
) {
701 /* Find the earliest match beyond after_offset */
703 LIST_FOREACH(matches
, i
, m
->matches
) {
706 r
= next_for_match(j
, i
, f
, after_offset
, direction
, NULL
, &cp
);
710 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
< np
: cp
> np
))
718 } else if (m
->type
== MATCH_AND_TERM
) {
721 /* Always jump to the next matching entry and repeat
722 * this until we find an offset that matches for all
728 r
= next_for_match(j
, m
->matches
, f
, after_offset
, direction
, NULL
, &np
);
732 assert(direction
== DIRECTION_DOWN
? np
>= after_offset
: np
<= after_offset
);
733 last_moved
= m
->matches
;
735 LIST_LOOP_BUT_ONE(matches
, i
, m
->matches
, last_moved
) {
738 r
= next_for_match(j
, i
, f
, np
, direction
, NULL
, &cp
);
742 assert(direction
== DIRECTION_DOWN
? cp
>= np
: cp
<= np
);
743 if (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
) {
753 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, ret
);
/* Moves to the first entry (last one with DIRECTION_UP) that references the data object at
 * data_offset and belongs to a boot following (preceding) the given boot ID within file f.
 *
 * Works in steps, as the inline comments describe: go to the far end of the current boot, step one
 * entry further to find the neighbouring boot, re-load the data object by offset (earlier object
 * pointers may have been invalidated), then search that boot for an entry matching the data. A
 * result of 0 from the last step means that boot has no matching entry. */
764 static int move_by_boot_for_data(
767 direction_t direction
,
769 uint64_t data_offset
,
771 uint64_t *ret_offset
) {
777 assert(IN_SET(direction
, DIRECTION_DOWN
, DIRECTION_UP
));
780 /* First, move to the last (or first when DIRECTION_UP) entry for the boot. */
782 r
= journal_file_move_to_entry_by_monotonic(f
, boot_id
,
783 direction
== DIRECTION_DOWN
? USEC_INFINITY
: 0,
784 direction
== DIRECTION_DOWN
? DIRECTION_UP
: DIRECTION_DOWN
,
789 /* Then, move to the first entry of the next boot (or the last entry of the previous boot with DIRECTION_UP). */
791 r
= journal_file_next_entry(f
, p
, direction
, &entry
, NULL
);
792 if (r
<= 0) /* r == 0 means that no next (or previous) boot found. That is, we are at HEAD or TAIL now. */
795 assert(entry
->object
.type
== OBJECT_ENTRY
);
796 boot_id
= entry
->entry
.boot_id
;
798 /* Note, this object cannot be reused, as journal_file_move_to_entry_by_monotonic() may invalidate the object. */
800 r
= journal_file_move_to_object(f
, OBJECT_DATA
, data_offset
, &data
);
804 /* Then, move to the matching entry. */
805 r
= journal_file_move_to_entry_by_monotonic_for_data(f
, data
, boot_id
,
806 direction
== DIRECTION_DOWN
? 0 : USEC_INFINITY
, direction
,
808 if (r
!= 0) /* Here r == 0 is OK, as that means the boot contains no entry matching with the data. */
/* Positions file f according to j->current_location, restricted to entries satisfying match m.
 *
 * MATCH_DISCRETE: the data object for m is looked up (per-file hash when the file uses keyed
 * hashing), then the entry is located by, in order of preference: HEAD/TAIL (only when 'direction'
 * points away from that end, otherwise 0 is returned), sequence number (same seqnum ID only),
 * monotonic time, realtime, the neighbouring boot (move_by_boot_for_data()), or simply the first
 * entry for the data in 'direction'. After the monotonic lookup the data object is re-loaded by
 * offset since it might have been invalidated.
 * MATCH_OR_TERM: all children are evaluated and the nearest result in 'direction' is loaded.
 * MATCH_AND_TERM: all children are evaluated, and the furthest result in 'direction' is used as the
 * starting offset for next_for_match(). */
813 static int find_location_for_match(
817 direction_t direction
,
819 uint64_t *ret_offset
) {
827 if (m
->type
== MATCH_DISCRETE
) {
831 if (JOURNAL_HEADER_KEYED_HASH(f
->header
))
832 hash
= journal_file_hash_data(f
, m
->data
, m
->size
);
836 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, hash
, &d
, &dp
);
840 if (j
->current_location
.type
== LOCATION_HEAD
)
841 return direction
== DIRECTION_DOWN
? journal_file_move_to_entry_for_data(f
, d
, DIRECTION_DOWN
, ret
, ret_offset
) : 0;
842 if (j
->current_location
.type
== LOCATION_TAIL
)
843 return direction
== DIRECTION_UP
? journal_file_move_to_entry_for_data(f
, d
, DIRECTION_UP
, ret
, ret_offset
) : 0;
844 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
845 return journal_file_move_to_entry_by_seqnum_for_data(f
, d
, j
->current_location
.seqnum
, direction
, ret
, ret_offset
);
846 if (j
->current_location
.monotonic_set
) {
847 r
= journal_file_move_to_entry_by_monotonic_for_data(f
, d
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, ret_offset
);
851 /* The data object might have been invalidated. */
852 r
= journal_file_move_to_object(f
, OBJECT_DATA
, dp
, &d
);
856 /* If not found, fall back to realtime if set, or go to the first entry of the next boot
857 * (or the last entry of the previous boot when DIRECTION_UP). */
859 if (j
->current_location
.realtime_set
)
860 return journal_file_move_to_entry_by_realtime_for_data(f
, d
, j
->current_location
.realtime
, direction
, ret
, ret_offset
);
862 if (j
->current_location
.monotonic_set
)
863 return move_by_boot_for_data(j
, f
, direction
, j
->current_location
.boot_id
, dp
, ret
, ret_offset
);
865 return journal_file_move_to_entry_for_data(f
, d
, direction
, ret
, ret_offset
);
867 } else if (m
->type
== MATCH_OR_TERM
) {
870 /* Find the earliest match */
872 LIST_FOREACH(matches
, i
, m
->matches
) {
875 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
879 if (np
== 0 || (direction
== DIRECTION_DOWN
? np
> cp
: np
< cp
))
888 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, ret
);
901 assert(m
->type
== MATCH_AND_TERM
);
903 /* First jump to the last match, and then find the
904 * next one where all matches match */
909 LIST_FOREACH(matches
, i
, m
->matches
) {
912 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
916 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
))
920 return next_for_match(j
, m
, f
, np
, direction
, ret
, ret_offset
);
/* Positions file f according to j->current_location. One path delegates to find_location_for_match()
 * on the match tree (j->level0). The other, labelled "No matches is simple", locates the entry
 * directly, in order of preference: HEAD/TAIL (only when 'direction' points away from that end,
 * otherwise 0 is returned), sequence number (same seqnum ID only), monotonic time, realtime, the
 * first entry of the neighbouring boot, or the first entry of the file in 'direction'. */
924 static int find_location_with_matches(
927 direction_t direction
,
929 uint64_t *ret_offset
) {
937 return find_location_for_match(j
, j
->level0
, f
, direction
, ret
, ret_offset
);
939 /* No matches is simple */
941 if (j
->current_location
.type
== LOCATION_HEAD
)
942 return direction
== DIRECTION_DOWN
? journal_file_next_entry(f
, 0, DIRECTION_DOWN
, ret
, ret_offset
) : 0;
943 if (j
->current_location
.type
== LOCATION_TAIL
)
944 return direction
== DIRECTION_UP
? journal_file_next_entry(f
, 0, DIRECTION_UP
, ret
, ret_offset
) : 0;
945 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
946 return journal_file_move_to_entry_by_seqnum(f
, j
->current_location
.seqnum
, direction
, ret
, ret_offset
);
947 if (j
->current_location
.monotonic_set
) {
948 r
= journal_file_move_to_entry_by_monotonic(f
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, ret_offset
);
952 /* If not found, fall back to realtime if set, or go to the first entry of the next boot
953 * (or the last entry of the previous boot when DIRECTION_UP). */
955 if (j
->current_location
.realtime_set
)
956 return journal_file_move_to_entry_by_realtime(f
, j
->current_location
.realtime
, direction
, ret
, ret_offset
);
958 if (j
->current_location
.monotonic_set
) {
961 /* If not found in the above, first move to the last (or first when DIRECTION_UP) entry for the boot. */
962 r
= journal_file_move_to_entry_by_monotonic(f
, j
->current_location
.boot_id
,
963 direction
== DIRECTION_DOWN
? USEC_INFINITY
: 0,
964 direction
== DIRECTION_DOWN
? DIRECTION_UP
: DIRECTION_DOWN
,
969 /* Then, move to the next or previous boot. */
970 return journal_file_next_entry(f
, p
, direction
, ret
, ret_offset
);
973 return journal_file_next_entry(f
, 0, direction
, ret
, ret_offset
);
/* Advances file f by one entry in 'direction', starting from f->current_offset. Either the file is
 * simply stepped with journal_file_next_entry(), or - when matches apply - next_for_match() is run on
 * the match tree (j->level0) with a start offset one byte beyond (before, for DIRECTION_UP) the
 * current entry, so that the current entry itself is not returned again. */
976 static int next_with_matches(
979 direction_t direction
,
981 uint64_t *ret_offset
) {
986 /* No matches is easy. We simply advance the file
989 return journal_file_next_entry(f
, f
->current_offset
, direction
, ret
, ret_offset
);
991 /* If we have a match then we look for the next matching entry
992 * with an offset at least one step larger */
993 return next_for_match(j
, j
->level0
, f
,
994 direction
== DIRECTION_DOWN
? f
->current_offset
+ 1
995 : f
->current_offset
- 1,
996 direction
, ret
, ret_offset
);
/* Moves file f to its first entry that lies beyond the journal's current location in 'direction',
 * and saves that entry as f's candidate location (journal_file_save_location()).
 *
 * The tail timestamp is refreshed first (errors ignored). A file that already hit the end in this
 * direction and has not gained entries since (n_entries unchanged) is short-cut. If the file was
 * already iterated in this direction, it is either left on its saved candidate (LOCATION_SEEK) or
 * stepped with next_with_matches(); otherwise it is positioned from scratch with
 * find_location_with_matches(). Finally, when the journal's location is LOCATION_DISCRETE, the file
 * is stepped further until compare_with_location() says the candidate really lies beyond the
 * current entry, so that duplicates of one entry in several files are suppressed. */
999 static int next_beyond_location(sd_journal
*j
, JournalFile
*f
, direction_t direction
) {
1001 uint64_t cp
, n_entries
;
1007 (void) journal_file_read_tail_timestamp(j
, f
);
1009 n_entries
= le64toh(f
->header
->n_entries
);
1011 /* If we hit EOF before, we don't need to look into this file again
1012 * unless direction changed or new entries appeared. */
1013 if (f
->last_direction
== direction
&&
1014 f
->location_type
== (direction
== DIRECTION_DOWN
? LOCATION_TAIL
: LOCATION_HEAD
) &&
1015 n_entries
== f
->last_n_entries
)
1018 f
->last_n_entries
= n_entries
;
1020 if (f
->last_direction
== direction
&& f
->current_offset
> 0) {
1021 /* LOCATION_SEEK here means we did the work in a previous
1022 * iteration and the current location already points to a
1023 * candidate entry. */
1024 if (f
->location_type
!= LOCATION_SEEK
) {
1025 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
1029 journal_file_save_location(f
, c
, cp
);
1032 r
= find_location_with_matches(j
, f
, direction
, &c
, &cp
);
1033 /* LOCATION_SEEK specified to j->current_location.type here means that this is called first
1034 * after sd_journal_seek_monotonic_usec() or friends was called. In that case, this file may
1035 * not contain any matching entries with the user-specified location, but another file may
1036 * contain them. If so, the second call of this function will use the seqnum, and we may find
1037 * an entry in _this_ file with the seqnum. To prevent the second call of this function from
1038 * exiting early in the first 'if' block of this function, do not save the direction if the
1039 * current location is LOCATION_SEEK. */
1040 if (r
> 0 || j
->current_location
.type
!= LOCATION_SEEK
)
1041 f
->last_direction
= direction
;
1043 assert(f
->last_direction
== _DIRECTION_INVALID
);
1047 journal_file_save_location(f
, c
, cp
);
1050 /* OK, we found the spot, now let's advance until an entry
1051 * that is actually different from what we were previously
1052 * looking at. This is necessary to handle entries which exist
1053 * in two (or more) journal files, and which shall all be
1054 * suppressed but one. */
1059 if (j
->current_location
.type
== LOCATION_DISCRETE
) {
1060 r
= compare_with_location(j
, f
, &j
->current_location
, j
->current_file
);
1061 found
= direction
== DIRECTION_DOWN
? r
> 0 : r
< 0;
1068 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
1072 journal_file_save_location(f
, c
, cp
);
/* Orders the candidate entries of two files (both must be at LOCATION_SEEK). Entries whose boot ID,
 * monotonic and realtime timestamps, xor_hash, seqnum ID and seqnum all agree are treated as
 * identical. Otherwise the order is decided by, in turn: sequence number (same seqnum ID only),
 * monotonic time (same boot ID) or compare_boot_ids() (different boot IDs), realtime, and finally
 * xor_hash. */
1076 static int compare_locations(sd_journal
*j
, JournalFile
*af
, JournalFile
*bf
) {
1084 assert(af
->location_type
== LOCATION_SEEK
);
1085 assert(bf
->location_type
== LOCATION_SEEK
);
1087 /* If contents, timestamps and seqnum match, these entries are identical. */
1088 if (sd_id128_equal(af
->current_boot_id
, bf
->current_boot_id
) &&
1089 af
->current_monotonic
== bf
->current_monotonic
&&
1090 af
->current_realtime
== bf
->current_realtime
&&
1091 af
->current_xor_hash
== bf
->current_xor_hash
&&
1092 sd_id128_equal(af
->header
->seqnum_id
, bf
->header
->seqnum_id
) &&
1093 af
->current_seqnum
== bf
->current_seqnum
)
1096 if (sd_id128_equal(af
->header
->seqnum_id
, bf
->header
->seqnum_id
)) {
1097 /* If this is from the same seqnum source, compare seqnums */
1098 r
= CMP(af
->current_seqnum
, bf
->current_seqnum
);
1102 /* Wow! This is weird, different data but the same seqnums? Something is borked, but let's
1103 * make the best of it and compare by time. */
1106 if (sd_id128_equal(af
->current_boot_id
, bf
->current_boot_id
))
1107 /* If the boot id matches, compare monotonic time */
1108 r
= CMP(af
->current_monotonic
, bf
->current_monotonic
);
1110 /* If they don't match try to compare boot IDs */
1111 r
= compare_boot_ids(j
, af
->current_boot_id
, bf
->current_boot_id
);
1115 /* Otherwise, compare UTC time */
1116 r
= CMP(af
->current_realtime
, bf
->current_realtime
);
1120 /* Finally, compare by contents */
1121 return CMP(af
->current_xor_hash
, bf
->current_xor_hash
);
/* Moves the journal's read pointer by one entry in 'direction'.
 *
 * Returns -EINVAL if j is NULL and -ECHILD if journal_origin_changed(j). Every file obtained from
 * j->files_cache is advanced past the current location with next_beyond_location(): a file that
 * fails is logged at debug level and removed from the journal, a file with no further entry is
 * marked as being at LOCATION_TAIL (LOCATION_HEAD for DIRECTION_UP). The remaining candidates are
 * compared with compare_locations() and the one that comes first in 'direction' wins; its entry
 * object is loaded and made the current location with set_location(). */
1124 static int real_journal_next(sd_journal
*j
, direction_t direction
) {
1125 JournalFile
*new_file
= NULL
;
1131 assert_return(j
, -EINVAL
);
1132 assert_return(!journal_origin_changed(j
), -ECHILD
);
1134 r
= iterated_cache_get(j
->files_cache
, NULL
, &files
, &n_files
);
1138 FOREACH_ARRAY(_f
, files
, n_files
) {
1139 JournalFile
*f
= (JournalFile
*) *_f
;
1142 r
= next_beyond_location(j
, f
, direction
);
1144 log_debug_errno(r
, "Can't iterate through %s, ignoring: %m", f
->path
);
1145 remove_file_real(j
, f
);
1147 } else if (r
== 0) {
1148 f
->location_type
= direction
== DIRECTION_DOWN
? LOCATION_TAIL
: LOCATION_HEAD
;
1155 r
= compare_locations(j
, f
, new_file
);
1156 found
= direction
== DIRECTION_DOWN
? r
< 0 : r
> 0;
1166 r
= journal_file_move_to_object(new_file
, OBJECT_ENTRY
, new_file
->current_offset
, &o
);
1170 set_location(j
, new_file
, o
);
/* Public API: advances the read pointer to the next entry, i.e. real_journal_next() with
 * DIRECTION_DOWN. */
1175 _public_
int sd_journal_next(sd_journal
*j
) {
1176 return real_journal_next(j
, DIRECTION_DOWN
);
/* Public API: moves the read pointer back to the previous entry, i.e. real_journal_next() with
 * DIRECTION_UP. */
1179 _public_
int sd_journal_previous(sd_journal
*j
) {
1180 return real_journal_next(j
, DIRECTION_UP
);
/* Public API: moves the read pointer by one entry. From LOCATION_HEAD the step always goes forward
 * and from LOCATION_TAIL always backward; from any other location it goes forward if 'advanced' is
 * non-zero and backward otherwise. Returns -EINVAL if j is NULL. */
1183 _public_
int sd_journal_step_one(sd_journal
*j
, int advanced
) {
1184 assert_return(j
, -EINVAL
);
1186 if (j
->current_location
.type
== LOCATION_HEAD
)
1187 return sd_journal_next(j
);
1188 if (j
->current_location
.type
== LOCATION_TAIL
)
1189 return sd_journal_previous(j
);
1190 return real_journal_next(j
, advanced
? DIRECTION_DOWN
: DIRECTION_UP
);
/* Moves the read pointer by up to 'skip' entries in 'direction' through repeated calls to
 * real_journal_next(). Returns -EINVAL if j is NULL, -ECHILD if journal_origin_changed(j), and
 * -ERANGE if skip exceeds INT_MAX. If the current location is not LOCATION_DISCRETE, one extra
 * real_journal_next() call is made first to resolve it. */
1193 static int real_journal_next_skip(sd_journal
*j
, direction_t direction
, uint64_t skip
) {
1196 assert_return(j
, -EINVAL
);
1197 assert_return(!journal_origin_changed(j
), -ECHILD
);
1198 assert_return(skip
<= INT_MAX
, -ERANGE
);
1201 /* If this is not a discrete skip, then at least
1202 * resolve the current location */
1203 if (j
->current_location
.type
!= LOCATION_DISCRETE
) {
1204 r
= real_journal_next(j
, direction
);
1213 r
= real_journal_next(j
, direction
);
/* Public API: advances the read pointer by up to 'skip' entries, i.e. real_journal_next_skip() with
 * DIRECTION_DOWN. */
1227 _public_
int sd_journal_next_skip(sd_journal
*j
, uint64_t skip
) {
1228 return real_journal_next_skip(j
, DIRECTION_DOWN
, skip
);
/* Public API: moves the read pointer back by up to 'skip' entries, i.e. real_journal_next_skip()
 * with DIRECTION_UP. */
1231 _public_
int sd_journal_previous_skip(sd_journal
*j
, uint64_t skip
) {
1232 return real_journal_next_skip(j
, DIRECTION_UP
, skip
);
/* Public API: writes a newly allocated cursor string describing the current entry to *ret_cursor.
 *
 * Returns -EINVAL if j is NULL, -ECHILD if journal_origin_changed(j), and -EADDRNOTAVAIL if there is
 * no current entry. The cursor has the form "s=...;i=...;b=...;m=...;t=...;x=..." carrying, in
 * that order: the seqnum ID of the current file, the entry's sequence number, its boot ID, its
 * monotonic timestamp, its realtime timestamp and its xor_hash (numbers in hexadecimal). */
1235 _public_
int sd_journal_get_cursor(sd_journal
*j
, char **ret_cursor
) {
1239 assert_return(j
, -EINVAL
);
1240 assert_return(!journal_origin_changed(j
), -ECHILD
);
1242 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
1243 return -EADDRNOTAVAIL
;
1245 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
1252 if (asprintf(ret_cursor
,
1253 "s=%s;i=%"PRIx64
";b=%s;m=%"PRIx64
";t=%"PRIx64
";x=%"PRIx64
,
1254 SD_ID128_TO_STRING(j
->current_file
->header
->seqnum_id
), le64toh(o
->entry
.seqnum
),
1255 SD_ID128_TO_STRING(o
->entry
.boot_id
), le64toh(o
->entry
.monotonic
),
1256 le64toh(o
->entry
.realtime
),
1257 le64toh(o
->entry
.xor_hash
)) < 0)
/* Public API: seeks to the location described by a cursor string.
 *
 * Returns -EINVAL if j is NULL or the cursor is empty, and -ECHILD if journal_origin_changed(j).
 * The cursor is split at ';' (separators are not coalesced) into items of the form "K=value" with
 * a one-character key. Seqnum ID and boot ID are parsed with sd_id128_from_string(); sequence
 * number, monotonic, realtime and xor_hash are parsed as hexadecimal numbers. The result is stored
 * in j->current_location as a LOCATION_SEEK location; the sequence number is only stored together
 * with its seqnum ID, and the monotonic timestamp only together with its boot ID. */
1263 _public_
int sd_journal_seek_cursor(sd_journal
*j
, const char *cursor
) {
1264 unsigned long long seqnum
, monotonic
, realtime
, xor_hash
;
1265 bool seqnum_id_set
= false,
1267 boot_id_set
= false,
1268 monotonic_set
= false,
1269 realtime_set
= false,
1270 xor_hash_set
= false;
1271 sd_id128_t seqnum_id
, boot_id
;
1274 assert_return(j
, -EINVAL
);
1275 assert_return(!journal_origin_changed(j
), -ECHILD
);
1276 assert_return(!isempty(cursor
), -EINVAL
);
1278 for (const char *p
= cursor
;;) {
1279 _cleanup_free_
char *word
= NULL
;
1281 r
= extract_first_word(&p
, &word
, ";", EXTRACT_DONT_COALESCE_SEPARATORS
);
1287 if (word
[0] == '\0' || word
[1] != '=')
1292 seqnum_id_set
= true;
1293 r
= sd_id128_from_string(word
+ 2, &seqnum_id
);
1300 if (sscanf(word
+ 2, "%llx", &seqnum
) != 1)
1306 r
= sd_id128_from_string(word
+ 2, &boot_id
);
1312 monotonic_set
= true;
1313 if (sscanf(word
+ 2, "%llx", &monotonic
) != 1)
1318 realtime_set
= true;
1319 if (sscanf(word
+ 2, "%llx", &realtime
) != 1)
1324 xor_hash_set
= true;
1325 if (sscanf(word
+ 2, "%llx", &xor_hash
) != 1)
1331 if ((!seqnum_set
|| !seqnum_id_set
) &&
1332 (!monotonic_set
|| !boot_id_set
) &&
1337 j
->current_location
= (Location
) {
1338 .type
= LOCATION_SEEK
,
1342 j
->current_location
.realtime
= (uint64_t) realtime
;
1343 j
->current_location
.realtime_set
= true;
1346 if (seqnum_set
&& seqnum_id_set
) {
1347 j
->current_location
.seqnum
= (uint64_t) seqnum
;
1348 j
->current_location
.seqnum_id
= seqnum_id
;
1349 j
->current_location
.seqnum_set
= true;
1352 if (monotonic_set
&& boot_id_set
) {
1353 j
->current_location
.monotonic
= (uint64_t) monotonic
;
1354 j
->current_location
.boot_id
= boot_id
;
1355 j
->current_location
.monotonic_set
= true;
1359 j
->current_location
.xor_hash
= (uint64_t) xor_hash
;
1360 j
->current_location
.xor_hash_set
= true;
/* Public API: tests whether a cursor string refers to the current entry.
 *
 * Returns -EINVAL if j is NULL or the cursor is empty, -ECHILD if journal_origin_changed(j), and
 * -EADDRNOTAVAIL if there is no current entry. The cursor is split at ';' (separators are not
 * coalesced) into "K=value" items; each value is parsed (128-bit ID or hexadecimal number) and
 * compared against the corresponding property of the current entry: the seqnum ID of the current
 * file's header, and the entry's sequence number, boot ID, monotonic timestamp, realtime timestamp
 * and xor_hash. */
1366 _public_
int sd_journal_test_cursor(sd_journal
*j
, const char *cursor
) {
1370 assert_return(j
, -EINVAL
);
1371 assert_return(!journal_origin_changed(j
), -ECHILD
);
1372 assert_return(!isempty(cursor
), -EINVAL
);
1374 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
1375 return -EADDRNOTAVAIL
;
1377 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
1382 _cleanup_free_
char *item
= NULL
;
1383 unsigned long long ll
;
1386 r
= extract_first_word(&cursor
, &item
, ";", EXTRACT_DONT_COALESCE_SEPARATORS
);
1393 if (strlen(item
) < 2 || item
[1] != '=')
1399 r
= sd_id128_from_string(item
+2, &id
);
1402 if (!sd_id128_equal(id
, j
->current_file
->header
->seqnum_id
))
1407 if (sscanf(item
+2, "%llx", &ll
) != 1)
1409 if (ll
!= le64toh(o
->entry
.seqnum
))
1414 r
= sd_id128_from_string(item
+2, &id
);
1417 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
1422 if (sscanf(item
+2, "%llx", &ll
) != 1)
1424 if (ll
!= le64toh(o
->entry
.monotonic
))
1429 if (sscanf(item
+2, "%llx", &ll
) != 1)
1431 if (ll
!= le64toh(o
->entry
.realtime
))
1436 if (sscanf(item
+2, "%llx", &ll
) != 1)
1438 if (ll
!= le64toh(o
->entry
.xor_hash
))
/* Public API: seeks to a monotonic timestamp (usec) within the given boot. Returns -EINVAL if j is
 * NULL and -ECHILD if journal_origin_changed(j). The request is recorded by replacing
 * j->current_location with a LOCATION_SEEK location that has monotonic_set enabled. */
1447 _public_
int sd_journal_seek_monotonic_usec(sd_journal
*j
, sd_id128_t boot_id
, uint64_t usec
) {
1448 assert_return(j
, -EINVAL
);
1449 assert_return(!journal_origin_changed(j
), -ECHILD
);
1453 j
->current_location
= (Location
) {
1454 .type
= LOCATION_SEEK
,
1457 .monotonic_set
= true,
/* Public API: seeks to a realtime (wallclock) timestamp in usec. Returns -EINVAL if j is NULL and
 * -ECHILD if journal_origin_changed(j). The request is recorded by replacing j->current_location
 * with a LOCATION_SEEK location that has realtime_set enabled. */
1463 _public_
int sd_journal_seek_realtime_usec(sd_journal
*j
, uint64_t usec
) {
1464 assert_return(j
, -EINVAL
);
1465 assert_return(!journal_origin_changed(j
), -ECHILD
);
1469 j
->current_location
= (Location
) {
1470 .type
= LOCATION_SEEK
,
1472 .realtime_set
= true,
/* Public API: seeks to the beginning of the journal. Returns -EINVAL if j is NULL and -ECHILD if
 * journal_origin_changed(j). The request is recorded by replacing j->current_location with a
 * LOCATION_HEAD location. */
1478 _public_
int sd_journal_seek_head(sd_journal
*j
) {
1479 assert_return(j
, -EINVAL
);
1480 assert_return(!journal_origin_changed(j
), -ECHILD
);
1484 j
->current_location
= (Location
) {
1485 .type
= LOCATION_HEAD
,
/* Public API: seeks to the end of the journal. Returns -EINVAL if j is NULL and -ECHILD if
 * journal_origin_changed(j). The request is recorded by replacing j->current_location with a
 * LOCATION_TAIL location. */
1491 _public_
int sd_journal_seek_tail(sd_journal
*j
) {
1492 assert_return(j
, -EINVAL
);
1493 assert_return(!journal_origin_changed(j
), -ECHILD
);
1497 j
->current_location
= (Location
) {
1498 .type
= LOCATION_TAIL
,
/* Stores the result of fd_is_network_fs(fd) in j->on_network. sd_journal_get_timeout() consults
 * this flag to decide whether periodic re-checking is needed. */
1504 static void check_network(sd_journal
*j
, int fd
) {
1510 j
->on_network
= fd_is_network_fs(fd
);
/* Checks whether filename belongs to the journal type named by prefix. It matches if it equals
 * "<prefix>.journal" or "<prefix>.journal~", or if it starts with "<prefix>@". The candidate names
 * are built with strjoina(). */
1513 static bool file_has_type_prefix(const char *prefix
, const char *filename
) {
1514 const char *full
, *tilded
, *atted
;
1516 full
= strjoina(prefix
, ".journal");
1517 tilded
= strjoina(full
, "~");
1518 atted
= strjoina(prefix
, "@");
1520 return STR_IN_SET(filename
, full
, tilded
) ||
1521 startswith(filename
, atted
);
/* Decides whether a journal file name matches the requested type flags. The name is first checked
 * for a ".journal" or ".journal~" suffix. With SD_JOURNAL_CURRENT_USER set, the name is tested
 * against the "user-<uid>" prefix of the calling user; with SD_JOURNAL_SYSTEM set, against the
 * "system" prefix. */
1524 static bool file_type_wanted(int flags
, const char *filename
) {
1527 if (!ENDSWITH_SET(filename
, ".journal", ".journal~"))
1530 /* no flags set → every type is OK */
1531 if (!(flags
& (SD_JOURNAL_SYSTEM
| SD_JOURNAL_CURRENT_USER
)))
1534 if (FLAGS_SET(flags
, SD_JOURNAL_CURRENT_USER
)) {
1535 char prefix
[5 + DECIMAL_STR_MAX(uid_t
) + 1];
1537 xsprintf(prefix
, "user-" UID_FMT
, getuid());
1539 if (file_has_type_prefix(prefix
, filename
))
1542 /* If SD_JOURNAL_CURRENT_USER is specified and we are invoked under a system UID, then
1543 * automatically enable SD_JOURNAL_SYSTEM too, because journald will actually put system user
1544 * data into the system journal. */
1546 if (uid_for_system_journal(getuid()))
1547 flags
|= SD_JOURNAL_SYSTEM
;
1550 if (FLAGS_SET(flags
, SD_JOURNAL_SYSTEM
) && file_has_type_prefix("system", filename
))
/* Tests whether path starts with prefix, using path_startswith(). The case of a journal opened
 * relative to a top-level fd (j->toplevel_fd >= 0) is checked first and handled separately. */
1556 static bool path_has_prefix(sd_journal
*j
, const char *path
, const char *prefix
) {
1561 if (j
->toplevel_fd
>= 0)
1564 return path_startswith(path
, prefix
);
/* Records whether the journal contains runtime or persistent files, judged by the path of f:
 * a path below "/run" sets j->has_runtime_files, otherwise a path below "/var" sets
 * j->has_persistent_files. A flag that is already set is not re-evaluated. */
1567 static void track_file_disposition(sd_journal
*j
, JournalFile
*f
) {
1571 if (!j
->has_runtime_files
&& path_has_prefix(j
, f
->path
, "/run"))
1572 j
->has_runtime_files
= true;
1573 else if (!j
->has_persistent_files
&& path_has_prefix(j
, f
->path
, "/var"))
1574 j
->has_persistent_files
= true;
/* Adds a journal file, given either as an already open fd or as a path, to j->files.
 *
 * When a path is used, the file is opened read-only with O_NONBLOCK (relative to j->toplevel_fd if
 * one is set, with the leading slash skipped) and O_NONBLOCK is turned off again afterwards. The
 * inode is checked with stat_verify_regular(). If the path is already tracked with the same inode,
 * only its generation and tail timestamp are refreshed; otherwise the stale entry is dropped via
 * remove_file_real(). The number of tracked files is capped at JOURNAL_FILES_MAX. On success the
 * file is registered under f->path, its disposition and network status are recorded, and
 * j->current_invalidate_counter is bumped. Errors are memorized with journal_put_error().
 *
 * NOTE(review): the fd_nonblock() failure path logs errno rather than the returned r; confirm this
 * is intended. */
1577 static int add_any_file(
1582 _cleanup_close_
int our_fd
= -EBADF
;
1588 assert(fd
>= 0 || path
);
1591 assert(path
); /* For gcc. */
1592 if (j
->toplevel_fd
>= 0)
1593 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1594 * openat() ignores the first argument. */
1596 fd
= our_fd
= openat(j
->toplevel_fd
, skip_leading_slash(path
), O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
);
1598 fd
= our_fd
= open(path
, O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
);
1600 r
= log_debug_errno(errno
, "Failed to open journal file %s: %m", path
);
1604 r
= fd_nonblock(fd
, false);
1606 r
= log_debug_errno(errno
, "Failed to turn off O_NONBLOCK for %s: %m", path
);
1611 if (fstat(fd
, &st
) < 0) {
1612 r
= log_debug_errno(errno
, "Failed to fstat %s: %m", path
?: "fd");
1616 r
= stat_verify_regular(&st
);
1618 log_debug_errno(r
, "Refusing to open %s: %m", path
?: "fd");
1623 f
= ordered_hashmap_get(j
->files
, path
);
1625 if (stat_inode_same(&f
->last_stat
, &st
)) {
1626 /* We already track this file, under the same path and with the same
1627 * device/inode numbers, it's hence really the same. Mark this file as seen
1628 * in this generation. This is used to GC old files in process_q_overflow()
1629 * to detect journal files that are still there and discern them from those
1630 * which are gone. */
1632 f
->last_seen_generation
= j
->generation
;
1633 (void) journal_file_read_tail_timestamp(j
, f
);
1637 /* So we tracked a file under this name, but it has a different inode/device. In that
1638 * case, it got replaced (probably due to rotation?), let's drop it hence from our
1640 remove_file_real(j
, f
);
1645 if (ordered_hashmap_size(j
->files
) >= JOURNAL_FILES_MAX
) {
1646 r
= log_debug_errno(SYNTHETIC_ERRNO(ETOOMANYREFS
),
1647 "Too many open journal files, not adding %s.", path
?: "fd");
1651 r
= journal_file_open(fd
, path
, O_RDONLY
, 0, 0, 0, NULL
, j
->mmap
, NULL
, &f
);
1653 log_debug_errno(r
, "Failed to open journal file %s: %m", path
?: "from fd");
1657 /* journal_file_dump(f); */
1659 /* journal_file_open() generates a replacement fname if necessary, so we can use f->path. */
1660 r
= ordered_hashmap_put(j
->files
, f
->path
, f
);
1662 f
->close_fd
= false; /* Make sure journal_file_close() doesn't close the caller's fd
1663 * (or our own). The caller or we will do that ourselves. */
1664 (void) journal_file_close(f
);
1668 TAKE_FD(our_fd
); /* the fd is now owned by the JournalFile object */
1670 f
->last_seen_generation
= j
->generation
;
1672 track_file_disposition(j
, f
);
1673 check_network(j
, f
->fd
);
1674 (void) journal_file_read_tail_timestamp(j
, f
);
1676 j
->current_invalidate_counter
++;
1678 log_debug("File %s added.", f
->path
);
1683 (void) journal_put_error(j
, r
, path
); /* path==NULL is OK. */
/* Collects the parent directories of all opened journal files and hands the resulting string
 * vector to the caller through *ret (ownership is transferred with TAKE_PTR()). Keys beginning
 * with "/proc/" are checked for first, as they stem from files opened by fd; every key is also
 * tested against the directories gathered so far with path_strv_contains(). */
1687 int journal_get_directories(sd_journal
*j
, char ***ret
) {
1688 _cleanup_strv_free_
char **paths
= NULL
;
1691 size_t n
= SIZE_MAX
;
1697 /* This returns parent directories of opened journal files. */
1699 ORDERED_HASHMAP_FOREACH_KEY(f
, p
, j
->files
) {
1700 _cleanup_free_
char *d
= NULL
;
1702 /* Ignore paths generated from fd. */
1703 if (path_startswith(p
, "/proc/"))
1706 r
= path_extract_directory(p
, &d
);
1710 if (path_strv_contains(paths
, d
))
1713 r
= strv_extend_with_size(&paths
, &n
, d
);
1718 *ret
= TAKE_PTR(paths
);
/* Adds the journal file "<prefix>/<filename>" through add_any_file(). Before the path is built,
 * j->no_new_files and file_type_wanted() (against j->flags) are consulted. */
1722 static int add_file_by_name(
1725 const char *filename
) {
1727 _cleanup_free_
char *path
= NULL
;
1733 if (j
->no_new_files
)
1736 if (!file_type_wanted(j
->flags
, filename
))
1739 path
= path_join(prefix
, filename
);
1743 return add_any_file(j
, -1, path
);
/* Looks up "<prefix>/<filename>" in j->files and drops the matching file via remove_file_real(). */
1746 static int remove_file_by_name(
1749 const char *filename
) {
1751 _cleanup_free_
char *path
= NULL
;
1758 path
= path_join(prefix
, filename
);
1762 f
= ordered_hashmap_get(j
->files
, path
);
1766 remove_file_real(j
, f
);
/* Stops tracking journal file f. The file is removed from j->files and any iteration state that
 * still points at it is reset: current_file/current_field are cleared, while unique_file and
 * fields_file move on to the next file in the map (setting unique_file_lost / fields_file_lost
 * when there is none). Finally the file is unlinked from the newest-by-boot-id bookkeeping, closed,
 * and j->current_invalidate_counter is bumped.
 *
 * NOTE(review): ordered_hashmap_next() is queried with f's path after that key has already been
 * removed from j->files; confirm the successor lookup can still succeed. */
1770 static void remove_file_real(sd_journal
*j
, JournalFile
*f
) {
1774 (void) ordered_hashmap_remove(j
->files
, f
->path
);
1776 log_debug("File %s removed.", f
->path
);
1778 if (j
->current_file
== f
) {
1779 j
->current_file
= NULL
;
1780 j
->current_field
= 0;
1783 if (j
->unique_file
== f
) {
1784 /* Jump to the next unique_file or NULL if that one was last */
1785 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
1786 j
->unique_offset
= 0;
1787 if (!j
->unique_file
)
1788 j
->unique_file_lost
= true;
1791 if (j
->fields_file
== f
) {
1792 j
->fields_file
= ordered_hashmap_next(j
->files
, j
->fields_file
->path
);
1793 j
->fields_offset
= 0;
1794 if (!j
->fields_file
)
1795 j
->fields_file_lost
= true;
1798 journal_file_unlink_newest_by_boot_id(j
, f
);
1799 (void) journal_file_close(f
);
1801 j
->current_invalidate_counter
++;
/* Compares a journal directory name against the local machine ID (sd_id128_get_machine()). The
 * name may carry a ".<namespace>" suffix: in that case the suffix is validated with
 * log_namespace_name_valid() and only the part before the first '.' is parsed as the ID. The
 * result of sd_id128_equal() is returned. */
1804 static int dirname_is_machine_id(const char *fn
) {
1805 sd_id128_t id
, machine
;
1809 /* Returns true if the specified directory name matches the local machine ID */
1811 r
= sd_id128_get_machine(&machine
);
1815 e
= strchr(fn
, '.');
1819 /* Looks like it has a namespace suffix. Verify that. */
1820 if (!log_namespace_name_valid(e
+ 1))
1823 k
= strndupa_safe(fn
, e
- fn
);
1824 r
= sd_id128_from_string(k
, &id
);
1826 r
= sd_id128_from_string(fn
, &id
);
1830 return sd_id128_equal(id
, machine
);
/* Checks whether a journal directory name belongs to the given namespace. The name is split at
 * the first '.'; the suffix is compared with namespace via streq() and the ID part is validated
 * with id128_is_valid(). A name that is validated as a whole is also handled, through the final
 * id128_is_valid(fn) call. */
1833 static int dirname_has_namespace(const char *fn
, const char *namespace) {
1836 /* Returns true if the specified directory name matches the specified namespace */
1838 e
= strchr(fn
, '.');
1845 if (!streq(e
+ 1, namespace))
1848 k
= strndupa_safe(fn
, e
- fn
);
1849 return id128_is_valid(k
);
1855 return id128_is_valid(fn
);
/* Filters directory entries down to candidate journal files: the entry type must be DT_REG,
 * DT_LNK or DT_UNKNOWN, and the name must end in ".journal" or ".journal~". */
1858 static bool dirent_is_journal_file(const struct dirent
*de
) {
1861 /* Returns true if the specified directory entry looks like a journal file we might be interested in */
1863 if (!IN_SET(de
->d_type
, DT_REG
, DT_LNK
, DT_UNKNOWN
))
1866 return endswith(de
->d_name
, ".journal") ||
1867 endswith(de
->d_name
, ".journal~");
/* Filters directory entries down to candidate journal subdirectories: the entry type must be
 * DT_DIR, DT_LNK or DT_UNKNOWN, and the name must be a valid 128-bit ID, optionally followed by
 * '.' and a valid log namespace name. */
1870 static bool dirent_is_journal_subdir(const struct dirent
*de
) {
1874 /* returns true if the specified directory entry looks like a directory that might contain journal
1875 * files we might be interested in, i.e. is either a 128-bit ID or a 128-bit ID suffixed by a
1878 if (!IN_SET(de
->d_type
, DT_DIR
, DT_LNK
, DT_UNKNOWN
))
1881 e
= strchr(de
->d_name
, '.');
1883 return id128_is_valid(de
->d_name
); /* No namespace */
1885 n
= strndupa_safe(de
->d_name
, e
- de
->d_name
);
1886 if (!id128_is_valid(n
))
1889 return log_namespace_name_valid(e
+ 1);
/* Opens the directory at path and returns the handle through *ret. If the journal has a top-level
 * fd, the directory is opened relative to it with xopendirat(), with the leading slash of path
 * skipped so that the path is treated as relative. */
1892 static int directory_open(sd_journal
*j
, const char *path
, DIR **ret
) {
1899 if (j
->toplevel_fd
< 0)
1902 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1903 * relative, by dropping the initial slash */
1904 d
= xopendirat(j
->toplevel_fd
, skip_leading_slash(path
), 0);
/* Releases a Directory object. Its entry in the owning journal's directories_by_wd map is removed
 * and, if that succeeded and an inotify fd exists, the inotify watch is removed as well. The entry
 * in directories_by_path is removed too, and the removal is logged (with a distinct message for
 * root directories). */
1912 static Directory
* directory_free(Directory
*d
) {
1918 hashmap_remove_value(d
->journal
->directories_by_wd
, INT_TO_PTR(d
->wd
), d
) &&
1919 d
->journal
->inotify_fd
>= 0)
1920 (void) inotify_rm_watch(d
->journal
->inotify_fd
, d
->wd
);
1923 hashmap_remove_value(d
->journal
->directories_by_path
, d
->path
, d
);
1928 log_debug("Root directory %s removed.", d
->path
);
1930 log_debug("Directory %s removed.", d
->path
);
/* Cleanup helper for Directory pointers (used as directory_freep with _cleanup_), followed by the
 * hash operations for the two directory maps: directories_by_path_hash_ops and
 * directories_by_wd_hash_ops, both declared with a value destructor. */
1938 DEFINE_TRIVIAL_CLEANUP_FUNC(Directory
*, directory_free
);
1940 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1941 directories_by_path_hash_ops
,
1948 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1949 directories_by_wd_hash_ops
,
1952 trivial_compare_func
,
/* Registers a Directory object for path in j->directories_by_path, or reuses an existing one.
 * If the path is already known, a mismatch between the stored and requested is_root is handled
 * first (a root directory is never downgraded). Otherwise a new Directory is allocated with a copy
 * of path, inserted into the map, j->current_invalidate_counter is bumped and the addition is
 * logged. */
1956 static int add_directory_impl(sd_journal
*j
, const char *path
, bool is_root
, Directory
**ret
) {
1957 _cleanup_(directory_freep
) Directory
*m
= NULL
;
1958 Directory
*existing
;
1965 existing
= hashmap_get(j
->directories_by_path
, path
);
1967 if (existing
->is_root
!= is_root
) {
1968 /* Don't 'downgrade' from root directory */
1977 m
= new(Directory
, 1);
1984 .path
= strdup(path
),
1991 r
= hashmap_ensure_put(&j
->directories_by_path
, &directories_by_path_hash_ops
, m
->path
, m
);
1995 j
->current_invalidate_counter
++;
1998 log_debug("Root directory %s added.", m
->path
);
2000 log_debug("Directory %s added.", m
->path
);
/* Forward declaration: directory_enumerate() calls add_directory() for subdirectories before the
 * definition appears. */
2006 static int add_directory(sd_journal
*j
, const char *prefix
, const char *dirname
);
/* Walks all entries of the open directory d that belongs to Directory m. Entries that look like
 * journal files are added with add_file_by_name(); for root directories, entries that look like
 * journal subdirectories are added with add_directory(). Failures of the individual additions are
 * ignored, and a failure to read the directory is only logged at debug level. */
2008 static void directory_enumerate(sd_journal
*j
, Directory
*m
, DIR *d
) {
2013 FOREACH_DIRENT_ALL(de
, d
, goto fail
) {
2014 if (dirent_is_journal_file(de
))
2015 (void) add_file_by_name(j
, m
->path
, de
->d_name
);
2017 if (m
->is_root
&& dirent_is_journal_subdir(de
))
2018 (void) add_directory(j
, m
->path
, de
->d_name
);
2023 log_debug_errno(errno
, "Failed to enumerate directory %s, ignoring: %m", m
->path
);
/* Installs an inotify watch with the given event mask on the directory referred to by fd and
 * registers it in j->directories_by_wd, keyed by watch descriptor. A directory that already has a
 * watch is checked for first, as is a journal without an inotify fd. Failures are logged at debug
 * level and otherwise ignored; if registering in the hashmap fails, the watch is removed again. */
2026 static void directory_watch(sd_journal
*j
, Directory
*m
, int fd
, uint32_t mask
) {
2033 /* Watch this directory if that's enabled and if it is not being watched yet. */
2035 if (m
->wd
> 0) /* Already have a watch? */
2037 if (j
->inotify_fd
< 0) /* Not watching at all? */
2040 m
->wd
= inotify_add_watch_fd(j
->inotify_fd
, fd
, mask
);
2042 log_debug_errno(m
->wd
, "Failed to watch journal directory '%s', ignoring: %m", m
->path
);
2046 r
= hashmap_ensure_put(&j
->directories_by_wd
, &directories_by_wd_hash_ops
, INT_TO_PTR(m
->wd
), m
);
2049 log_debug_errno(r
, "Directory '%s' already being watched under a different path, ignoring: %m", m
->path
);
2051 log_debug_errno(r
, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m
->path
);
2052 (void) inotify_rm_watch(j
->inotify_fd
, m
->wd
);
/* Adds the (non-root) journal directory "<prefix>/<dirname>" to the set of tracked directories.
 *
 * The directory is filtered first: with SD_JOURNAL_LOCAL_ONLY it must be named after the local
 * machine ID or live below "/run", and the namespace encoded in dirname is checked against
 * SD_JOURNAL_ALL_NAMESPACES, j->namespace and SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE. It is then
 * opened, registered via add_directory_impl(), stamped with the current generation, watched with
 * inotify and, unless j->no_new_files is set, enumerated. The network status of the directory is
 * recorded as well. Errors are memorized with journal_put_error(). */
2058 static int add_directory(
2061 const char *dirname
) {
2063 _cleanup_free_
char *path
= NULL
;
2064 _cleanup_closedir_
DIR *d
= NULL
;
2071 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
2072 * and reenumerates directory contents */
2074 path
= path_join(prefix
, dirname
);
2080 log_debug("Considering directory '%s'.", path
);
2082 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
2083 if ((j
->flags
& SD_JOURNAL_LOCAL_ONLY
) &&
2084 !((dirname
&& dirname_is_machine_id(dirname
) > 0) || path_has_prefix(j
, path
, "/run")))
2088 (!(FLAGS_SET(j
->flags
, SD_JOURNAL_ALL_NAMESPACES
) ||
2089 dirname_has_namespace(dirname
, j
->namespace) > 0 ||
2090 (FLAGS_SET(j
->flags
, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE
) && dirname_has_namespace(dirname
, NULL
) > 0))))
2093 r
= directory_open(j
, path
, &d
);
2095 log_debug_errno(r
, "Failed to open directory '%s': %m", path
);
2099 r
= add_directory_impl(j
, path
, /* is_root = */ false, &m
);
2105 m
->last_seen_generation
= j
->generation
;
2107 directory_watch(j
, m
, dirfd(d
),
2108 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
2109 IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
|IN_MOVED_FROM
|
2112 if (!j
->no_new_files
)
2113 directory_enumerate(j
, m
, d
);
2115 check_network(j
, dirfd(d
));
2120 k
= journal_put_error(j
, r
, path
?: prefix
);
/* Adds a root directory to the set of tracked directories.
 *
 * With a path p: the SD_JOURNAL_RUNTIME_ONLY filter is applied (only paths below "/run" pass),
 * j->prefix is prepended and the directory is opened; -ENOENT is checked for together with
 * missing_ok. Without a path: the journal's top-level fd is duplicated (opendir takes possession
 * of the fd it is given) and the directory is stored under ".". The directory is then registered
 * as a root via add_directory_impl(), watched, enumerated unless j->no_new_files is set, and its
 * network status recorded. Errors are memorized with journal_put_error(). */
2127 static int add_root_directory(sd_journal
*j
, const char *p
, bool missing_ok
) {
2129 _cleanup_closedir_
DIR *d
= NULL
;
2135 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
2136 * update the inotify logic, and re-enumerate the directory entries. This call may hence be called to initially
2137 * populate the set, as well as to update it later. */
2140 /* If there's a path specified, use it. */
2142 log_debug("Considering root directory '%s'.", p
);
2144 if ((j
->flags
& SD_JOURNAL_RUNTIME_ONLY
) &&
2145 !path_has_prefix(j
, p
, "/run"))
2149 p
= strjoina(j
->prefix
, p
);
2151 r
= directory_open(j
, p
, &d
);
2152 if (r
== -ENOENT
&& missing_ok
)
2155 log_debug_errno(r
, "Failed to open root directory %s: %m", p
);
2159 _cleanup_close_
int dfd
= -EBADF
;
2161 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
2162 * opendir() will take possession of the fd, and close it, which we don't want. */
2164 p
= "."; /* store this as "." in the directories hashmap */
2166 dfd
= fcntl(j
->toplevel_fd
, F_DUPFD_CLOEXEC
, 3);
2172 d
= take_fdopendir(&dfd
);
2181 r
= add_directory_impl(j
, p
, /* is_root = */ true, &m
);
2187 directory_watch(j
, m
, dirfd(d
),
2188 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
2191 if (!j
->no_new_files
)
2192 directory_enumerate(j
, m
, d
);
2194 check_network(j
, dirfd(d
));
2199 k
= journal_put_error(j
, r
, p
);
/* Adds the default journal root directories, "/run/log/journal" and "/var/log/journal", and,
 * unless SD_JOURNAL_LOCAL_ONLY is set, "/var/log/journal/remote". Missing directories are
 * tolerated (missing_ok = true) and the results of add_root_directory() are deliberately
 * discarded. */
2206 static int add_search_paths(sd_journal
*j
) {
2208 static const char search_paths
[] =
2209 "/run/log/journal\0"
2210 "/var/log/journal\0";
2214 /* We ignore most errors here, since the idea is to only open
2215 * what's actually accessible, and ignore the rest. */
2217 NULSTR_FOREACH(p
, search_paths
)
2218 (void) add_root_directory(j
, p
, true);
2220 if (!(j
->flags
& SD_JOURNAL_LOCAL_ONLY
))
2221 (void) add_root_directory(j
, "/var/log/journal/remote", true);
/* For a journal that was opened from an explicit set of files (j->no_new_files must be set), adds
 * the parent directory of every open file as a tracked directory via add_directory(). */
2226 static int add_current_paths(sd_journal
*j
) {
2230 assert(j
->no_new_files
);
2232 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
2233 * treat them as fatal. */
2235 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
2236 _cleanup_free_
char *dir
= NULL
;
2239 r
= path_extract_directory(f
->path
, &dir
);
2243 r
= add_directory(j
, dir
, NULL
);
/* Lazily creates the journal's inotify fd (non-blocking, close-on-exec) if none exists yet. */
2251 static int allocate_inotify(sd_journal
*j
) {
2254 if (j
->inotify_fd
< 0) {
2255 j
->inotify_fd
= inotify_init1(IN_NONBLOCK
|IN_CLOEXEC
);
2256 if (j
->inotify_fd
< 0)
/* Allocates and initializes a new sd_journal object. The origin ID is recorded, toplevel_fd and
 * inotify_fd start out invalid (-EBADF), and the data threshold defaults to
 * DEFAULT_DATA_THRESHOLD. The namespace string is copied, and the files hashmap (keyed by path),
 * its iterated cache and the mmap cache are created. The object is guarded by
 * _cleanup_(sd_journal_closep) while it is being set up. */
2263 static sd_journal
*journal_new(int flags
, const char *path
, const char *namespace) {
2264 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2266 j
= new(sd_journal
, 1);
2271 .origin_id
= origin_id_query(),
2272 .toplevel_fd
= -EBADF
,
2273 .inotify_fd
= -EBADF
,
2275 .data_threshold
= DEFAULT_DATA_THRESHOLD
,
2285 if (flags
& SD_JOURNAL_OS_ROOT
)
2292 j
->namespace = strdup(namespace);
2297 j
->files
= ordered_hashmap_new(&journal_file_hash_ops_by_path
);
2301 j
->files_cache
= ordered_hashmap_iterated_cache_new(j
->files
);
2302 j
->mmap
= mmap_cache_new();
2303 if (!j
->files_cache
|| !j
->mmap
)
/* Flags accepted by sd_journal_open_namespace(); any other bit makes it fail with -EINVAL. */
2309 #define OPEN_ALLOWED_FLAGS \
2310 (SD_JOURNAL_LOCAL_ONLY | \
2311 SD_JOURNAL_RUNTIME_ONLY | \
2312 SD_JOURNAL_SYSTEM | \
2313 SD_JOURNAL_CURRENT_USER | \
2314 SD_JOURNAL_ALL_NAMESPACES | \
2315 SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE | \
2316 SD_JOURNAL_ASSUME_IMMUTABLE)
/* Public API: opens the journal for the given namespace using the default search paths. Rejects
 * a NULL ret and flags outside OPEN_ALLOWED_FLAGS with -EINVAL, creates the journal object with
 * journal_new() and populates it through add_search_paths(). */
2318 _public_
int sd_journal_open_namespace(sd_journal
**ret
, const char *namespace, int flags
) {
2319 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2322 assert_return(ret
, -EINVAL
);
2323 assert_return((flags
& ~OPEN_ALLOWED_FLAGS
) == 0, -EINVAL
);
2325 j
= journal_new(flags
, NULL
, namespace);
2329 r
= add_search_paths(j
);
/* Public API: convenience wrapper that calls sd_journal_open_namespace() with a NULL namespace. */
2337 _public_
int sd_journal_open(sd_journal
**ret
, int flags
) {
2338 return sd_journal_open_namespace(ret
, NULL
, flags
);
/* Flags accepted by sd_journal_open_container(); any other bit makes it fail with -EINVAL. */
2341 #define OPEN_CONTAINER_ALLOWED_FLAGS \
2342 (SD_JOURNAL_LOCAL_ONLY | \
2343 SD_JOURNAL_SYSTEM | \
2344 SD_JOURNAL_ASSUME_IMMUTABLE)
/* Public API (deprecated, see the comment in the body): opens the journal of a running container.
 * Arguments are validated (-EINVAL for a NULL or invalid machine name, NULL ret, or unsupported
 * flags). The machine's state file "/run/systemd/machines/<machine>" is parsed with
 * parse_env_file(), the machine class is compared against "container", and a journal object is
 * created for the machine's root and populated through add_search_paths(). */
2346 _public_
int sd_journal_open_container(sd_journal
**ret
, const char *machine
, int flags
) {
2347 _cleanup_free_
char *root
= NULL
, *class = NULL
;
2348 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2352 /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in
2353 * combination with sd_journal_open_directory_fd(). */
2355 assert_return(machine
, -EINVAL
);
2356 assert_return(ret
, -EINVAL
);
2357 assert_return((flags
& ~OPEN_CONTAINER_ALLOWED_FLAGS
) == 0, -EINVAL
);
2358 assert_return(hostname_is_valid(machine
, 0), -EINVAL
);
2360 p
= strjoina("/run/systemd/machines/", machine
);
2361 r
= parse_env_file(NULL
, p
,
2371 if (!streq_ptr(class, "container"))
2374 j
= journal_new(flags
, root
, NULL
);
2378 r
= add_search_paths(j
);
/* Flags accepted by sd_journal_open_directory(); any other bit makes it fail with -EINVAL. */
2386 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
2387 (SD_JOURNAL_OS_ROOT | \
2388 SD_JOURNAL_SYSTEM | \
2389 SD_JOURNAL_CURRENT_USER | \
2390 SD_JOURNAL_ASSUME_IMMUTABLE)
/* Public API: opens the journal files found below a directory path. Rejects a NULL ret or path
 * and unsupported flags with -EINVAL. With SD_JOURNAL_OS_ROOT the path is treated as an OS root
 * and the default search paths are added; otherwise the path itself is added as a root directory
 * and must exist (missing_ok = false). */
2392 _public_
int sd_journal_open_directory(sd_journal
**ret
, const char *path
, int flags
) {
2393 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2396 assert_return(ret
, -EINVAL
);
2397 assert_return(path
, -EINVAL
);
2398 assert_return((flags
& ~OPEN_DIRECTORY_ALLOWED_FLAGS
) == 0, -EINVAL
);
2400 j
= journal_new(flags
, path
, NULL
);
2404 if (flags
& SD_JOURNAL_OS_ROOT
)
2405 r
= add_search_paths(j
);
2407 r
= add_root_directory(j
, path
, false);
/* Flags accepted by sd_journal_open_files(); any other bit makes it fail with -EINVAL. */
2415 #define OPEN_FILES_ALLOWED_FLAGS \
2416 (SD_JOURNAL_ASSUME_IMMUTABLE)
/* Public API: opens an explicit list of journal files. Rejects a NULL ret and unsupported flags
 * with -EINVAL, adds every path of the string vector via add_any_file(), and then sets
 * j->no_new_files so that no further files are picked up. */
2418 _public_
int sd_journal_open_files(sd_journal
**ret
, const char **paths
, int flags
) {
2419 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2422 assert_return(ret
, -EINVAL
);
2423 assert_return((flags
& ~OPEN_FILES_ALLOWED_FLAGS
) == 0, -EINVAL
);
2425 j
= journal_new(flags
, NULL
, NULL
);
2429 STRV_FOREACH(path
, paths
) {
2430 r
= add_any_file(j
, -1, *path
);
2435 j
->no_new_files
= true;
/* Flags accepted by sd_journal_open_directory_fd(); any other bit makes it fail with -EINVAL. */
2441 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
2442 (SD_JOURNAL_OS_ROOT | \
2443 SD_JOURNAL_SYSTEM | \
2444 SD_JOURNAL_CURRENT_USER | \
2445 SD_JOURNAL_TAKE_DIRECTORY_FD | \
2446 SD_JOURNAL_ASSUME_IMMUTABLE)
/* Public API: opens the journal files found below a directory given as an fd. Rejects a NULL ret
 * or unsupported flags with -EINVAL and a negative fd with -EBADF; the fd is also checked with
 * fstat()/S_ISDIR(). The journal object is created without SD_JOURNAL_TAKE_DIRECTORY_FD and the
 * flag is only applied to j->flags at the end, after the directories have been added; this flag
 * is what makes sd_journal_close() close the top-level fd. With SD_JOURNAL_OS_ROOT the default
 * search paths are added relative to the fd, otherwise the fd itself is the root directory. */
2448 _public_
int sd_journal_open_directory_fd(sd_journal
**ret
, int fd
, int flags
) {
2449 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2454 assert_return(ret
, -EINVAL
);
2455 assert_return(fd
>= 0, -EBADF
);
2456 assert_return((flags
& ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS
) == 0, -EINVAL
);
2458 if (fstat(fd
, &st
) < 0)
2461 if (!S_ISDIR(st
.st_mode
))
2464 take_fd
= FLAGS_SET(flags
, SD_JOURNAL_TAKE_DIRECTORY_FD
);
2465 j
= journal_new(flags
& ~SD_JOURNAL_TAKE_DIRECTORY_FD
, NULL
, NULL
);
2469 j
->toplevel_fd
= fd
;
2471 if (flags
& SD_JOURNAL_OS_ROOT
)
2472 r
= add_search_paths(j
);
2474 r
= add_root_directory(j
, NULL
, false);
2478 SET_FLAG(j
->flags
, SD_JOURNAL_TAKE_DIRECTORY_FD
, take_fd
);
/* Flags accepted by sd_journal_open_files_fd(); any other bit makes it fail with -EINVAL. */
2484 #define OPEN_FILES_FD_ALLOWED_FLAGS \
2485 (SD_JOURNAL_ASSUME_IMMUTABLE)
/* Public API: opens journal files given as an array of n_fds file descriptors. Rejects a NULL ret
 * or unsupported flags with -EINVAL and an empty array with -EBADF. Every fd is checked with
 * fstat() and stat_verify_regular() before it is added via add_any_file(). Afterwards both
 * j->no_new_files and j->no_inotify are set. On the failure path, close_fd is cleared on all files
 * already added so that the caller keeps ownership of its fds. */
2487 _public_
int sd_journal_open_files_fd(sd_journal
**ret
, int fds
[], unsigned n_fds
, int flags
) {
2489 _cleanup_(sd_journal_closep
) sd_journal
*j
= NULL
;
2492 assert_return(ret
, -EINVAL
);
2493 assert_return(n_fds
> 0, -EBADF
);
2494 assert_return((flags
& ~OPEN_FILES_FD_ALLOWED_FLAGS
) == 0, -EINVAL
);
2496 j
= journal_new(flags
, NULL
, NULL
);
2500 for (unsigned i
= 0; i
< n_fds
; i
++) {
2508 if (fstat(fds
[i
], &st
) < 0) {
2513 r
= stat_verify_regular(&st
);
2517 r
= add_any_file(j
, fds
[i
], NULL
);
2522 j
->no_new_files
= true;
2523 j
->no_inotify
= true;
2529 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2531 ORDERED_HASHMAP_FOREACH(f
, j
->files
)
2532 f
->close_fd
= false;
/* Public API: releases a journal object and everything it owns. A NULL journal or one whose
 * origin changed is checked for first. Teardown order: newest-by-boot-id bookkeeping, matches,
 * mmap cache statistics (logged before the files are closed), files and their iterated cache, both
 * directory maps, the top-level fd (only when SD_JOURNAL_TAKE_DIRECTORY_FD is set), the inotify
 * fd, the mmap cache reference, the error map, the excluded syslog identifiers and the remaining
 * heap buffers. */
2537 _public_
void sd_journal_close(sd_journal
*j
) {
2538 if (!j
|| journal_origin_changed(j
))
2541 journal_clear_newest_by_boot_id(j
);
2543 sd_journal_flush_matches(j
);
2545 /* log stats before closing files so we can see the windows state */
2547 mmap_cache_stats_log_debug(j
->mmap
);
2549 ordered_hashmap_free(j
->files
);
2550 iterated_cache_free(j
->files_cache
);
2552 hashmap_free(j
->directories_by_path
);
2553 hashmap_free(j
->directories_by_wd
);
2555 if (FLAGS_SET(j
->flags
, SD_JOURNAL_TAKE_DIRECTORY_FD
))
2556 safe_close(j
->toplevel_fd
);
2558 safe_close(j
->inotify_fd
);
2561 mmap_cache_unref(j
->mmap
);
2563 hashmap_free(j
->errors
);
2565 set_free(j
->exclude_syslog_identifiers
);
2570 free(j
->unique_field
);
2571 free(j
->fields_buffer
);
/* Refreshes the cached "newest entry" data of journal file f (boot ID, monotonic and realtime
 * timestamps, machine ID, entry offset and header state).
 *
 * Returns 0 when the cache is already current: the file is assumed immutable and was read before,
 * it is archived and was read before, or the tail offset equals the cached one. Returns -ENODATA
 * when the file has no object/entry at all, and 1 after the cache was updated. The tail offset is
 * taken from header->tail_entry_offset when the header has that field, otherwise from
 * header->tail_object_offset. If the object found there is not an entry, the timestamps are taken
 * from the header (when it carries a matching tail boot ID and the file is archived) or else the
 * last entry is located with journal_file_next_entry(). Data where the monotonic timestamp
 * exceeds the realtime timestamp is refused. When the boot ID changes, the file is unlinked from
 * its newest-by-boot-id list before the cache is rewritten and the lists are reshuffled. */
2575 static int journal_file_read_tail_timestamp(sd_journal
*j
, JournalFile
*f
) {
2576 uint64_t offset
, mo
, rt
;
2586 /* Tries to read the timestamp of the most recently written entry. */
2588 if (FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
) && f
->newest_entry_offset
!= 0)
2589 return 0; /* We have already read the file, and we assume that the file is immutable. */
2591 if (f
->header
->state
== f
->newest_state
&&
2592 f
->header
->state
== STATE_ARCHIVED
&&
2593 f
->newest_entry_offset
!= 0)
2594 return 0; /* We have already read archived file. */
2596 if (JOURNAL_HEADER_CONTAINS(f
->header
, tail_entry_offset
)) {
2597 offset
= le64toh(READ_NOW(f
->header
->tail_entry_offset
));
2598 type
= OBJECT_ENTRY
;
2600 offset
= le64toh(READ_NOW(f
->header
->tail_object_offset
));
2601 type
= OBJECT_UNUSED
;
2604 return -ENODATA
; /* not a single object/entry, hence no tail timestamp */
2605 if (offset
== f
->newest_entry_offset
)
2606 return 0; /* No new entry is added after we read last time. */
2608 /* Move to the last object in the journal file, in the hope it is an entry (which it usually will
2609 * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED
2610 * here, since we cannot be sure what the last object will be, and want no noisy logging if it isn't
2611 * an entry. We instead check after figuring out the pointer. */
2612 r
= journal_file_move_to_object(f
, type
, offset
, &o
);
2614 log_debug_errno(r
, "Failed to move to last object in journal file, ignoring: %m");
2618 if (o
&& o
->object
.type
== OBJECT_ENTRY
) {
2619 /* Yay, last object is an entry, let's use the data. */
2620 id
= o
->entry
.boot_id
;
2621 mo
= le64toh(o
->entry
.monotonic
);
2622 rt
= le64toh(o
->entry
.realtime
);
2624 /* So the object is not an entry or we couldn't access it? In that case, let's read the most
2625 * recent entry timestamps from the header. It's equally good. Unfortunately though, in old
2626 * versions of the journal the boot ID in the header doesn't have to match the monotonic
2627 * timestamp of the header. Let's check the header flag that indicates whether this strictly
2628 * matches first hence, before using the data. */
2630 if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f
->header
) && f
->header
->state
== STATE_ARCHIVED
) {
2631 mo
= le64toh(f
->header
->tail_entry_monotonic
);
2632 rt
= le64toh(f
->header
->tail_entry_realtime
);
2633 id
= f
->header
->tail_entry_boot_id
;
2634 offset
= UINT64_MAX
;
2636 /* Otherwise let's find the last entry manually (this possibly means traversing the
2637 * chain of entry arrays, till the end */
2638 r
= journal_file_next_entry(f
, 0, DIRECTION_UP
, &o
, offset
== 0 ? &offset
: NULL
);
2644 id
= o
->entry
.boot_id
;
2645 mo
= le64toh(o
->entry
.monotonic
);
2646 rt
= le64toh(o
->entry
.realtime
);
2650 if (mo
> rt
) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */
2653 if (offset
== f
->newest_entry_offset
) {
2654 /* Cached data and the current one should be equivalent. */
2655 if (!sd_id128_equal(f
->newest_machine_id
, f
->header
->machine_id
) ||
2656 !sd_id128_equal(f
->newest_boot_id
, id
) ||
2657 f
->newest_monotonic_usec
!= mo
||
2658 f
->newest_realtime_usec
!= rt
)
2661 return 0; /* No new entry is added after we read last time. */
2664 if (!sd_id128_equal(f
->newest_boot_id
, id
))
2665 journal_file_unlink_newest_by_boot_id(j
, f
);
2667 f
->newest_boot_id
= id
;
2668 f
->newest_monotonic_usec
= mo
;
2669 f
->newest_realtime_usec
= rt
;
2670 f
->newest_machine_id
= f
->header
->machine_id
;
2671 f
->newest_entry_offset
= offset
;
2672 f
->newest_state
= f
->header
->state
;
2674 r
= journal_file_reshuffle_newest_by_boot_id(j
, f
);
2678 return 1; /* Updated. */
/* Public API: reads the realtime timestamp of the current entry. Rejects a NULL journal with
 * -EINVAL and a changed origin with -ECHILD, and fails with -EADDRNOTAVAIL when there is no
 * current position. The entry object is loaded from the current file at its current offset; its
 * little-endian realtime field is converted with le64toh() and checked with VALID_REALTIME(). */
2681 _public_
int sd_journal_get_realtime_usec(sd_journal
*j
, uint64_t *ret
) {
2686 assert_return(j
, -EINVAL
);
2687 assert_return(!journal_origin_changed(j
), -ECHILD
);
2689 f
= j
->current_file
;
2691 return -EADDRNOTAVAIL
;
2692 if (f
->current_offset
<= 0)
2693 return -EADDRNOTAVAIL
;
2695 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2699 uint64_t t
= le64toh(o
->entry
.realtime
);
2700 if (!VALID_REALTIME(t
))
/* Public API: reads the monotonic timestamp and boot ID of the current entry. Rejects a NULL
 * journal with -EINVAL and a changed origin with -ECHILD, and fails with -EADDRNOTAVAIL when there
 * is no current position. The entry's boot ID is compared against the current boot ID
 * (sd_id128_get_boot()) on one path; the little-endian monotonic field is converted with le64toh()
 * and checked with VALID_MONOTONIC(). The entry's boot ID is stored in *ret_boot_id. */
2709 _public_
int sd_journal_get_monotonic_usec(sd_journal
*j
, uint64_t *ret_monotonic
, sd_id128_t
*ret_boot_id
) {
2714 assert_return(j
, -EINVAL
);
2715 assert_return(!journal_origin_changed(j
), -ECHILD
);
2717 f
= j
->current_file
;
2719 return -EADDRNOTAVAIL
;
2720 if (f
->current_offset
<= 0)
2721 return -EADDRNOTAVAIL
;
2723 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2730 r
= sd_id128_get_boot(&id
);
2734 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
2738 uint64_t t
= le64toh(o
->entry
.monotonic
);
2739 if (!VALID_MONOTONIC(t
))
2745 *ret_boot_id
= o
->entry
.boot_id
;
/* Public API: reads the sequence number of the current entry and the sequence number ID of the
 * file it lives in. Rejects a NULL journal with -EINVAL and a changed origin with -ECHILD, and
 * fails with -EADDRNOTAVAIL when there is no current position. *ret_seqnum_id is taken from the
 * file header, *ret_seqnum from the entry object (converted from little-endian). */
2750 _public_
int sd_journal_get_seqnum(
2752 uint64_t *ret_seqnum
,
2753 sd_id128_t
*ret_seqnum_id
) {
2759 assert_return(j
, -EINVAL
);
2760 assert_return(!journal_origin_changed(j
), -ECHILD
);
2762 f
= j
->current_file
;
2764 return -EADDRNOTAVAIL
;
2766 if (f
->current_offset
<= 0)
2767 return -EADDRNOTAVAIL
;
2769 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2774 *ret_seqnum_id
= f
->header
->seqnum_id
;
2776 *ret_seqnum
= le64toh(o
->entry
.seqnum
);
/* Validates a journal field name as passed to sd_journal_get_data(). The name is checked for a
 * leading "__" and then scanned character by character, with upper-case ASCII letters and ASCII
 * digits being recognized. */
2781 static bool field_is_valid(const char *field
) {
2787 if (startswith(field
, "__"))
2790 for (const char *p
= field
; *p
; p
++) {
2795 if (*p
>= 'A' && *p
<= 'Z')
2798 if (ascii_isdigit(*p
))
/* Public API: looks up the data of a named field in the current entry. All pointer arguments and
 * the field name are validated (-EINVAL), a changed origin yields -ECHILD, and a missing current
 * position yields -EADDRNOTAVAIL. The items of the entry are walked in order and each data
 * object's payload is fetched with journal_file_data_payload(), filtered by the field name and
 * limited by j->data_threshold. Data objects that fail with -EADDRNOTAVAIL or -EBADMSG are logged
 * at debug level and skipped. */
2807 _public_
int sd_journal_get_data(sd_journal
*j
, const char *field
, const void **data
, size_t *size
) {
2809 size_t field_length
;
2813 assert_return(j
, -EINVAL
);
2814 assert_return(!journal_origin_changed(j
), -ECHILD
);
2815 assert_return(field
, -EINVAL
);
2816 assert_return(data
, -EINVAL
);
2817 assert_return(size
, -EINVAL
);
2818 assert_return(field_is_valid(field
), -EINVAL
);
2820 f
= j
->current_file
;
2822 return -EADDRNOTAVAIL
;
2824 if (f
->current_offset
<= 0)
2825 return -EADDRNOTAVAIL
;
2827 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2831 field_length
= strlen(field
);
2833 uint64_t n
= journal_file_entry_n_items(f
, o
);
2834 for (uint64_t i
= 0; i
< n
; i
++) {
2839 p
= journal_file_entry_item_object_offset(f
, o
, i
);
2840 r
= journal_file_data_payload(f
, NULL
, p
, field
, field_length
, j
->data_threshold
, &d
, &l
);
2843 if (IN_SET(r
, -EADDRNOTAVAIL
, -EBADMSG
)) {
2844 log_debug_errno(r
, "Entry item %"PRIu64
" data object is bad, skipping over it: %m", i
);
/* Public API: returns the next field of the current entry, continuing from j->current_field.
 * Arguments are validated (-EINVAL), a changed origin yields -ECHILD, and a missing current
 * position yields -EADDRNOTAVAIL. Each remaining item's payload is fetched with
 * journal_file_data_payload() (no field filter, limited by j->data_threshold); data objects that
 * fail with -EADDRNOTAVAIL or -EBADMSG are logged at debug level and skipped. */
2859 _public_
int sd_journal_enumerate_data(sd_journal
*j
, const void **data
, size_t *size
) {
2864 assert_return(j
, -EINVAL
);
2865 assert_return(!journal_origin_changed(j
), -ECHILD
);
2866 assert_return(data
, -EINVAL
);
2867 assert_return(size
, -EINVAL
);
2869 f
= j
->current_file
;
2871 return -EADDRNOTAVAIL
;
2873 if (f
->current_offset
<= 0)
2874 return -EADDRNOTAVAIL
;
2876 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2880 for (uint64_t n
= journal_file_entry_n_items(f
, o
); j
->current_field
< n
; j
->current_field
++) {
2885 p
= journal_file_entry_item_object_offset(f
, o
, j
->current_field
);
2886 r
= journal_file_data_payload(f
, NULL
, p
, NULL
, 0, j
->data_threshold
, &d
, &l
);
2887 if (IN_SET(r
, -EADDRNOTAVAIL
, -EBADMSG
)) {
2888 log_debug_errno(r
, "Entry item %"PRIu64
" data object is bad, skipping over it: %m", j
->current_field
);
/* Public API: like sd_journal_enumerate_data(), but skips fields that cannot be read. When the
 * underlying call fails with an error classified by JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(),
 * j->current_field is advanced and the next field is tried. */
2906 _public_
int sd_journal_enumerate_available_data(sd_journal
*j
, const void **data
, size_t *size
) {
2910 r
= sd_journal_enumerate_data(j
, data
, size
);
2913 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r
))
2915 j
->current_field
++; /* Try with the next field */
/* Public API: restarts field enumeration for the current entry by resetting j->current_field to
 * 0. A NULL journal or one whose origin changed is checked for first. */
2919 _public_
void sd_journal_restart_data(sd_journal
*j
) {
2920 if (!j
|| journal_origin_changed(j
))
2923 j
->current_field
= 0;
/* Re-scans every location the journal was opened from, choosing the strategy that matches how it
 * was opened: the directories of the current files (no_new_files), the default search paths
 * (SD_JOURNAL_OS_ROOT), the top-level fd, the explicit j->path, or otherwise the default search
 * paths. */
2926 static int reiterate_all_paths(sd_journal
*j
) {
2929 if (j
->no_new_files
)
2930 return add_current_paths(j
);
2932 if (j
->flags
& SD_JOURNAL_OS_ROOT
)
2933 return add_search_paths(j
);
2935 if (j
->toplevel_fd
>= 0)
2936 return add_root_directory(j
, NULL
, false);
2939 return add_root_directory(j
, j
->path
, true);
2941 return add_search_paths(j
);
/* Public API: returns the inotify fd that signals journal changes, creating it on first use.
 * Rejects a NULL journal with -EINVAL, a changed origin with -ECHILD and a journal opened with
 * SD_JOURNAL_ASSUME_IMMUTABLE with -EUNATCH; one further condition yields -EMEDIUMTYPE. If no
 * inotify fd exists yet, it is allocated and all paths are reiterated so that watches get
 * established. */
2944 _public_
int sd_journal_get_fd(sd_journal
*j
) {
2947 assert_return(j
, -EINVAL
);
2948 assert_return(!journal_origin_changed(j
), -ECHILD
);
2949 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
2952 return -EMEDIUMTYPE
;
2954 if (j
->inotify_fd
>= 0)
2955 return j
->inotify_fd
;
2957 r
= allocate_inotify(j
);
2961 log_debug("Reiterating files to get inotify watches established.");
2963 /* Iterate through all dirs again, to add them to the inotify */
2964 r
= reiterate_all_paths(j
);
2968 return j
->inotify_fd
;
/* Public API: reports the poll events to wait for on the journal fd. Performs the same argument
 * checks as sd_journal_get_fd() (-EINVAL, -ECHILD, -EUNATCH) and obtains the fd through it. */
2971 _public_
int sd_journal_get_events(sd_journal
*j
) {
2974 assert_return(j
, -EINVAL
);
2975 assert_return(!journal_origin_changed(j
), -ECHILD
);
2976 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
2978 fd
= sd_journal_get_fd(j
);
/* Public API: reports in *timeout_usec when the caller should wake up to process the journal even
 * without an fd event. If the journal is not on a network file system, no timeout is needed and
 * UINT64_MAX is stored. Otherwise changes must be polled for, and the timeout is set to
 * JOURNAL_FILES_RECHECK_USEC after the last processing time. Argument checks: -EINVAL for NULL
 * pointers, -ECHILD for a changed origin, -EUNATCH with SD_JOURNAL_ASSUME_IMMUTABLE. */
2985 _public_
int sd_journal_get_timeout(sd_journal
*j
, uint64_t *timeout_usec
) {
2988 assert_return(j
, -EINVAL
);
2989 assert_return(!journal_origin_changed(j
), -ECHILD
);
2990 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
2991 assert_return(timeout_usec
, -EINVAL
);
2993 fd
= sd_journal_get_fd(j
);
2997 if (!j
->on_network
) {
2998 *timeout_usec
= UINT64_MAX
;
3002 /* If we are on the network we need to regularly check for
3003 * changes manually */
3005 *timeout_usec
= j
->last_process_usec
+ JOURNAL_FILES_RECHECK_USEC
;
/* Brings the in-memory lists of journal files and watched directories back in sync with the
 * disk after the inotify queue overflowed. All paths are re-enumerated via
 * reiterate_all_paths(); afterwards every file and every non-root directory whose
 * last_seen_generation differs from j->generation is treated as gone. Several statements of
 * this function (e.g. the skip statements after the generation checks) are not part of this
 * excerpt. */
3009 static void process_q_overflow(sd_journal
*j
) {
3015 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
3016 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
3017 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
3018 * are subject for unloading. */
3020 log_debug("Inotify queue overrun, reiterating everything.");
/* Best effort: a failing re-enumeration is deliberately ignored. */
3023 (void) reiterate_all_paths(j
);
3025 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3027 if (f
->last_seen_generation
== j
->generation
)
3030 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f
->path
);
3031 remove_file_real(j
, f
);
3034 HASHMAP_FOREACH(m
, j
->directories_by_path
) {
3036 if (m
->last_seen_generation
== j
->generation
)
3039 if (m
->is_root
) /* Never GC root directories */
/* Log the path of the directory being visited ('m'). The file iterator 'f' belongs to the
 * loop above and must not be dereferenced here. */
3042 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", m
->path
);
3046 log_debug("Reiteration complete.");
/* Dispatches a single inotify event 'e' for journal context 'j'.
 *
 * A queue overflow is handed to process_q_overflow(). Otherwise the watch descriptor is
 * looked up in j->directories_by_wd and the event is classified as one of: a journal file
 * event (non-directory name ending in ".journal" or ".journal~"), an event on a watched
 * subdirectory itself (empty name, not the root), or a new subdirectory below a root
 * directory (directory name that is a valid 128-bit ID). Anything else ends up at the
 * "Unexpected inotify event." debug message, except IN_IGNORED events, which get their own
 * check whose body is not shown. Several branch bodies are not part of this excerpt. */
3049 static void process_inotify_event(sd_journal
*j
, const struct inotify_event
*e
) {
3055 if (e
->mask
& IN_Q_OVERFLOW
) {
3056 process_q_overflow(j
);
3060 /* Is this a subdirectory we watch? */
3061 d
= hashmap_get(j
->directories_by_wd
, INT_TO_PTR(e
->wd
));
3063 if (!(e
->mask
& IN_ISDIR
) && e
->len
> 0 &&
3064 (endswith(e
->name
, ".journal") ||
3065 endswith(e
->name
, ".journal~"))) {
3067 /* Event for a journal file */
/* Creation, move-in, modification and attribute changes (re)load the file; deletion,
 * move-out and unmount drop it. Failures of either helper are ignored. */
3069 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
3070 (void) add_file_by_name(j
, d
->path
, e
->name
);
3071 else if (e
->mask
& (IN_DELETE
|IN_MOVED_FROM
|IN_UNMOUNT
))
3072 (void) remove_file_by_name(j
, d
->path
, e
->name
);
3074 } else if (!d
->is_root
&& e
->len
== 0) {
3076 /* Event for a subdirectory */
3078 if (e
->mask
& (IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
))
3081 } else if (d
->is_root
&& (e
->mask
& IN_ISDIR
) && e
->len
> 0 && id128_is_valid(e
->name
)) {
3083 /* Event for root directory */
3085 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
3086 (void) add_directory(j
, d
->path
, e
->name
);
3092 if (e
->mask
& IN_IGNORED
)
3095 log_debug("Unexpected inotify event.");
/* Reports what kind of change happened since the last call: SD_JOURNAL_INVALIDATE when
 * j->current_invalidate_counter differs from j->last_invalidate_counter, SD_JOURNAL_APPEND
 * otherwise. As a side effect the "last" counter is synchronized with the current one, so
 * a subsequent call without new invalidations yields SD_JOURNAL_APPEND. */
3098 static int determine_change(sd_journal
*j
) {
3103 b
= j
->current_invalidate_counter
!= j
->last_invalidate_counter
;
3104 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
3106 return b
? SD_JOURNAL_INVALIDATE
: SD_JOURNAL_APPEND
;
/* Reads pending events from the journal's inotify fd and feeds each one to
 * process_inotify_event().
 *
 * When read() fails with a transient errno, the function returns determine_change(j) if at
 * least one batch of events was consumed, or SD_JOURNAL_NOP otherwise. Rejected with -EINVAL
 * (NULL j), -ECHILD (origin changed) and, once an inotify fd exists, -EUNATCH for
 * SD_JOURNAL_ASSUME_IMMUTABLE contexts. The loop construct and the remaining error paths
 * are not visible in this excerpt. */
3109 _public_
int sd_journal_process(sd_journal
*j
) {
3110 bool got_something
= false;
3112 assert_return(j
, -EINVAL
);
3113 assert_return(!journal_origin_changed(j
), -ECHILD
);
3115 if (j
->inotify_fd
< 0) /* We have no inotify fd yet? Then there's nothing to process. */
3118 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
/* Remember when we last processed events (used by sd_journal_get_timeout()) and take a
 * snapshot of the invalidation counter for determine_change(). */
3120 j
->last_process_usec
= now(CLOCK_MONOTONIC
);
3121 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
3124 union inotify_event_buffer buffer
;
3127 l
= read(j
->inotify_fd
, &buffer
, sizeof(buffer
));
3129 if (ERRNO_IS_TRANSIENT(errno
))
3130 return got_something
? determine_change(j
) : SD_JOURNAL_NOP
;
3135 got_something
= true;
3137 FOREACH_INOTIFY_EVENT(e
, buffer
, l
)
3138 process_inotify_event(j
, e
);
/* Waits for changes to the journal for at most timeout_usec and then processes them with
 * sd_journal_process(), whose result is returned.
 *
 * On the very first call (no inotify fd yet) the watch is created through
 * sd_journal_get_fd(), the known files are checked with journal_file_fstat() (a file is
 * removed from the context in one outcome and a debug message is logged in another; the
 * exact conditions are not visible in this excerpt), and determine_change(j) is returned
 * immediately without waiting. Rejected with -EINVAL, -ECHILD or -EUNATCH like the other
 * watch-related calls. */
3142 _public_
int sd_journal_wait(sd_journal
*j
, uint64_t timeout_usec
) {
3146 assert_return(j
, -EINVAL
);
3147 assert_return(!journal_origin_changed(j
), -ECHILD
);
3148 assert_return(!FLAGS_SET(j
->flags
, SD_JOURNAL_ASSUME_IMMUTABLE
), -EUNATCH
);
3150 if (j
->inotify_fd
< 0) {
3153 /* This is the first invocation, hence create the inotify watch */
3154 r
= sd_journal_get_fd(j
);
3158 /* Server might have done some vacuuming while we weren't watching. Get rid of the deleted
3159 * files now so they don't stay around indefinitely. */
3160 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3161 r
= journal_file_fstat(f
);
3163 remove_file_real(j
, f
);
3165 log_debug_errno(r
, "Failed to fstat() journal file '%s', ignoring: %m", f
->path
);
3168 /* The journal might have changed since the context object was created and we weren't
3169 * watching before, hence don't wait for anything, and return immediately. */
3170 return determine_change(j
);
3173 r
= sd_journal_get_timeout(j
, &t
);
/* 't' is converted from an absolute point in time into a remaining duration, which is then
 * compared against the caller's timeout (the assignment that follows the comparison is not
 * shown here). */
3177 if (t
!= UINT64_MAX
) {
3178 t
= usec_sub_unsigned(t
, now(CLOCK_MONOTONIC
));
3180 if (timeout_usec
== UINT64_MAX
|| timeout_usec
> t
)
/* Retry the wait when it was interrupted by a signal. */
3185 r
= fd_wait_for_event(j
->inotify_fd
, POLLIN
, timeout_usec
);
3186 } while (r
== -EINTR
);
3191 return sd_journal_process(j
);
/* Determines the realtime range covered by all journal files of the context by combining
 * the per-file results of journal_file_get_cutoff_realtime_usec(): the smallest "from" and
 * the largest "to".
 *
 * At least one of 'from' and 'to' must be non-NULL and they must not alias each other
 * (-EINVAL otherwise). Returns 1 when a range was found and 0 when 'first' was never
 * cleared; the bookkeeping of 'first' and the stores to the output parameters are not
 * visible in this excerpt. */
3194 _public_
int sd_journal_get_cutoff_realtime_usec(sd_journal
*j
, uint64_t *from
, uint64_t *to
) {
3197 uint64_t fmin
= 0, tmax
= 0;
3200 assert_return(j
, -EINVAL
);
3201 assert_return(!journal_origin_changed(j
), -ECHILD
);
3202 assert_return(from
|| to
, -EINVAL
);
3203 assert_return(from
!= to
, -EINVAL
);
3205 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3208 r
= journal_file_get_cutoff_realtime_usec(f
, &fr
, &t
);
3221 fmin
= MIN(fr
, fmin
);
3222 tmax
= MAX(t
, tmax
);
3231 return first
? 0 : 1;
/* Monotonic-clock counterpart of sd_journal_get_cutoff_realtime_usec(): queries every
 * journal file with journal_file_get_cutoff_monotonic_usec() for the given boot_id and
 * aggregates the results, starting from UINT64_MAX for both bounds. The parameter list,
 * the aggregation of the upper bound and the return statements are not part of this
 * excerpt. ret_from and ret_to must not alias each other (-EINVAL). */
3234 _public_
int sd_journal_get_cutoff_monotonic_usec(
3240 uint64_t from
= UINT64_MAX
, to
= UINT64_MAX
;
3245 assert_return(j
, -EINVAL
);
3246 assert_return(!journal_origin_changed(j
), -ECHILD
);
3247 assert_return(ret_from
!= ret_to
, -EINVAL
);
3249 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3252 r
= journal_file_get_cutoff_monotonic_usec(f
, boot_id
, &ff
, &tt
);
3261 from
= MIN(ff
, from
);
/* Calls journal_file_print_header() for every journal file of the context. The 'newline'
 * flag is presumably used to separate consecutive headers; the code using it is not shown
 * in this excerpt. */
3278 void journal_print_header(sd_journal
*j
) {
3280 bool newline
= false;
3284 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3290 journal_file_print_header(f
);
/* Sums up the disk usage of all journal files of the context, based on the st_blocks value
 * that fstat() reports for each file's fd. Two overflow guards are visible: one before the
 * block count is scaled (the 512 factor matches the divisor in the check; the multiplication
 * itself is not shown) and one before the value is added to the running sum. Rejected with
 * -EINVAL for a NULL j or ret and -ECHILD when the origin changed. */
3294 _public_
int sd_journal_get_usage(sd_journal
*j
, uint64_t *ret
) {
3298 assert_return(j
, -EINVAL
);
3299 assert_return(!journal_origin_changed(j
), -ECHILD
);
3300 assert_return(ret
, -EINVAL
);
3302 ORDERED_HASHMAP_FOREACH(f
, j
->files
) {
3306 if (fstat(f
->fd
, &st
) < 0)
3309 b
= (uint64_t) st
.st_blocks
;
3310 if (b
> UINT64_MAX
/ 512)
3314 if (sum
> UINT64_MAX
- b
)
/* Selects the field whose unique values are to be enumerated by
 * sd_journal_enumerate_unique(). The field name is validated with field_is_valid(), copied
 * into j->unique_field (replacing any previous one), and the enumeration state
 * (unique_file, unique_offset, unique_file_lost) is reset. */
3323 _public_
int sd_journal_query_unique(sd_journal
*j
, const char *field
) {
3326 assert_return(j
, -EINVAL
);
3327 assert_return(!journal_origin_changed(j
), -ECHILD
);
3329 if (!field_is_valid(field
))
3332 r
= free_and_strdup(&j
->unique_field
, field
);
3336 j
->unique_file
= NULL
;
3337 j
->unique_offset
= 0;
3338 j
->unique_file_lost
= false;
/* Advances the unique-value enumeration set up by sd_journal_query_unique().
 *
 * The function walks, file by file, the linked list of data objects attached to the selected
 * field (head_data_offset, then next_field_offset). Each candidate payload is verified to
 * start with "<field>=" and is then looked up in the other journal files of the context so
 * that a value already reported from an earlier file can be recognized. Requires a prior
 * sd_journal_query_unique() call (-EINVAL when j->unique_field is unset). Parts of the
 * parameter list, the loop constructs and the return statements are not visible in this
 * excerpt. */
3343 _public_
int sd_journal_enumerate_unique(
3345 const void **ret_data
,
3350 assert_return(j
, -EINVAL
);
3351 assert_return(!journal_origin_changed(j
), -ECHILD
);
3352 assert_return(j
->unique_field
, -EINVAL
);
3354 k
= strlen(j
->unique_field
);
/* No current file: start with the first file of the context. */
3356 if (!j
->unique_file
) {
3357 if (j
->unique_file_lost
)
3360 j
->unique_file
= ordered_hashmap_first(j
->files
);
3361 if (!j
->unique_file
)
3364 j
->unique_offset
= 0;
3375 /* Proceed to next data object in the field's linked list */
3376 if (j
->unique_offset
== 0) {
3377 r
= journal_file_find_field_object(j
->unique_file
, j
->unique_field
, k
, &o
, NULL
);
/* Field not present in this file (r == 0): leave the offset at 0. */
3381 j
->unique_offset
= r
> 0 ? le64toh(o
->field
.head_data_offset
) : 0;
3383 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_DATA
, j
->unique_offset
, &o
);
3387 j
->unique_offset
= le64toh(o
->data
.next_field_offset
);
3390 /* We reached the end of the list? Then start again, with the next file */
3391 if (j
->unique_offset
== 0) {
3392 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
3393 if (!j
->unique_file
)
3399 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_DATA
, j
->unique_offset
, &o
);
3403 /* Let's pin the data object, so we can look at it at the same time as one on another file. */
3404 r
= journal_file_pin_object(j
->unique_file
, o
);
3408 r
= journal_file_data_payload(j
->unique_file
, o
, j
->unique_offset
, NULL
, 0,
3409 j
->data_threshold
, &odata
, &ol
);
3413 /* Check if we have at least the field name and "=". */
3415 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG
),
3416 "%s:offset " OFSfmt
": object has size %zu, expected at least %zu",
3417 j
->unique_file
->path
,
3418 j
->unique_offset
, ol
, k
+ 1);
3420 if (memcmp(odata
, j
->unique_field
, k
) != 0 || ((const char*) odata
)[k
] != '=')
3421 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG
),
3422 "%s:offset " OFSfmt
": object does not start with \"%s=\"",
3423 j
->unique_file
->path
,
3427 /* OK, now let's see if we already returned this data object by checking if it exists in the
3428 * earlier traversed files. */
3430 ORDERED_HASHMAP_FOREACH(of
, j
->files
) {
3431 if (of
== j
->unique_file
)
3434 /* Skip this file if it didn't have any fields indexed */
3435 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
3438 /* We can reuse the hash from our current file only on old-style journal files
3439 * without keyed hashes. On new-style files we have to calculate the hash anew, to
3440 * take the per-file hash seed into consideration. */
3441 if (!JOURNAL_HEADER_KEYED_HASH(j
->unique_file
->header
) && !JOURNAL_HEADER_KEYED_HASH(of
->header
))
3442 r
= journal_file_find_data_object_with_hash(of
, odata
, ol
, le64toh(o
->data
.hash
), NULL
, NULL
);
3444 r
= journal_file_find_data_object(of
, odata
, ol
, NULL
, NULL
);
/* Variant of sd_journal_enumerate_unique() that does not stop at results which
 * JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD() classifies as "unavailable"; such results lead to
 * another attempt. The enclosing loop and the return statement are not visible in this
 * excerpt. */
3463 _public_
int sd_journal_enumerate_available_unique(sd_journal
*j
, const void **data
, size_t *size
) {
3467 r
= sd_journal_enumerate_unique(j
, data
, size
);
3470 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r
))
3472 /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try
3473 * we will access the next field. */
/* Rewinds the unique-value enumeration: clears the current file and offset and resets the
 * unique_file_lost flag. The guard below covers a NULL handle or a handle whose origin
 * changed (presumably returning without touching anything; the guarded statement is not
 * shown in this excerpt). */
3477 _public_
void sd_journal_restart_unique(sd_journal
*j
) {
3478 if (!j
|| journal_origin_changed(j
))
3481 j
->unique_file
= NULL
;
3482 j
->unique_offset
= 0;
3483 j
->unique_file_lost
= false;
/* Advances the enumeration of field names used across all journal files of the context and
 * stores a pointer to the next name in *field.
 *
 * The iteration state consists of the current file (j->fields_file), the current bucket of
 * that file's field hash table (j->fields_hash_table_index) and the offset of the current
 * field object within the bucket's chain (j->fields_offset). Each candidate name is looked
 * up in the other files so that a name already reported can be recognized, is truncated to
 * j->data_threshold when that is non-zero, copied into j->fields_buffer and validated with
 * field_is_valid(). The string handed out therefore lives in j->fields_buffer. Loop
 * constructs, skip statements and return values are not visible in this excerpt. */
3486 _public_
int sd_journal_enumerate_fields(sd_journal
*j
, const char **field
) {
3489 assert_return(j
, -EINVAL
);
3490 assert_return(!journal_origin_changed(j
), -ECHILD
);
3491 assert_return(field
, -EINVAL
);
/* No current file: start with the first file of the context and its first bucket. */
3493 if (!j
->fields_file
) {
3494 if (j
->fields_file_lost
)
3497 j
->fields_file
= ordered_hashmap_first(j
->files
);
3498 if (!j
->fields_file
)
3501 j
->fields_hash_table_index
= 0;
3502 j
->fields_offset
= 0;
3506 JournalFile
*f
, *of
;
3514 if (j
->fields_offset
== 0) {
3517 /* We are not yet positioned at any field. Let's pick the first one */
3518 r
= journal_file_map_field_hash_table(f
);
/* Number of buckets in this file's field hash table. */
3522 m
= le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
);
3524 if (j
->fields_hash_table_index
>= m
) {
3525 /* Reached the end of the hash table, go to the next file. */
3530 j
->fields_offset
= le64toh(f
->field_hash_table
[j
->fields_hash_table_index
].head_hash_offset
);
3532 if (j
->fields_offset
!= 0)
3535 /* Empty hash table bucket, go to next one */
3536 j
->fields_hash_table_index
++;
3540 /* Proceed with next file */
3541 j
->fields_file
= ordered_hashmap_next(j
->files
, f
->path
);
3542 if (!j
->fields_file
) {
3547 j
->fields_offset
= 0;
3548 j
->fields_hash_table_index
= 0;
3553 /* We are already positioned at a field. If so, let's figure out the next field from it */
3555 r
= journal_file_move_to_object(f
, OBJECT_FIELD
, j
->fields_offset
, &o
);
3559 j
->fields_offset
= le64toh(o
->field
.next_hash_offset
);
3560 if (j
->fields_offset
== 0) {
3561 /* Reached the end of the hash table chain */
3562 j
->fields_hash_table_index
++;
3567 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
3568 r
= journal_file_move_to_object(f
, OBJECT_UNUSED
, j
->fields_offset
, &o
);
3572 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
3573 if (o
->object
.type
!= OBJECT_FIELD
)
3574 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG
),
3575 "%s:offset " OFSfmt
": object has type %i, expected %i",
3576 f
->path
, j
->fields_offset
,
3577 o
->object
.type
, OBJECT_FIELD
);
/* Length of the field name: object size minus the fixed part in front of the payload. */
3579 sz
= le64toh(o
->object
.size
) - offsetof(Object
, field
.payload
);
3581 /* Let's see if we already returned this field name before. */
3583 ORDERED_HASHMAP_FOREACH(of
, j
->files
) {
3587 /* Skip this file if it didn't have any fields indexed */
3588 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
/* The stored hash is only reused when neither file uses keyed hashes; otherwise the
 * lookup without a precomputed hash is used. */
3591 if (!JOURNAL_HEADER_KEYED_HASH(f
->header
) && !JOURNAL_HEADER_KEYED_HASH(of
->header
))
3592 r
= journal_file_find_field_object_with_hash(of
, o
->field
.payload
, sz
,
3593 le64toh(o
->field
.hash
), NULL
, NULL
);
3595 r
= journal_file_find_field_object(of
, o
->field
.payload
, sz
, NULL
, NULL
);
3607 /* Check if this is really a valid string containing no NUL byte */
3608 if (memchr(o
->field
.payload
, 0, sz
))
3611 if (j
->data_threshold
> 0 && sz
> j
->data_threshold
)
3612 sz
= j
->data_threshold
;
/* One extra byte for the terminating NUL written below. */
3614 if (!GREEDY_REALLOC(j
->fields_buffer
, sz
+ 1))
3617 memcpy(j
->fields_buffer
, o
->field
.payload
, sz
);
3618 j
->fields_buffer
[sz
] = 0;
3620 if (!field_is_valid(j
->fields_buffer
))
3623 *field
= j
->fields_buffer
;
/* Rewinds the field-name enumeration: clears the current file, hash table index and offset
 * and resets the fields_file_lost flag. The guard below covers a NULL handle or a handle
 * whose origin changed (presumably returning without touching anything; the guarded
 * statement is not shown in this excerpt). */
3628 _public_
void sd_journal_restart_fields(sd_journal
*j
) {
3629 if (!j
|| journal_origin_changed(j
))
3632 j
->fields_file
= NULL
;
3633 j
->fields_hash_table_index
= 0;
3634 j
->fields_offset
= 0;
3635 j
->fields_file_lost
= false;
/* Returns the negation of j->on_network: non-zero when the journal is not located on a
 * network file system, 0 otherwise. Rejected with -EINVAL for a NULL handle and -ECHILD
 * when the origin changed. */
3638 _public_
int sd_journal_reliable_fd(sd_journal
*j
) {
3639 assert_return(j
, -EINVAL
);
3640 assert_return(!journal_origin_changed(j
), -ECHILD
);
3642 return !j
->on_network
;
/* Variable-lookup callback handed to replace_var() by sd_journal_get_catalog(); 'userdata'
 * is the journal context.
 *
 * Looks up 'field' in the current entry and returns a newly allocated copy of what follows
 * the first strlen(field) + 1 bytes of the data, i.e. the part after the field name and one
 * separator byte. When the data is larger than REPLACE_VAR_MAX (and in one further case
 * whose condition is not visible in this excerpt) a copy of the field name itself is
 * returned instead. The caller owns the returned string. */
3645 static char *lookup_field(const char *field
, void *userdata
) {
3646 sd_journal
*j
= ASSERT_PTR(userdata
);
3653 r
= sd_journal_get_data(j
, field
, &data
, &size
);
3655 size
> REPLACE_VAR_MAX
)
3656 return strdup(field
);
3658 d
= strlen(field
) + 1;
3660 return strndup((const char*) data
+ d
, size
- d
);
/* Produces the catalog text for the current journal entry.
 *
 * The entry's MESSAGE_ID data is fetched, the 11-byte "MESSAGE_ID=" prefix is skipped and
 * the remainder is parsed as a 128-bit ID. The matching catalog entry is read from the
 * database named by $SYSTEMD_CATALOG (via secure_getenv()) or, if that is unset, from
 * CATALOG_DATABASE. Variables in the text are then expanded with replace_var(), using
 * lookup_field() on this journal context. How the result reaches *ret and the error paths
 * are not visible in this excerpt. */
3663 _public_
int sd_journal_get_catalog(sd_journal
*j
, char **ret
) {
3667 _cleanup_free_
char *text
= NULL
, *cid
= NULL
;
3671 assert_return(j
, -EINVAL
);
3672 assert_return(!journal_origin_changed(j
), -ECHILD
);
3673 assert_return(ret
, -EINVAL
);
3675 r
= sd_journal_get_data(j
, "MESSAGE_ID", &data
, &size
);
/* 11 == strlen("MESSAGE_ID="): keep only the ID string itself. */
3679 cid
= strndup((const char*) data
+ 11, size
- 11);
3683 r
= sd_id128_from_string(cid
, &id
);
3687 r
= catalog_get(secure_getenv("SYSTEMD_CATALOG") ?: CATALOG_DATABASE
, id
, &text
);
3691 t
= replace_var(text
, lookup_field
, j
);
/* Looks up the catalog entry for message ID 'id' in CATALOG_DATABASE and returns the result
 * of catalog_get(), which receives 'ret' as its output parameter. Unlike
 * sd_journal_get_catalog(), no journal context is involved and no variable expansion is
 * performed here. Rejected with -EINVAL when 'ret' is NULL. */
3699 _public_
int sd_journal_get_catalog_for_message_id(sd_id128_t id
, char **ret
) {
3700 assert_return(ret
, -EINVAL
);
3702 return catalog_get(CATALOG_DATABASE
, id
, ret
);
/* Sets the data size threshold of the journal context to 'sz'. Elsewhere in this file a
 * threshold of 0 is treated as "no limit" (see the j->data_threshold > 0 check in
 * sd_journal_enumerate_fields()). Rejected with -EINVAL for a NULL handle and -ECHILD when
 * the origin changed. */
3705 _public_
int sd_journal_set_data_threshold(sd_journal
*j
, size_t sz
) {
3706 assert_return(j
, -EINVAL
);
3707 assert_return(!journal_origin_changed(j
), -ECHILD
);
3709 j
->data_threshold
= sz
;
/* Stores the current data size threshold of the journal context in *sz. Rejected with
 * -EINVAL for a NULL handle or NULL 'sz' and -ECHILD when the origin changed. */
3713 _public_
int sd_journal_get_data_threshold(sd_journal
*j
, size_t *sz
) {
3714 assert_return(j
, -EINVAL
);
3715 assert_return(!journal_origin_changed(j
), -ECHILD
);
3716 assert_return(sz
, -EINVAL
);
3718 *sz
= j
->data_threshold
;
/* Returns the context's has_runtime_files flag, or -EINVAL for a NULL handle. */
3722 _public_
int sd_journal_has_runtime_files(sd_journal
*j
) {
3723 assert_return(j
, -EINVAL
);
3725 return j
->has_runtime_files
;
/* Returns the context's has_persistent_files flag, or -EINVAL for a NULL handle. */
3728 _public_
int sd_journal_has_persistent_files(sd_journal
*j
) {
3729 assert_return(j
, -EINVAL
);
3731 return j
->has_persistent_files
;