1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <linux/magic.h>
27 #include <sys/inotify.h>
31 #include "sd-journal.h"
33 #include "alloc-util.h"
36 #include "dirent-util.h"
39 #include "format-util.h"
42 #include "hostname-util.h"
43 #include "id128-util.h"
45 #include "journal-def.h"
46 #include "journal-file.h"
47 #include "journal-internal.h"
51 #include "path-util.h"
52 #include "process-util.h"
53 #include "replace-var.h"
54 #include "stat-util.h"
55 #include "stat-util.h"
56 #include "stdio-util.h"
57 #include "string-util.h"
/* Upper bound on the number of journal files tracked at once: add_any_file()
 * declines to add another file once j->files has reached this size. */
60 #define JOURNAL_FILES_MAX 7168
/* Recheck interval: two seconds, expressed in microseconds. */
62 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
/* NOTE(review): presumably a size limit used during variable replacement -- confirm against its users. */
64 #define REPLACE_VAR_MAX 256
/* Initial value journal_new() assigns to j->data_threshold (64 KiB). */
66 #define DEFAULT_DATA_THRESHOLD (64*1024)
/* Forward declaration: real_journal_next() and remove_file_by_name() call this
 * before its definition further down. */
68 static void remove_file_real(sd_journal
*j
, JournalFile
*f
);
/* Returns true when the calling process is not the one recorded in
 * j->original_pid, i.e. the journal object was carried across a fork(). */
70 static bool journal_pid_changed(sd_journal
*j
) {
73 /* We don't support people creating a journal object and
74 * keeping it around over a fork(). Let's complain. */
76 return j
->original_pid
!= getpid_cached();
/* Remembers error code r in j->errors (the hashmap is allocated on demand),
 * using the error code as key and `copy` as value.
 * NOTE(review): `copy` is presumably a duplicate of path -- confirm. */
79 static int journal_put_error(sd_journal
*j
, int r
, const char *path
) {
83 /* Memorize an error we encountered, and store which
84 * file/directory it was generated from. Note that we store
85 * only *one* path per error code, as the error code is the
86 * key into the hashmap, and the path is the value. This means
87 * we keep track only of all error kinds, but not of all error
88 * locations. This has the benefit that the hashmap cannot
91 * We return an error here only if we didn't manage to
92 * memorize the real error. */
97 k
= hashmap_ensure_allocated(&j
->errors
, NULL
);
108 k
= hashmap_put(j
->errors
, INT_TO_PTR(r
), copy
);
/* Detaches the journal from its current entry: clears j->current_file and
 * j->current_field, and resets the per-file location of every file in
 * j->files via journal_file_reset_location(). */
121 static void detach_location(sd_journal
*j
) {
127 j
->current_file
= NULL
;
128 j
->current_field
= 0;
130 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
)
131 journal_file_reset_location(f
);
/* Zeroes j->current_location. */
134 static void reset_location(sd_journal
*j
) {
138 zero(j
->current_location
);
/* Fills Location *l from entry object o of journal file f.
 *
 * type must be LOCATION_DISCRETE or LOCATION_SEEK and o must be an
 * OBJECT_ENTRY (both asserted). seqnum, realtime, monotonic and xor_hash are
 * converted with le64toh(); seqnum_id comes from the file header and boot_id
 * from the entry. All four *_set flags are switched on at the end. */
141 static void init_location(Location
*l
, LocationType type
, JournalFile
*f
, Object
*o
) {
143 assert(IN_SET(type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
145 assert(o
->object
.type
== OBJECT_ENTRY
);
148 l
->seqnum
= le64toh(o
->entry
.seqnum
);
149 l
->seqnum_id
= f
->header
->seqnum_id
;
150 l
->realtime
= le64toh(o
->entry
.realtime
);
151 l
->monotonic
= le64toh(o
->entry
.monotonic
);
152 l
->boot_id
= o
->entry
.boot_id
;
153 l
->xor_hash
= le64toh(o
->entry
.xor_hash
);
155 l
->seqnum_set
= l
->realtime_set
= l
->monotonic_set
= l
->xor_hash_set
= true;
/* Makes entry o of file f the journal's current location: initializes
 * j->current_location as a LOCATION_DISCRETE location, resets
 * j->current_field to 0, and switches f->location_type from LOCATION_SEEK
 * (asserted) to LOCATION_DISCRETE. */
158 static void set_location(sd_journal
*j
, JournalFile
*f
, Object
*o
) {
163 init_location(&j
->current_location
, LOCATION_DISCRETE
, f
, o
);
166 j
->current_field
= 0;
168 /* Let f know its candidate entry was picked. */
169 assert(f
->location_type
== LOCATION_SEEK
);
170 f
->location_type
= LOCATION_DISCRETE
;
/* Validates a match expression given as size bytes at data.
 *
 * The checks here test whether data starts with "__" and scan the bytes for
 * the ranges 'A'-'Z' and '0'-'9'.
 * NOTE(review): presumably a leading "__" is rejected and the scan validates
 * the field name -- confirm. sd_journal_add_match() maps a false result to
 * -EINVAL. */
173 static int match_is_valid(const void *data
, size_t size
) {
181 if (startswith(data
, "__"))
185 for (p
= b
; p
< b
+ size
; p
++) {
193 if (*p
>= 'A' && *p
<= 'Z')
196 if (*p
>= '0' && *p
<= '9')
/* Walks the byte buffers _a (s bytes) and _b (t bytes) in lockstep, up to the
 * shorter of the two lengths. The assert_not_reached() message shows the loop
 * is expected to finish upon meeting a '=' rather than by running out.
 * NOTE(review): presumably returns true when both buffers carry the same
 * field name (the part before '=') -- confirm. */
205 static bool same_field(const void *_a
, size_t s
, const void *_b
, size_t t
) {
206 const uint8_t *a
= _a
, *b
= _b
;
209 for (j
= 0; j
< s
&& j
< t
; j
++) {
218 assert_not_reached("\"=\" not found");
/* Creates a Match of type t and prepends it to p->matches. p may be NULL:
 * sd_journal_add_match() passes NULL when creating the top-level term. */
221 static Match
*match_new(Match
*p
, MatchType t
) {
232 LIST_PREPEND(matches
, p
->matches
, m
);
/* Releases a Match subtree: recurses into m->matches and unlinks m from its
 * parent's matches list. */
238 static void match_free(Match
*m
) {
242 match_free(m
->matches
);
245 LIST_REMOVE(matches
, m
->parent
->matches
, m
);
/* Tests whether m is NULL or still has child matches.
 * NOTE(review): going by the name, m is freed only when neither holds --
 * confirm. */
251 static void match_free_if_empty(Match
*m
) {
252 if (!m
|| m
->matches
)
/* Public API: adds the match given as size bytes at data to journal j.
 *
 * Fails with -EINVAL when j or data is NULL or match_is_valid() rejects the
 * match, and with -ECHILD when the journal is used after a fork().
 *
 * The match tree is built as level0 (AND) -> level1 (OR) -> level2 (AND) ->
 * OR terms -> discrete matches. A new match that shares its field with an
 * existing discrete match joins that match's OR term; otherwise a fresh OR
 * term is created under level2. The discrete match stores the little-endian
 * hash of the data (le_hash) and a memdup() copy of the data.
 * NOTE(review): the trailing match_free_if_empty() calls are presumably the
 * failure path that drops terms left empty -- confirm. */
258 _public_
int sd_journal_add_match(sd_journal
*j
, const void *data
, size_t size
) {
259 Match
*l3
, *l4
, *add_here
= NULL
, *m
;
262 assert_return(j
, -EINVAL
);
263 assert_return(!journal_pid_changed(j
), -ECHILD
);
264 assert_return(data
, -EINVAL
);
269 assert_return(match_is_valid(data
, size
), -EINVAL
);
275 * level 4: concrete matches */
278 j
->level0
= match_new(NULL
, MATCH_AND_TERM
);
284 j
->level1
= match_new(j
->level0
, MATCH_OR_TERM
);
290 j
->level2
= match_new(j
->level1
, MATCH_AND_TERM
);
295 assert(j
->level0
->type
== MATCH_AND_TERM
);
296 assert(j
->level1
->type
== MATCH_OR_TERM
);
297 assert(j
->level2
->type
== MATCH_AND_TERM
);
299 le_hash
= htole64(hash64(data
, size
));
301 LIST_FOREACH(matches
, l3
, j
->level2
->matches
) {
302 assert(l3
->type
== MATCH_OR_TERM
);
304 LIST_FOREACH(matches
, l4
, l3
->matches
) {
305 assert(l4
->type
== MATCH_DISCRETE
);
307 /* Exactly the same match already? Then ignore
309 if (l4
->le_hash
== le_hash
&&
311 memcmp(l4
->data
, data
, size
) == 0)
314 /* Same field? Then let's add this to this OR term */
315 if (same_field(data
, size
, l4
->data
, l4
->size
)) {
326 add_here
= match_new(j
->level2
, MATCH_OR_TERM
);
331 m
= match_new(add_here
, MATCH_DISCRETE
);
335 m
->le_hash
= le_hash
;
337 m
->data
= memdup(data
, size
);
346 match_free_if_empty(add_here
);
347 match_free_if_empty(j
->level2
);
348 match_free_if_empty(j
->level1
);
349 match_free_if_empty(j
->level0
);
/* Public API. Fails with -EINVAL when j is NULL and -ECHILD when the journal
 * is used after a fork(). Inspects whether j->level1 currently has any child
 * matches.
 * NOTE(review): presumably starts a new AND group only when the current one
 * is non-empty -- confirm. */
354 _public_
int sd_journal_add_conjunction(sd_journal
*j
) {
355 assert_return(j
, -EINVAL
);
356 assert_return(!journal_pid_changed(j
), -ECHILD
);
364 if (!j
->level1
->matches
)
/* Public API. Fails with -EINVAL when j is NULL and -ECHILD when the journal
 * is used after a fork(). Inspects whether j->level2 currently has any child
 * matches.
 * NOTE(review): presumably starts a new OR alternative only when the current
 * one is non-empty -- confirm. */
373 _public_
int sd_journal_add_disjunction(sd_journal
*j
) {
374 assert_return(j
, -EINVAL
);
375 assert_return(!journal_pid_changed(j
), -ECHILD
);
386 if (!j
->level2
->matches
)
/* Renders the match tree rooted at m as a newly allocated string.
 *
 * A discrete match is returned as a copy of its raw data. For a term, the
 * children are rendered recursively and joined with " OR " or " AND "
 * depending on m->type; a parenthesized form is built with
 * strjoin("(", p, ")"). The literal "none" is returned in one early case.
 * NOTE(review): presumably "none" is for a NULL m and the parentheses are
 * added when `enclose` is set -- confirm. */
393 static char *match_make_string(Match
*m
) {
396 bool enclose
= false;
399 return strdup("none");
401 if (m
->type
== MATCH_DISCRETE
)
402 return strndup(m
->data
, m
->size
);
404 LIST_FOREACH(matches
, i
, m
->matches
) {
407 t
= match_make_string(i
);
412 k
= strjoin(p
, m
->type
== MATCH_OR_TERM
? " OR " : " AND ", t
);
427 r
= strjoin("(", p
, ")");
/* Returns the string rendering of the journal's top-level match term
 * (j->level0), as produced by match_make_string(). */
435 char *journal_make_match_string(sd_journal
*j
) {
438 return match_make_string(j
->level0
);
/* Public API: drops all matches by freeing the tree rooted at j->level0 and
 * resetting level0, level1 and level2 to NULL. */
441 _public_
void sd_journal_flush_matches(sd_journal
*j
) {
446 match_free(j
->level0
);
448 j
->level0
= j
->level1
= j
->level2
= NULL
;
/* Orders the current entry of file f relative to Location l.
 *
 * f must be at LOCATION_SEEK and l must be DISCRETE or SEEK (asserted).
 * Criteria are consulted in this order:
 *   1. identity: same boot id, realtime and xor_hash;
 *   2. sequence number, when both share the same seqnum_id;
 *   3. monotonic timestamp, when both share the same boot id;
 *   4. realtime timestamp, when l has one;
 *   5. xor_hash, when l has one.
 * next_beyond_location() treats a positive result as "f is beyond l" when
 * iterating downwards and a negative result as such when iterating upwards. */
453 _pure_
static int compare_with_location(JournalFile
*f
, Location
*l
) {
456 assert(f
->location_type
== LOCATION_SEEK
);
457 assert(IN_SET(l
->type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
459 if (l
->monotonic_set
&&
460 sd_id128_equal(f
->current_boot_id
, l
->boot_id
) &&
462 f
->current_realtime
== l
->realtime
&&
464 f
->current_xor_hash
== l
->xor_hash
)
468 sd_id128_equal(f
->header
->seqnum_id
, l
->seqnum_id
)) {
470 if (f
->current_seqnum
< l
->seqnum
)
472 if (f
->current_seqnum
> l
->seqnum
)
476 if (l
->monotonic_set
&&
477 sd_id128_equal(f
->current_boot_id
, l
->boot_id
)) {
479 if (f
->current_monotonic
< l
->monotonic
)
481 if (f
->current_monotonic
> l
->monotonic
)
485 if (l
->realtime_set
) {
487 if (f
->current_realtime
< l
->realtime
)
489 if (f
->current_realtime
> l
->realtime
)
493 if (l
->xor_hash_set
) {
495 if (f
->current_xor_hash
< l
->xor_hash
)
497 if (f
->current_xor_hash
> l
->xor_hash
)
/* Finds, in file f, the next entry that satisfies match term m when moving
 * from after_offset in the given direction.
 *
 * - MATCH_DISCRETE: looks up the data object by content and hash, then moves
 *   to the entry referencing it relative to after_offset.
 * - MATCH_OR_TERM: asks every child and keeps the nearest candidate (smallest
 *   offset when moving down, largest when moving up).
 * - MATCH_AND_TERM: starts from the first child's candidate and keeps
 *   advancing the other children from it; a child that lands further along
 *   is detected, and the loop runs until all children agree on one offset.
 *   The agreed entry is then loaded with journal_file_move_to_object(). */
504 static int next_for_match(
508 uint64_t after_offset
,
509 direction_t direction
,
521 if (m
->type
== MATCH_DISCRETE
) {
524 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, le64toh(m
->le_hash
), NULL
, &dp
);
528 return journal_file_move_to_entry_by_offset_for_data(f
, dp
, after_offset
, direction
, ret
, offset
);
530 } else if (m
->type
== MATCH_OR_TERM
) {
533 /* Find the earliest match beyond after_offset */
535 LIST_FOREACH(matches
, i
, m
->matches
) {
538 r
= next_for_match(j
, i
, f
, after_offset
, direction
, NULL
, &cp
);
542 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
< np
: cp
> np
))
550 } else if (m
->type
== MATCH_AND_TERM
) {
551 Match
*i
, *last_moved
;
553 /* Always jump to the next matching entry and repeat
554 * this until we find an offset that matches for all
560 r
= next_for_match(j
, m
->matches
, f
, after_offset
, direction
, NULL
, &np
);
564 assert(direction
== DIRECTION_DOWN
? np
>= after_offset
: np
<= after_offset
);
565 last_moved
= m
->matches
;
567 LIST_LOOP_BUT_ONE(matches
, i
, m
->matches
, last_moved
) {
570 r
= next_for_match(j
, i
, f
, np
, direction
, NULL
, &cp
);
574 assert(direction
== DIRECTION_DOWN
? cp
>= np
: cp
<= np
);
575 if (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
) {
584 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, &n
);
// Positions file f at the entry that corresponds to j->current_location while
// honouring match term m.
//
// - MATCH_DISCRETE: looks up the data object by content and hash, then
//   chooses the seek primitive from the current location: head (first entry
//   for that data), tail (last entry for that data), sequence number (when
//   set and the file shares the seqnum_id), monotonic time, realtime, and
//   finally the next entry for that data in the given direction.
// - MATCH_OR_TERM: evaluates every child and keeps the earliest candidate
//   (smallest offset going down, largest going up), then loads that entry.
// - MATCH_AND_TERM: evaluates every child, keeps the furthest candidate, and
//   hands over to next_for_match() from there to find an entry on which all
//   children agree.
596 static int find_location_for_match(
600 direction_t direction
,
610 if (m
->type
== MATCH_DISCRETE
) {
613 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, le64toh(m
->le_hash
), NULL
, &dp
);
617 /* FIXME: missing: find by monotonic */
619 if (j
->current_location
.type
== LOCATION_HEAD
)
620 return journal_file_next_entry_for_data(f
, NULL
, 0, dp
, DIRECTION_DOWN
, ret
, offset
);
621 if (j
->current_location
.type
== LOCATION_TAIL
)
622 return journal_file_next_entry_for_data(f
, NULL
, 0, dp
, DIRECTION_UP
, ret
, offset
);
623 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
624 return journal_file_move_to_entry_by_seqnum_for_data(f
, dp
, j
->current_location
.seqnum
, direction
, ret
, offset
);
625 if (j
->current_location
.monotonic_set
) {
626 r
= journal_file_move_to_entry_by_monotonic_for_data(f
, dp
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, offset
);
630 if (j
->current_location
.realtime_set
)
631 return journal_file_move_to_entry_by_realtime_for_data(f
, dp
, j
->current_location
.realtime
, direction
, ret
, offset
);
633 return journal_file_next_entry_for_data(f
, NULL
, 0, dp
, direction
, ret
, offset
);
635 } else if (m
->type
== MATCH_OR_TERM
) {
640 /* Find the earliest match */
642 LIST_FOREACH(matches
, i
, m
->matches
) {
645 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
649 if (np
== 0 || (direction
== DIRECTION_DOWN
? np
> cp
: np
< cp
))
657 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, &n
);
672 assert(m
->type
== MATCH_AND_TERM
);
674 /* First jump to the last match, and then find the
675 * next one where all matches match */
680 LIST_FOREACH(matches
, i
, m
->matches
) {
683 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
687 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
))
691 return next_for_match(j
, m
, f
, np
, direction
, ret
, offset
);
/* Positions file f at the entry that corresponds to j->current_location.
 *
 * Without matches the seek primitive follows the current location: head,
 * tail, sequence number (when set and the file shares the seqnum_id),
 * monotonic time, realtime, and finally the next entry in the given
 * direction. With matches the work is delegated to
 * find_location_for_match() on j->level0. */
695 static int find_location_with_matches(
698 direction_t direction
,
710 /* No matches is simple */
712 if (j
->current_location
.type
== LOCATION_HEAD
)
713 return journal_file_next_entry(f
, 0, DIRECTION_DOWN
, ret
, offset
);
714 if (j
->current_location
.type
== LOCATION_TAIL
)
715 return journal_file_next_entry(f
, 0, DIRECTION_UP
, ret
, offset
);
716 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
717 return journal_file_move_to_entry_by_seqnum(f
, j
->current_location
.seqnum
, direction
, ret
, offset
);
718 if (j
->current_location
.monotonic_set
) {
719 r
= journal_file_move_to_entry_by_monotonic(f
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, offset
);
723 if (j
->current_location
.realtime_set
)
724 return journal_file_move_to_entry_by_realtime(f
, j
->current_location
.realtime
, direction
, ret
, offset
);
726 return journal_file_next_entry(f
, 0, direction
, ret
, offset
);
728 return find_location_for_match(j
, j
->level0
, f
, direction
, ret
, offset
);
/* Steps file f to its next entry in the given direction. Without matches the
 * file simply advances from f->current_offset; with matches the search
 * continues via next_for_match() on j->level0, starting one byte past (down)
 * or before (up) the current offset. */
731 static int next_with_matches(
734 direction_t direction
,
743 /* No matches is easy. We simply advance the file
746 return journal_file_next_entry(f
, f
->current_offset
, direction
, ret
, offset
);
748 /* If we have a match then we look for the next matching entry
749 * with an offset at least one step larger */
750 return next_for_match(j
, j
->level0
, f
,
751 direction
== DIRECTION_DOWN
? f
->current_offset
+ 1
752 : f
->current_offset
- 1,
753 direction
, ret
, offset
);
/* Moves file f to its first entry that lies beyond j->current_location in the
 * given direction, and saves that position in f via
 * journal_file_save_location().
 *
 * The entry count is compared with f->last_n_entries to notice files that
 * already hit EOF in this direction and gained no entries. If the direction
 * is unchanged and f has a current offset, f is advanced with
 * next_with_matches() unless it already holds a candidate (LOCATION_SEEK);
 * otherwise the position is looked up afresh with
 * find_location_with_matches(). When the journal's location is discrete, the
 * candidate is checked with compare_with_location() and f is stepped further
 * so that entries duplicated across files are not returned again. */
756 static int next_beyond_location(sd_journal
*j
, JournalFile
*f
, direction_t direction
) {
758 uint64_t cp
, n_entries
;
764 n_entries
= le64toh(f
->header
->n_entries
);
766 /* If we hit EOF before, we don't need to look into this file again
767 * unless direction changed or new entries appeared. */
768 if (f
->last_direction
== direction
&& f
->location_type
== LOCATION_TAIL
&&
769 n_entries
== f
->last_n_entries
)
772 f
->last_n_entries
= n_entries
;
774 if (f
->last_direction
== direction
&& f
->current_offset
> 0) {
775 /* LOCATION_SEEK here means we did the work in a previous
776 * iteration and the current location already points to a
777 * candidate entry. */
778 if (f
->location_type
!= LOCATION_SEEK
) {
779 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
783 journal_file_save_location(f
, c
, cp
);
786 f
->last_direction
= direction
;
788 r
= find_location_with_matches(j
, f
, direction
, &c
, &cp
);
792 journal_file_save_location(f
, c
, cp
);
795 /* OK, we found the spot, now let's advance until an entry
796 * that is actually different from what we were previously
797 * looking at. This is necessary to handle entries which exist
798 * in two (or more) journal files, and which shall all be
799 * suppressed but one. */
804 if (j
->current_location
.type
== LOCATION_DISCRETE
) {
807 k
= compare_with_location(f
, &j
->current_location
);
809 found
= direction
== DIRECTION_DOWN
? k
> 0 : k
< 0;
816 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
820 journal_file_save_location(f
, c
, cp
);
/* Shared implementation of sd_journal_next() and sd_journal_previous().
 *
 * Fails with -EINVAL when j is NULL and -ECHILD when the journal is used
 * after a fork(). Every file from j->files_cache is advanced with
 * next_beyond_location(). Among the files, the one whose saved location
 * sorts first in the iteration direction (per
 * journal_file_compare_locations()) becomes new_file; its entry is loaded
 * and made current with set_location().
 * NOTE(review): presumably a file that fails to iterate is logged and
 * removed, and a file with nothing further is marked LOCATION_TAIL --
 * confirm. */
824 static int real_journal_next(sd_journal
*j
, direction_t direction
) {
825 JournalFile
*new_file
= NULL
;
831 assert_return(j
, -EINVAL
);
832 assert_return(!journal_pid_changed(j
), -ECHILD
);
834 r
= iterated_cache_get(j
->files_cache
, NULL
, &files
, &n_files
);
838 for (i
= 0; i
< n_files
; i
++) {
839 JournalFile
*f
= (JournalFile
*)files
[i
];
842 r
= next_beyond_location(j
, f
, direction
);
844 log_debug_errno(r
, "Can't iterate through %s, ignoring: %m", f
->path
);
845 remove_file_real(j
, f
);
848 f
->location_type
= LOCATION_TAIL
;
857 k
= journal_file_compare_locations(f
, new_file
);
859 found
= direction
== DIRECTION_DOWN
? k
< 0 : k
> 0;
869 r
= journal_file_move_to_object(new_file
, OBJECT_ENTRY
, new_file
->current_offset
, &o
);
873 set_location(j
, new_file
, o
);
/* Public API: advances the journal to the next entry (DIRECTION_DOWN) and
 * returns the result of real_journal_next(). */
878 _public_
int sd_journal_next(sd_journal
*j
) {
879 return real_journal_next(j
, DIRECTION_DOWN
);
/* Public API: moves the journal back to the previous entry (DIRECTION_UP) and
 * returns the result of real_journal_next(). */
882 _public_
int sd_journal_previous(sd_journal
*j
) {
883 return real_journal_next(j
, DIRECTION_UP
);
/* Shared implementation of sd_journal_next_skip() and
 * sd_journal_previous_skip(): moves by up to `skip` entries in the given
 * direction by calling real_journal_next().
 *
 * Fails with -EINVAL when j is NULL and -ECHILD when the journal is used
 * after a fork(). If the current location is not discrete, one
 * real_journal_next() call is made first to resolve it. */
886 static int real_journal_next_skip(sd_journal
*j
, direction_t direction
, uint64_t skip
) {
889 assert_return(j
, -EINVAL
);
890 assert_return(!journal_pid_changed(j
), -ECHILD
);
893 /* If this is not a discrete skip, then at least
894 * resolve the current location */
895 if (j
->current_location
.type
!= LOCATION_DISCRETE
) {
896 r
= real_journal_next(j
, direction
);
905 r
= real_journal_next(j
, direction
);
/* Public API: skips forward (DIRECTION_DOWN) by `skip` entries; thin wrapper
 * around real_journal_next_skip(). */
919 _public_
int sd_journal_next_skip(sd_journal
*j
, uint64_t skip
) {
920 return real_journal_next_skip(j
, DIRECTION_DOWN
, skip
);
/* Public API: skips backward (DIRECTION_UP) by `skip` entries; thin wrapper
 * around real_journal_next_skip(). */
923 _public_
int sd_journal_previous_skip(sd_journal
*j
, uint64_t skip
) {
924 return real_journal_next_skip(j
, DIRECTION_UP
, skip
);
/* Public API: produces a cursor string describing the current entry.
 *
 * Fails with -EINVAL when j or cursor is NULL, -ECHILD when the journal is
 * used after a fork(), and -EADDRNOTAVAIL when there is no current file or
 * it has no current offset.
 *
 * The cursor has the form
 *   s=<seqnum id>;i=<seqnum>;b=<boot id>;m=<monotonic>;t=<realtime>;x=<xor hash>
 * with the numeric fields printed in hexadecimal after le64toh()
 * conversion. */
927 _public_
int sd_journal_get_cursor(sd_journal
*j
, char **cursor
) {
930 char bid
[33], sid
[33];
932 assert_return(j
, -EINVAL
);
933 assert_return(!journal_pid_changed(j
), -ECHILD
);
934 assert_return(cursor
, -EINVAL
);
936 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
937 return -EADDRNOTAVAIL
;
939 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
943 sd_id128_to_string(j
->current_file
->header
->seqnum_id
, sid
);
944 sd_id128_to_string(o
->entry
.boot_id
, bid
);
947 "s=%s;i=%"PRIx64
";b=%s;m=%"PRIx64
";t=%"PRIx64
";x=%"PRIx64
,
948 sid
, le64toh(o
->entry
.seqnum
),
949 bid
, le64toh(o
->entry
.monotonic
),
950 le64toh(o
->entry
.realtime
),
951 le64toh(o
->entry
.xor_hash
)) < 0)
/* Public API: seeks to the position described by a cursor string.
 *
 * Fails with -EINVAL when j is NULL or cursor is empty, and -ECHILD when the
 * journal is used after a fork().
 *
 * The cursor is split at ';'; every item must be at least two characters
 * long with '=' in second position. Items carry a seqnum id, seqnum, boot
 * id, monotonic time, realtime and xor hash (128-bit ids via
 * sd_id128_from_string(), numbers as hexadecimal).
 * NOTE(review): the key letters presumably mirror sd_journal_get_cursor()
 * (s, i, b, m, t, x) -- confirm.
 *
 * After a check that tests the seqnum+seqnum_id and monotonic+boot_id pairs,
 * j->current_location becomes a LOCATION_SEEK location; seqnum is stored
 * only together with its seqnum id, and monotonic only together with its
 * boot id. */
957 _public_
int sd_journal_seek_cursor(sd_journal
*j
, const char *cursor
) {
958 const char *word
, *state
;
960 unsigned long long seqnum
, monotonic
, realtime
, xor_hash
;
962 seqnum_id_set
= false,
965 monotonic_set
= false,
966 realtime_set
= false,
967 xor_hash_set
= false;
968 sd_id128_t seqnum_id
, boot_id
;
970 assert_return(j
, -EINVAL
);
971 assert_return(!journal_pid_changed(j
), -ECHILD
);
972 assert_return(!isempty(cursor
), -EINVAL
);
974 FOREACH_WORD_SEPARATOR(word
, l
, cursor
, ";", state
) {
978 if (l
< 2 || word
[1] != '=')
981 item
= strndup(word
, l
);
988 seqnum_id_set
= true;
989 k
= sd_id128_from_string(item
+2, &seqnum_id
);
994 if (sscanf(item
+2, "%llx", &seqnum
) != 1)
1000 k
= sd_id128_from_string(item
+2, &boot_id
);
1004 monotonic_set
= true;
1005 if (sscanf(item
+2, "%llx", &monotonic
) != 1)
1010 realtime_set
= true;
1011 if (sscanf(item
+2, "%llx", &realtime
) != 1)
1016 xor_hash_set
= true;
1017 if (sscanf(item
+2, "%llx", &xor_hash
) != 1)
1028 if ((!seqnum_set
|| !seqnum_id_set
) &&
1029 (!monotonic_set
|| !boot_id_set
) &&
1035 j
->current_location
.type
= LOCATION_SEEK
;
1038 j
->current_location
.realtime
= (uint64_t) realtime
;
1039 j
->current_location
.realtime_set
= true;
1042 if (seqnum_set
&& seqnum_id_set
) {
1043 j
->current_location
.seqnum
= (uint64_t) seqnum
;
1044 j
->current_location
.seqnum_id
= seqnum_id
;
1045 j
->current_location
.seqnum_set
= true;
1048 if (monotonic_set
&& boot_id_set
) {
1049 j
->current_location
.monotonic
= (uint64_t) monotonic
;
1050 j
->current_location
.boot_id
= boot_id
;
1051 j
->current_location
.monotonic_set
= true;
1055 j
->current_location
.xor_hash
= (uint64_t) xor_hash
;
1056 j
->current_location
.xor_hash_set
= true;
/* Public API: tests whether a cursor string refers to the current entry.
 *
 * Fails with -EINVAL when j is NULL or cursor is empty, -ECHILD when the
 * journal is used after a fork(), and -EADDRNOTAVAIL when there is no
 * current file or it has no current offset.
 *
 * The cursor is consumed item by item (separator ';', separators not
 * coalesced); every item must be at least two characters long with '=' in
 * second position. Items are compared against the current file's seqnum_id
 * and the current entry's seqnum, boot_id, monotonic, realtime and xor_hash
 * (numbers parsed as hexadecimal). */
1062 _public_
int sd_journal_test_cursor(sd_journal
*j
, const char *cursor
) {
1066 assert_return(j
, -EINVAL
);
1067 assert_return(!journal_pid_changed(j
), -ECHILD
);
1068 assert_return(!isempty(cursor
), -EINVAL
);
1070 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
1071 return -EADDRNOTAVAIL
;
1073 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
1078 _cleanup_free_
char *item
= NULL
;
1079 unsigned long long ll
;
1083 r
= extract_first_word(&cursor
, &item
, ";", EXTRACT_DONT_COALESCE_SEPARATORS
);
1090 if (strlen(item
) < 2 || item
[1] != '=')
1096 k
= sd_id128_from_string(item
+2, &id
);
1099 if (!sd_id128_equal(id
, j
->current_file
->header
->seqnum_id
))
1104 if (sscanf(item
+2, "%llx", &ll
) != 1)
1106 if (ll
!= le64toh(o
->entry
.seqnum
))
1111 k
= sd_id128_from_string(item
+2, &id
);
1114 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
1119 if (sscanf(item
+2, "%llx", &ll
) != 1)
1121 if (ll
!= le64toh(o
->entry
.monotonic
))
1126 if (sscanf(item
+2, "%llx", &ll
) != 1)
1128 if (ll
!= le64toh(o
->entry
.realtime
))
1133 if (sscanf(item
+2, "%llx", &ll
) != 1)
1135 if (ll
!= le64toh(o
->entry
.xor_hash
))
/* Public API: requests a seek to monotonic timestamp usec within the boot
 * identified by boot_id. Turns j->current_location into a LOCATION_SEEK
 * location carrying boot_id and the monotonic time.
 *
 * Fails with -EINVAL when j is NULL and -ECHILD when the journal is used
 * after a fork(). */
1144 _public_
int sd_journal_seek_monotonic_usec(sd_journal
*j
, sd_id128_t boot_id
, uint64_t usec
) {
1145 assert_return(j
, -EINVAL
);
1146 assert_return(!journal_pid_changed(j
), -ECHILD
);
1149 j
->current_location
.type
= LOCATION_SEEK
;
1150 j
->current_location
.boot_id
= boot_id
;
1151 j
->current_location
.monotonic
= usec
;
1152 j
->current_location
.monotonic_set
= true;
/* Public API: requests a seek to realtime timestamp usec. Turns
 * j->current_location into a LOCATION_SEEK location carrying the realtime
 * value.
 *
 * Fails with -EINVAL when j is NULL and -ECHILD when the journal is used
 * after a fork(). */
1157 _public_
int sd_journal_seek_realtime_usec(sd_journal
*j
, uint64_t usec
) {
1158 assert_return(j
, -EINVAL
);
1159 assert_return(!journal_pid_changed(j
), -ECHILD
);
1162 j
->current_location
.type
= LOCATION_SEEK
;
1163 j
->current_location
.realtime
= usec
;
1164 j
->current_location
.realtime_set
= true;
/* Public API: requests a seek to the beginning of the journal by setting the
 * current location type to LOCATION_HEAD.
 *
 * Fails with -EINVAL when j is NULL and -ECHILD when the journal is used
 * after a fork(). */
1169 _public_
int sd_journal_seek_head(sd_journal
*j
) {
1170 assert_return(j
, -EINVAL
);
1171 assert_return(!journal_pid_changed(j
), -ECHILD
);
1174 j
->current_location
.type
= LOCATION_HEAD
;
/* Public API: requests a seek to the end of the journal by setting the
 * current location type to LOCATION_TAIL.
 *
 * Fails with -EINVAL when j is NULL and -ECHILD when the journal is used
 * after a fork(). */
1179 _public_
int sd_journal_seek_tail(sd_journal
*j
) {
1180 assert_return(j
, -EINVAL
);
1181 assert_return(!journal_pid_changed(j
), -ECHILD
);
1184 j
->current_location
.type
= LOCATION_TAIL
;
/* Records in j->on_network whether fd lives on a network file system, as
 * reported by fd_is_network_fs(). */
1189 static void check_network(sd_journal
*j
, int fd
) {
1195 j
->on_network
= fd_is_network_fs(fd
);
/* Returns true when filename is exactly "<prefix>.journal" or
 * "<prefix>.journal~", or when it starts with "<prefix>@". The candidate
 * names are built with strjoina(). */
1198 static bool file_has_type_prefix(const char *prefix
, const char *filename
) {
1199 const char *full
, *tilded
, *atted
;
1201 full
= strjoina(prefix
, ".journal");
1202 tilded
= strjoina(full
, "~");
1203 atted
= strjoina(prefix
, "@");
1205 return streq(filename
, full
) ||
1206 streq(filename
, tilded
) ||
1207 startswith(filename
, atted
);
/* Decides whether a journal file name is of a type selected by flags.
 *
 * Checks, in order: the name must end in ".journal" or ".journal~"; if
 * neither SD_JOURNAL_SYSTEM nor SD_JOURNAL_CURRENT_USER is set every type
 * is OK; with SD_JOURNAL_SYSTEM the "system" prefix is tested; with
 * SD_JOURNAL_CURRENT_USER the prefix "user-<uid of the caller>" is
 * tested. */
1210 static bool file_type_wanted(int flags
, const char *filename
) {
1213 if (!endswith(filename
, ".journal") && !endswith(filename
, ".journal~"))
1216 /* no flags set → every type is OK */
1217 if (!(flags
& (SD_JOURNAL_SYSTEM
| SD_JOURNAL_CURRENT_USER
)))
1220 if (flags
& SD_JOURNAL_SYSTEM
&& file_has_type_prefix("system", filename
))
1223 if (flags
& SD_JOURNAL_CURRENT_USER
) {
1224 char prefix
[5 + DECIMAL_STR_MAX(uid_t
) + 1];
1226 xsprintf(prefix
, "user-"UID_FMT
, getuid());
1228 if (file_has_type_prefix(prefix
, filename
))
/* Tests whether path starts with prefix (path_startswith()). A journal with
 * a top-level fd (j->toplevel_fd >= 0) is handled separately before that.
 * NOTE(review): presumably the top-level-fd case returns false -- confirm. */
1235 static bool path_has_prefix(sd_journal
*j
, const char *path
, const char *prefix
) {
1240 if (j
->toplevel_fd
>= 0)
1243 return path_startswith(path
, prefix
);
/* Callers use this to turn a path into a relative one before passing it to
 * openat() or xopendirat() together with the top-level fd.
 * NOTE(review): presumably returns p with its leading slash(es) skipped --
 * confirm. */
1246 static const char *skip_slash(const char *p
) {
/* Adds one journal file, identified by an open fd and/or a path (at least one
 * of the two is required, asserted), to the journal's file set.
 *
 * - The path is looked up in j->files first and last_seen_generation is
 *   refreshed for a known file.
 * - Once JOURNAL_FILES_MAX files are tracked, further files are not added.
 * - Without an fd but with a top-level fd, the file is opened relative to
 *   the top-level fd (path made relative with skip_slash()).
 * - The file is opened read-only with journal_file_open() and stored in
 *   j->files under f->path.
 * - The has_runtime_files / has_persistent_files flags are updated from the
 *   "/run" and "/var" path prefixes, check_network() is run on the file's fd
 *   and current_invalidate_counter is bumped.
 * - The error r is recorded with journal_put_error() under path.
 * NOTE(review): presumably the file is closed again when the hashmap
 * insertion fails -- confirm. */
1257 static int add_any_file(sd_journal
*j
, int fd
, const char *path
) {
1258 JournalFile
*f
= NULL
;
1259 bool close_fd
= false;
1263 assert(fd
>= 0 || path
);
1266 f
= ordered_hashmap_get(j
->files
, path
);
1268 /* Mark this file as seen in this generation. This is used to GC old files in
1269 * process_q_overflow() to detect journal files that are still and discern them from those who
1271 f
->last_seen_generation
= j
->generation
;
1276 if (ordered_hashmap_size(j
->files
) >= JOURNAL_FILES_MAX
) {
1277 log_debug("Too many open journal files, not adding %s.", path
);
1282 if (fd
< 0 && j
->toplevel_fd
>= 0) {
1284 /* If there's a top-level fd defined, open the file relative to this now. (Make the path relative,
1285 * explicitly, since otherwise openat() ignores the first argument.) */
1287 fd
= openat(j
->toplevel_fd
, skip_slash(path
), O_RDONLY
|O_CLOEXEC
);
1289 r
= log_debug_errno(errno
, "Failed to open journal file %s: %m", path
);
1296 r
= journal_file_open(fd
, path
, O_RDONLY
, 0, false, false, NULL
, j
->mmap
, NULL
, NULL
, &f
);
1300 log_debug_errno(r
, "Failed to open journal file %s: %m", path
);
1304 /* journal_file_dump(f); */
1306 r
= ordered_hashmap_put(j
->files
, f
->path
, f
);
1308 f
->close_fd
= close_fd
;
1309 (void) journal_file_close(f
);
1313 f
->last_seen_generation
= j
->generation
;
1315 if (!j
->has_runtime_files
&& path_has_prefix(j
, f
->path
, "/run"))
1316 j
->has_runtime_files
= true;
1317 else if (!j
->has_persistent_files
&& path_has_prefix(j
, f
->path
, "/var"))
1318 j
->has_persistent_files
= true;
1320 log_debug("File %s added.", f
->path
);
1322 check_network(j
, f
->fd
);
1324 j
->current_invalidate_counter
++;
1329 k
= journal_put_error(j
, r
, path
);
/* Adds the journal file "<prefix>/<filename>" via add_any_file() with no
 * pre-opened fd (-1). Before that, j->no_new_files and
 * file_type_wanted(j->flags, filename) are consulted.
 * NOTE(review): presumably the file is skipped when new files are disabled
 * or the type is not wanted -- confirm. */
1336 static int add_file_by_name(
1339 const char *filename
) {
1347 if (j
->no_new_files
)
1350 if (!file_type_wanted(j
->flags
, filename
))
1353 path
= strjoina(prefix
, "/", filename
);
1354 return add_any_file(j
, -1, path
);
/* Looks up "<prefix>/<filename>" in j->files and removes that file with
 * remove_file_real(). */
1357 static void remove_file_by_name(
1360 const char *filename
) {
1369 path
= strjoina(prefix
, "/", filename
);
1370 f
= ordered_hashmap_get(j
->files
, path
);
1374 remove_file_real(j
, f
);
/* Drops journal file f from journal j and closes it.
 *
 * f is removed from j->files. If it was the current file, current_file and
 * current_field are cleared. If it was the file being walked for unique
 * values or for field names, the respective iterator moves on to the next
 * file in j->files with its offset reset to 0, and the matching *_lost flag
 * is set when there is no next file. Finally current_invalidate_counter is
 * bumped. */
1377 static void remove_file_real(sd_journal
*j
, JournalFile
*f
) {
1381 (void) ordered_hashmap_remove(j
->files
, f
->path
);
1383 log_debug("File %s removed.", f
->path
);
1385 if (j
->current_file
== f
) {
1386 j
->current_file
= NULL
;
1387 j
->current_field
= 0;
1390 if (j
->unique_file
== f
) {
1391 /* Jump to the next unique_file or NULL if that one was last */
1392 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
1393 j
->unique_offset
= 0;
1394 if (!j
->unique_file
)
1395 j
->unique_file_lost
= true;
1398 if (j
->fields_file
== f
) {
1399 j
->fields_file
= ordered_hashmap_next(j
->files
, j
->fields_file
->path
);
1400 j
->fields_offset
= 0;
1401 if (!j
->fields_file
)
1402 j
->fields_file_lost
= true;
1405 (void) journal_file_close(f
);
1407 j
->current_invalidate_counter
++;
/* Compares directory name fn, parsed as a 128-bit id, with the local machine
 * id from sd_id128_get_machine() and returns the result of sd_id128_equal().
 * add_directory() treats only a value greater than zero as a match. */
1410 static int dirname_is_machine_id(const char *fn
) {
1411 sd_id128_t id
, machine
;
1414 r
= sd_id128_get_machine(&machine
);
1418 r
= sd_id128_from_string(fn
, &id
);
1422 return sd_id128_equal(id
, machine
);
/* Checks whether directory entry de may be a journal file: its type is tested
 * against regular file, symlink and unknown, and its name must end in
 * ".journal" or ".journal~". */
1425 static bool dirent_is_journal_file(const struct dirent
*de
) {
1428 if (!IN_SET(de
->d_type
, DT_REG
, DT_LNK
, DT_UNKNOWN
))
1431 return endswith(de
->d_name
, ".journal") ||
1432 endswith(de
->d_name
, ".journal~");
/* Checks whether directory entry de may be a per-machine subdirectory: its
 * type is tested against directory, symlink and unknown, and its name must
 * pass id128_is_valid(). */
1435 static bool dirent_is_id128_subdir(const struct dirent
*de
) {
1438 if (!IN_SET(de
->d_type
, DT_DIR
, DT_LNK
, DT_UNKNOWN
))
1441 return id128_is_valid(de
->d_name
);
/* Opens directory path and hands the stream back through *ret. A journal with
 * a top-level fd has the directory opened relative to that fd with
 * xopendirat(), after skip_slash() made the path relative.
 * NOTE(review): presumably a journal without a top-level fd uses a plain
 * opendir() on path -- confirm. */
1444 static int directory_open(sd_journal
*j
, const char *path
, DIR **ret
) {
1451 if (j
->toplevel_fd
< 0)
1454 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1455 * relative, by dropping the initial slash */
1456 d
= xopendirat(j
->toplevel_fd
, skip_slash(path
), 0);
/* Forward declaration: directory_enumerate() below descends into
 * subdirectories through add_directory(). */
1464 static int add_directory(sd_journal
*j
, const char *prefix
, const char *dirname
);
/* Walks all entries of the open directory stream d belonging to Directory m:
 * journal files are added with add_file_by_name(), and, for a root directory
 * only, subdirectories named like a 128-bit id are added with
 * add_directory(). Results of both calls are deliberately ignored; a failure
 * to enumerate is logged and otherwise ignored. */
1466 static void directory_enumerate(sd_journal
*j
, Directory
*m
, DIR *d
) {
1473 FOREACH_DIRENT_ALL(de
, d
, goto fail
) {
1475 if (dirent_is_journal_file(de
))
1476 (void) add_file_by_name(j
, m
->path
, de
->d_name
);
1478 if (m
->is_root
&& dirent_is_id128_subdir(de
))
1479 (void) add_directory(j
, m
->path
, de
->d_name
);
1485 log_debug_errno(errno
, "Failed to enumerate directory %s, ignoring: %m", m
->path
);
/* Installs an inotify watch with the given event mask on the directory behind
 * fd and records it for Directory m.
 *
 * m->wd > 0 (already watched) and j->inotify_fd < 0 (inotify not in use) are
 * checked up front. The watch descriptor is stored in m->wd and m is
 * registered in j->directories_by_wd under that descriptor. Failures are
 * logged and ignored; when registering in the hashmap fails the watch is
 * removed again with inotify_rm_watch(). */
1488 static void directory_watch(sd_journal
*j
, Directory
*m
, int fd
, uint32_t mask
) {
1495 /* Watch this directory if that's enabled and if it is not being watched yet. */
1497 if (m
->wd
> 0) /* Already have a watch? */
1499 if (j
->inotify_fd
< 0) /* Not watching at all? */
1502 m
->wd
= inotify_add_watch_fd(j
->inotify_fd
, fd
, mask
);
1504 log_debug_errno(errno
, "Failed to watch journal directory '%s', ignoring: %m", m
->path
);
1508 r
= hashmap_put(j
->directories_by_wd
, INT_TO_PTR(m
->wd
), m
);
1510 log_debug_errno(r
, "Directory '%s' already being watched under a different path, ignoring: %m", m
->path
);
1512 log_debug_errno(r
, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m
->path
);
1513 (void) inotify_rm_watch(j
->inotify_fd
, m
->wd
);
/* Adds (or refreshes) a non-root journal directory.
 *
 * The path is "<prefix>/<dirname>", or a copy of prefix alone
 * (add_current_paths() passes a NULL dirname). With SD_JOURNAL_LOCAL_ONLY
 * the directory only qualifies if dirname is the local machine id or the
 * path lies below "/run". The directory is opened, looked up in
 * j->directories_by_path and, if unknown, a new Directory is allocated and
 * registered (bumping current_invalidate_counter). An entry already tracked
 * as root directory is left alone. Afterwards last_seen_generation is
 * refreshed, the inotify watch is (re)established, the contents are
 * enumerated unless j->no_new_files is set, and check_network() is run on
 * the directory fd. The error r is recorded with journal_put_error(). */
1518 static int add_directory(sd_journal
*j
, const char *prefix
, const char *dirname
) {
1519 _cleanup_free_
char *path
= NULL
;
1520 _cleanup_closedir_
DIR *d
= NULL
;
1527 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1528 * and reenumerates directory contents */
1531 path
= strjoin(prefix
, "/", dirname
);
1533 path
= strdup(prefix
);
1539 log_debug("Considering directory '%s'.", path
);
1541 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1542 if ((j
->flags
& SD_JOURNAL_LOCAL_ONLY
) &&
1543 !((dirname
&& dirname_is_machine_id(dirname
) > 0) || path_has_prefix(j
, path
, "/run")))
1546 r
= directory_open(j
, path
, &d
);
1548 log_debug_errno(r
, "Failed to open directory '%s': %m", path
);
1552 m
= hashmap_get(j
->directories_by_path
, path
);
1554 m
= new0(Directory
, 1);
1563 if (hashmap_put(j
->directories_by_path
, m
->path
, m
) < 0) {
1569 path
= NULL
; /* avoid freeing in cleanup */
1570 j
->current_invalidate_counter
++;
1572 log_debug("Directory %s added.", m
->path
);
1574 } else if (m
->is_root
)
1575 return 0; /* Don't 'downgrade' from root directory */
1577 m
->last_seen_generation
= j
->generation
;
1579 directory_watch(j
, m
, dirfd(d
),
1580 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
1581 IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
|IN_MOVED_FROM
|
1584 if (!j
->no_new_files
)
1585 directory_enumerate(j
, m
, d
);
1587 check_network(j
, dirfd(d
));
1592 k
= journal_put_error(j
, r
, path
?: prefix
);
/* Adds (or refreshes) a root journal directory.
 *
 * With a path p: under SD_JOURNAL_RUNTIME_ONLY only paths below "/run"
 * qualify; j->prefix is prepended to p and the directory is opened. A
 * missing directory (-ENOENT) is tolerated when missing_ok is set. Without a
 * path the journal's top-level fd is duplicated and the directory is stored
 * under ".". The Directory entry is looked up in j->directories_by_path and
 * created if unknown (bumping current_invalidate_counter). Afterwards the
 * inotify watch is (re)established, the contents are enumerated unless
 * j->no_new_files is set, and check_network() is run on the directory fd.
 * The error r is recorded with journal_put_error(). */
1599 static int add_root_directory(sd_journal
*j
, const char *p
, bool missing_ok
) {
1601 _cleanup_closedir_
DIR *d
= NULL
;
1607 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
1608 * update the inotify logic, and re-enumerate the directory entries. This call may hence be called to initially
1609 * populate the set, as well as to update it later. */
1612 /* If there's a path specified, use it. */
1614 log_debug("Considering root directory '%s'.", p
);
1616 if ((j
->flags
& SD_JOURNAL_RUNTIME_ONLY
) &&
1617 !path_has_prefix(j
, p
, "/run"))
1621 p
= strjoina(j
->prefix
, p
);
1623 r
= directory_open(j
, p
, &d
);
1624 if (r
== -ENOENT
&& missing_ok
)
1627 log_debug_errno(r
, "Failed to open root directory %s: %m", p
);
1633 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
1634 * opendir() will take possession of the fd, and close it, which we don't want. */
1636 p
= "."; /* store this as "." in the directories hashmap */
1638 dfd
= fcntl(j
->toplevel_fd
, F_DUPFD_CLOEXEC
, 3);
1654 m
= hashmap_get(j
->directories_by_path
, p
);
1656 m
= new0(Directory
, 1);
1664 m
->path
= strdup(p
);
1671 if (hashmap_put(j
->directories_by_path
, m
->path
, m
) < 0) {
1678 j
->current_invalidate_counter
++;
1680 log_debug("Root directory %s added.", m
->path
);
1682 } else if (!m
->is_root
)
1685 directory_watch(j
, m
, dirfd(d
),
1686 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
1689 if (!j
->no_new_files
)
1690 directory_enumerate(j
, m
, d
);
1692 check_network(j
, dirfd(d
));
1697 k
= journal_put_error(j
, r
, p
);
/* Stops tracking Directory d: removes it from j->directories_by_wd, drops its
 * inotify watch if inotify is in use, removes it from
 * j->directories_by_path, and logs the removal as either a root directory or
 * a plain directory. */
1704 static void remove_directory(sd_journal
*j
, Directory
*d
) {
1708 hashmap_remove(j
->directories_by_wd
, INT_TO_PTR(d
->wd
));
1710 if (j
->inotify_fd
>= 0)
1711 inotify_rm_watch(j
->inotify_fd
, d
->wd
);
1714 hashmap_remove(j
->directories_by_path
, d
->path
);
1717 log_debug("Root directory %s removed.", d
->path
);
1719 log_debug("Directory %s removed.", d
->path
);
/* Adds the default journal locations as root directories: /run/log/journal
 * and /var/log/journal, plus /var/log/journal/remote unless
 * SD_JOURNAL_LOCAL_ONLY is set. Missing directories are tolerated
 * (missing_ok = true) and the result of each add_root_directory() call is
 * deliberately ignored. */
1725 static int add_search_paths(sd_journal
*j
) {
1727 static const char search_paths
[] =
1728 "/run/log/journal\0"
1729 "/var/log/journal\0";
1734 /* We ignore most errors here, since the idea is to only open
1735 * what's actually accessible, and ignore the rest. */
1737 NULSTR_FOREACH(p
, search_paths
)
1738 (void) add_root_directory(j
, p
, true);
1740 if (!(j
->flags
& SD_JOURNAL_LOCAL_ONLY
))
1741 (void) add_root_directory(j
, "/var/log/journal/remote", true);
/* For a journal with a fixed set of files (j->no_new_files, asserted), adds
 * the parent directory of every open file through add_directory(), passing
 * the directory as prefix and no dirname. */
1746 static int add_current_paths(sd_journal
*j
) {
1751 assert(j
->no_new_files
);
1753 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
1754 * treat them as fatal. */
1756 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
1757 _cleanup_free_
char *dir
;
1760 dir
= dirname_malloc(f
->path
);
1764 r
= add_directory(j
, dir
, NULL
);
/* Makes sure the journal has an inotify instance: creates a non-blocking,
 * close-on-exec inotify fd if there is none yet, and ensures the
 * directories_by_wd hashmap exists. Returns the result of
 * hashmap_ensure_allocated(). */
1772 static int allocate_inotify(sd_journal
*j
) {
1775 if (j
->inotify_fd
< 0) {
1776 j
->inotify_fd
= inotify_init1(IN_NONBLOCK
|IN_CLOEXEC
);
1777 if (j
->inotify_fd
< 0)
1781 return hashmap_ensure_allocated(&j
->directories_by_wd
, NULL
);
/* Allocates and initializes a new sd_journal object.
 *
 * The object is zero-allocated, remembers the creating pid, starts without a
 * top-level fd (-1) and with the default data threshold. It gets an ordered
 * hashmap of files keyed by path, an iterated cache over that hashmap, a
 * hashmap of directories keyed by path and an mmap cache. The
 * SD_JOURNAL_OS_ROOT flag is tested as well.
 * NOTE(review): presumably SD_JOURNAL_OS_ROOT selects whether `path` is kept
 * as a prefix or as a plain path, and on failure the journal is released
 * with sd_journal_close() and NULL is returned -- confirm. */
1784 static sd_journal
*journal_new(int flags
, const char *path
) {
1787 j
= new0(sd_journal
, 1);
1791 j
->original_pid
= getpid_cached();
1792 j
->toplevel_fd
= -1;
1795 j
->data_threshold
= DEFAULT_DATA_THRESHOLD
;
1804 if (flags
& SD_JOURNAL_OS_ROOT
)
1810 j
->files
= ordered_hashmap_new(&path_hash_ops
);
1814 j
->files_cache
= ordered_hashmap_iterated_cache_new(j
->files
);
1815 j
->directories_by_path
= hashmap_new(&path_hash_ops
);
1816 j
->mmap
= mmap_cache_new();
1817 if (!j
->files_cache
|| !j
->directories_by_path
|| !j
->mmap
)
1823 sd_journal_close(j
);
/* Flags accepted by sd_journal_open(); any other bit yields -EINVAL. */
1827 #define OPEN_ALLOWED_FLAGS \
1828 (SD_JOURNAL_LOCAL_ONLY | \
1829 SD_JOURNAL_RUNTIME_ONLY | \
1830 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
/* Public API: opens the journal from the default search paths.
 *
 * Fails with -EINVAL when ret is NULL or flags contains bits outside
 * OPEN_ALLOWED_FLAGS. A journal object is created with journal_new() and
 * populated with add_search_paths().
 * NOTE(review): presumably the sd_journal_close() call is the failure path --
 * confirm. */
1832 _public_
int sd_journal_open(sd_journal
**ret
, int flags
) {
1836 assert_return(ret
, -EINVAL
);
1837 assert_return((flags
& ~OPEN_ALLOWED_FLAGS
) == 0, -EINVAL
);
1839 j
= journal_new(flags
, NULL
);
1843 r
= add_search_paths(j
);
1851 sd_journal_close(j
);
1856 #define OPEN_CONTAINER_ALLOWED_FLAGS \
1857 (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
1859 _public_
int sd_journal_open_container(sd_journal
**ret
, const char *machine
, int flags
) {
1860 _cleanup_free_
char *root
= NULL
, *class = NULL
;
1865 /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
1866 * combination with sd_journal_open_directory_fd(). */
1868 assert_return(machine
, -EINVAL
);
1869 assert_return(ret
, -EINVAL
);
1870 assert_return((flags
& ~OPEN_CONTAINER_ALLOWED_FLAGS
) == 0, -EINVAL
);
1871 assert_return(machine_name_is_valid(machine
), -EINVAL
);
1873 p
= strjoina("/run/systemd/machines/", machine
);
1874 r
= parse_env_file(p
, NEWLINE
, "ROOT", &root
, "CLASS", &class, NULL
);
1882 if (!streq_ptr(class, "container"))
1885 j
= journal_new(flags
, root
);
1889 r
= add_search_paths(j
);
1897 sd_journal_close(j
);
1901 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
1902 (SD_JOURNAL_OS_ROOT | \
1903 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1905 _public_
int sd_journal_open_directory(sd_journal
**ret
, const char *path
, int flags
) {
1909 assert_return(ret
, -EINVAL
);
1910 assert_return(path
, -EINVAL
);
1911 assert_return((flags
& ~OPEN_DIRECTORY_ALLOWED_FLAGS
) == 0, -EINVAL
);
1913 j
= journal_new(flags
, path
);
1917 if (flags
& SD_JOURNAL_OS_ROOT
)
1918 r
= add_search_paths(j
);
1920 r
= add_root_directory(j
, path
, false);
1928 sd_journal_close(j
);
1932 _public_
int sd_journal_open_files(sd_journal
**ret
, const char **paths
, int flags
) {
1937 assert_return(ret
, -EINVAL
);
1938 assert_return(flags
== 0, -EINVAL
);
1940 j
= journal_new(flags
, NULL
);
1944 STRV_FOREACH(path
, paths
) {
1945 r
= add_any_file(j
, -1, *path
);
1950 j
->no_new_files
= true;
1956 sd_journal_close(j
);
1960 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
1961 (SD_JOURNAL_OS_ROOT | \
1962 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1964 _public_
int sd_journal_open_directory_fd(sd_journal
**ret
, int fd
, int flags
) {
1969 assert_return(ret
, -EINVAL
);
1970 assert_return(fd
>= 0, -EBADF
);
1971 assert_return((flags
& ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS
) == 0, -EINVAL
);
1973 if (fstat(fd
, &st
) < 0)
1976 if (!S_ISDIR(st
.st_mode
))
1979 j
= journal_new(flags
, NULL
);
1983 j
->toplevel_fd
= fd
;
1985 if (flags
& SD_JOURNAL_OS_ROOT
)
1986 r
= add_search_paths(j
);
1988 r
= add_root_directory(j
, NULL
, false);
1996 sd_journal_close(j
);
2000 _public_
int sd_journal_open_files_fd(sd_journal
**ret
, int fds
[], unsigned n_fds
, int flags
) {
2007 assert_return(ret
, -EINVAL
);
2008 assert_return(n_fds
> 0, -EBADF
);
2009 assert_return(flags
== 0, -EINVAL
);
2011 j
= journal_new(flags
, NULL
);
2015 for (i
= 0; i
< n_fds
; i
++) {
2023 if (fstat(fds
[i
], &st
) < 0) {
2028 if (S_ISDIR(st
.st_mode
)) {
2032 if (!S_ISREG(st
.st_mode
)) {
2037 r
= add_any_file(j
, fds
[i
], NULL
);
2042 j
->no_new_files
= true;
2043 j
->no_inotify
= true;
2049 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2051 ORDERED_HASHMAP_FOREACH(f
, j
->files
, iterator
)
2052 f
->close_fd
= false;
2054 sd_journal_close(j
);
2058 _public_
void sd_journal_close(sd_journal
*j
) {
2064 sd_journal_flush_matches(j
);
2066 ordered_hashmap_free_with_destructor(j
->files
, journal_file_close
);
2067 iterated_cache_free(j
->files_cache
);
2069 while ((d
= hashmap_first(j
->directories_by_path
)))
2070 remove_directory(j
, d
);
2072 while ((d
= hashmap_first(j
->directories_by_wd
)))
2073 remove_directory(j
, d
);
2075 hashmap_free(j
->directories_by_path
);
2076 hashmap_free(j
->directories_by_wd
);
2078 safe_close(j
->inotify_fd
);
2081 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j
->mmap
), mmap_cache_get_missed(j
->mmap
));
2082 mmap_cache_unref(j
->mmap
);
2085 hashmap_free_free(j
->errors
);
2089 free(j
->unique_field
);
2090 free(j
->fields_buffer
);
2094 _public_
int sd_journal_get_realtime_usec(sd_journal
*j
, uint64_t *ret
) {
2099 assert_return(j
, -EINVAL
);
2100 assert_return(!journal_pid_changed(j
), -ECHILD
);
2101 assert_return(ret
, -EINVAL
);
2103 f
= j
->current_file
;
2105 return -EADDRNOTAVAIL
;
2107 if (f
->current_offset
<= 0)
2108 return -EADDRNOTAVAIL
;
2110 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2114 *ret
= le64toh(o
->entry
.realtime
);
2118 _public_
int sd_journal_get_monotonic_usec(sd_journal
*j
, uint64_t *ret
, sd_id128_t
*ret_boot_id
) {
2124 assert_return(j
, -EINVAL
);
2125 assert_return(!journal_pid_changed(j
), -ECHILD
);
2127 f
= j
->current_file
;
2129 return -EADDRNOTAVAIL
;
2131 if (f
->current_offset
<= 0)
2132 return -EADDRNOTAVAIL
;
2134 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2139 *ret_boot_id
= o
->entry
.boot_id
;
2141 r
= sd_id128_get_boot(&id
);
2145 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
2150 *ret
= le64toh(o
->entry
.monotonic
);
2155 static bool field_is_valid(const char *field
) {
2163 if (startswith(field
, "__"))
2166 for (p
= field
; *p
; p
++) {
2171 if (*p
>= 'A' && *p
<= 'Z')
2174 if (*p
>= '0' && *p
<= '9')
2183 _public_
int sd_journal_get_data(sd_journal
*j
, const char *field
, const void **data
, size_t *size
) {
2186 size_t field_length
;
2190 assert_return(j
, -EINVAL
);
2191 assert_return(!journal_pid_changed(j
), -ECHILD
);
2192 assert_return(field
, -EINVAL
);
2193 assert_return(data
, -EINVAL
);
2194 assert_return(size
, -EINVAL
);
2195 assert_return(field_is_valid(field
), -EINVAL
);
2197 f
= j
->current_file
;
2199 return -EADDRNOTAVAIL
;
2201 if (f
->current_offset
<= 0)
2202 return -EADDRNOTAVAIL
;
2204 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2208 field_length
= strlen(field
);
2210 n
= journal_file_entry_n_items(o
);
2211 for (i
= 0; i
< n
; i
++) {
2217 p
= le64toh(o
->entry
.items
[i
].object_offset
);
2218 le_hash
= o
->entry
.items
[i
].hash
;
2219 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
2223 if (le_hash
!= o
->data
.hash
)
2226 l
= le64toh(o
->object
.size
) - offsetof(Object
, data
.payload
);
2228 compression
= o
->object
.flags
& OBJECT_COMPRESSION_MASK
;
2230 #if HAVE_XZ || HAVE_LZ4
2231 r
= decompress_startswith(compression
,
2233 &f
->compress_buffer
, &f
->compress_buffer_size
,
2234 field
, field_length
, '=');
2236 log_debug_errno(r
, "Cannot decompress %s object of length %"PRIu64
" at offset "OFSfmt
": %m",
2237 object_compressed_to_string(compression
), l
, p
);
2242 r
= decompress_blob(compression
,
2244 &f
->compress_buffer
, &f
->compress_buffer_size
, &rsize
,
2249 *data
= f
->compress_buffer
;
2250 *size
= (size_t) rsize
;
2255 return -EPROTONOSUPPORT
;
2257 } else if (l
>= field_length
+1 &&
2258 memcmp(o
->data
.payload
, field
, field_length
) == 0 &&
2259 o
->data
.payload
[field_length
] == '=') {
2263 if ((uint64_t) t
!= l
)
2266 *data
= o
->data
.payload
;
2272 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2280 static int return_data(sd_journal
*j
, JournalFile
*f
, Object
*o
, const void **data
, size_t *size
) {
2285 l
= le64toh(o
->object
.size
) - offsetof(Object
, data
.payload
);
2288 /* We can't read objects larger than 4G on a 32bit machine */
2289 if ((uint64_t) t
!= l
)
2292 compression
= o
->object
.flags
& OBJECT_COMPRESSION_MASK
;
2294 #if HAVE_XZ || HAVE_LZ4
2298 r
= decompress_blob(compression
,
2299 o
->data
.payload
, l
, &f
->compress_buffer
,
2300 &f
->compress_buffer_size
, &rsize
, j
->data_threshold
);
2304 *data
= f
->compress_buffer
;
2305 *size
= (size_t) rsize
;
2307 return -EPROTONOSUPPORT
;
2310 *data
= o
->data
.payload
;
2317 _public_
int sd_journal_enumerate_data(sd_journal
*j
, const void **data
, size_t *size
) {
2324 assert_return(j
, -EINVAL
);
2325 assert_return(!journal_pid_changed(j
), -ECHILD
);
2326 assert_return(data
, -EINVAL
);
2327 assert_return(size
, -EINVAL
);
2329 f
= j
->current_file
;
2331 return -EADDRNOTAVAIL
;
2333 if (f
->current_offset
<= 0)
2334 return -EADDRNOTAVAIL
;
2336 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2340 n
= journal_file_entry_n_items(o
);
2341 if (j
->current_field
>= n
)
2344 p
= le64toh(o
->entry
.items
[j
->current_field
].object_offset
);
2345 le_hash
= o
->entry
.items
[j
->current_field
].hash
;
2346 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
2350 if (le_hash
!= o
->data
.hash
)
2353 r
= return_data(j
, f
, o
, data
, size
);
2362 _public_
void sd_journal_restart_data(sd_journal
*j
) {
2366 j
->current_field
= 0;
2369 static int reiterate_all_paths(sd_journal
*j
) {
2372 if (j
->no_new_files
)
2373 return add_current_paths(j
);
2375 if (j
->flags
& SD_JOURNAL_OS_ROOT
)
2376 return add_search_paths(j
);
2378 if (j
->toplevel_fd
>= 0)
2379 return add_root_directory(j
, NULL
, false);
2382 return add_root_directory(j
, j
->path
, true);
2384 return add_search_paths(j
);
2387 _public_
int sd_journal_get_fd(sd_journal
*j
) {
2390 assert_return(j
, -EINVAL
);
2391 assert_return(!journal_pid_changed(j
), -ECHILD
);
2394 return -EMEDIUMTYPE
;
2396 if (j
->inotify_fd
>= 0)
2397 return j
->inotify_fd
;
2399 r
= allocate_inotify(j
);
2403 log_debug("Reiterating files to get inotify watches established.");
2405 /* Iterate through all dirs again, to add them to the inotify */
2406 r
= reiterate_all_paths(j
);
2410 return j
->inotify_fd
;
2413 _public_
int sd_journal_get_events(sd_journal
*j
) {
2416 assert_return(j
, -EINVAL
);
2417 assert_return(!journal_pid_changed(j
), -ECHILD
);
2419 fd
= sd_journal_get_fd(j
);
2426 _public_
int sd_journal_get_timeout(sd_journal
*j
, uint64_t *timeout_usec
) {
2429 assert_return(j
, -EINVAL
);
2430 assert_return(!journal_pid_changed(j
), -ECHILD
);
2431 assert_return(timeout_usec
, -EINVAL
);
2433 fd
= sd_journal_get_fd(j
);
2437 if (!j
->on_network
) {
2438 *timeout_usec
= (uint64_t) -1;
2442 /* If we are on the network we need to regularly check for
2443 * changes manually */
2445 *timeout_usec
= j
->last_process_usec
+ JOURNAL_FILES_RECHECK_USEC
;
2449 static void process_q_overflow(sd_journal
*j
) {
2456 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2457 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2458 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2459 * are subject for unloading. */
2461 log_debug("Inotify queue overrun, reiterating everything.");
2464 (void) reiterate_all_paths(j
);
2466 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2468 if (f
->last_seen_generation
== j
->generation
)
2471 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f
->path
);
2472 remove_file_real(j
, f
);
2475 HASHMAP_FOREACH(m
, j
->directories_by_path
, i
) {
2477 if (m
->last_seen_generation
== j
->generation
)
2480 if (m
->is_root
) /* Never GC root directories */
2483 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f
->path
);
2484 remove_directory(j
, m
);
2487 log_debug("Reiteration complete.");
2490 static void process_inotify_event(sd_journal
*j
, struct inotify_event
*e
) {
2496 if (e
->mask
& IN_Q_OVERFLOW
) {
2497 process_q_overflow(j
);
2501 /* Is this a subdirectory we watch? */
2502 d
= hashmap_get(j
->directories_by_wd
, INT_TO_PTR(e
->wd
));
2504 if (!(e
->mask
& IN_ISDIR
) && e
->len
> 0 &&
2505 (endswith(e
->name
, ".journal") ||
2506 endswith(e
->name
, ".journal~"))) {
2508 /* Event for a journal file */
2510 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
2511 (void) add_file_by_name(j
, d
->path
, e
->name
);
2512 else if (e
->mask
& (IN_DELETE
|IN_MOVED_FROM
|IN_UNMOUNT
))
2513 remove_file_by_name(j
, d
->path
, e
->name
);
2515 } else if (!d
->is_root
&& e
->len
== 0) {
2517 /* Event for a subdirectory */
2519 if (e
->mask
& (IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
))
2520 remove_directory(j
, d
);
2522 } else if (d
->is_root
&& (e
->mask
& IN_ISDIR
) && e
->len
> 0 && id128_is_valid(e
->name
)) {
2524 /* Event for root directory */
2526 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
2527 (void) add_directory(j
, d
->path
, e
->name
);
2533 if (e
->mask
& IN_IGNORED
)
2536 log_debug("Unexpected inotify event.");
2539 static int determine_change(sd_journal
*j
) {
2544 b
= j
->current_invalidate_counter
!= j
->last_invalidate_counter
;
2545 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
2547 return b
? SD_JOURNAL_INVALIDATE
: SD_JOURNAL_APPEND
;
2550 _public_
int sd_journal_process(sd_journal
*j
) {
2551 bool got_something
= false;
2553 assert_return(j
, -EINVAL
);
2554 assert_return(!journal_pid_changed(j
), -ECHILD
);
2556 if (j
->inotify_fd
< 0) /* We have no inotify fd yet? Then there's noting to process. */
2559 j
->last_process_usec
= now(CLOCK_MONOTONIC
);
2560 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
2563 union inotify_event_buffer buffer
;
2564 struct inotify_event
*e
;
2567 l
= read(j
->inotify_fd
, &buffer
, sizeof(buffer
));
2569 if (IN_SET(errno
, EAGAIN
, EINTR
))
2570 return got_something
? determine_change(j
) : SD_JOURNAL_NOP
;
2575 got_something
= true;
2577 FOREACH_INOTIFY_EVENT(e
, buffer
, l
)
2578 process_inotify_event(j
, e
);
2582 _public_
int sd_journal_wait(sd_journal
*j
, uint64_t timeout_usec
) {
2586 assert_return(j
, -EINVAL
);
2587 assert_return(!journal_pid_changed(j
), -ECHILD
);
2589 if (j
->inotify_fd
< 0) {
2591 /* This is the first invocation, hence create the
2593 r
= sd_journal_get_fd(j
);
2597 /* The journal might have changed since the context
2598 * object was created and we weren't watching before,
2599 * hence don't wait for anything, and return
2601 return determine_change(j
);
2604 r
= sd_journal_get_timeout(j
, &t
);
2608 if (t
!= (uint64_t) -1) {
2611 n
= now(CLOCK_MONOTONIC
);
2612 t
= t
> n
? t
- n
: 0;
2614 if (timeout_usec
== (uint64_t) -1 || timeout_usec
> t
)
2619 r
= fd_wait_for_event(j
->inotify_fd
, POLLIN
, timeout_usec
);
2620 } while (r
== -EINTR
);
2625 return sd_journal_process(j
);
2628 _public_
int sd_journal_get_cutoff_realtime_usec(sd_journal
*j
, uint64_t *from
, uint64_t *to
) {
2632 uint64_t fmin
= 0, tmax
= 0;
2635 assert_return(j
, -EINVAL
);
2636 assert_return(!journal_pid_changed(j
), -ECHILD
);
2637 assert_return(from
|| to
, -EINVAL
);
2638 assert_return(from
!= to
, -EINVAL
);
2640 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2643 r
= journal_file_get_cutoff_realtime_usec(f
, &fr
, &t
);
2656 fmin
= MIN(fr
, fmin
);
2657 tmax
= MAX(t
, tmax
);
2666 return first
? 0 : 1;
2669 _public_
int sd_journal_get_cutoff_monotonic_usec(sd_journal
*j
, sd_id128_t boot_id
, uint64_t *from
, uint64_t *to
) {
2675 assert_return(j
, -EINVAL
);
2676 assert_return(!journal_pid_changed(j
), -ECHILD
);
2677 assert_return(from
|| to
, -EINVAL
);
2678 assert_return(from
!= to
, -EINVAL
);
2680 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2683 r
= journal_file_get_cutoff_monotonic_usec(f
, boot_id
, &fr
, &t
);
2693 *from
= MIN(fr
, *from
);
2708 void journal_print_header(sd_journal
*j
) {
2711 bool newline
= false;
2715 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2721 journal_file_print_header(f
);
2725 _public_
int sd_journal_get_usage(sd_journal
*j
, uint64_t *bytes
) {
2730 assert_return(j
, -EINVAL
);
2731 assert_return(!journal_pid_changed(j
), -ECHILD
);
2732 assert_return(bytes
, -EINVAL
);
2734 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2737 if (fstat(f
->fd
, &st
) < 0)
2740 sum
+= (uint64_t) st
.st_blocks
* 512ULL;
2747 _public_
int sd_journal_query_unique(sd_journal
*j
, const char *field
) {
2750 assert_return(j
, -EINVAL
);
2751 assert_return(!journal_pid_changed(j
), -ECHILD
);
2752 assert_return(!isempty(field
), -EINVAL
);
2753 assert_return(field_is_valid(field
), -EINVAL
);
2759 free(j
->unique_field
);
2760 j
->unique_field
= f
;
2761 j
->unique_file
= NULL
;
2762 j
->unique_offset
= 0;
2763 j
->unique_file_lost
= false;
2768 _public_
int sd_journal_enumerate_unique(sd_journal
*j
, const void **data
, size_t *l
) {
2771 assert_return(j
, -EINVAL
);
2772 assert_return(!journal_pid_changed(j
), -ECHILD
);
2773 assert_return(data
, -EINVAL
);
2774 assert_return(l
, -EINVAL
);
2775 assert_return(j
->unique_field
, -EINVAL
);
2777 k
= strlen(j
->unique_field
);
2779 if (!j
->unique_file
) {
2780 if (j
->unique_file_lost
)
2783 j
->unique_file
= ordered_hashmap_first(j
->files
);
2784 if (!j
->unique_file
)
2787 j
->unique_offset
= 0;
2799 /* Proceed to next data object in the field's linked list */
2800 if (j
->unique_offset
== 0) {
2801 r
= journal_file_find_field_object(j
->unique_file
, j
->unique_field
, k
, &o
, NULL
);
2805 j
->unique_offset
= r
> 0 ? le64toh(o
->field
.head_data_offset
) : 0;
2807 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_DATA
, j
->unique_offset
, &o
);
2811 j
->unique_offset
= le64toh(o
->data
.next_field_offset
);
2814 /* We reached the end of the list? Then start again, with the next file */
2815 if (j
->unique_offset
== 0) {
2816 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
2817 if (!j
->unique_file
)
2823 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2824 * instead, so that we can look at this data object at the same
2825 * time as one on another file */
2826 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_UNUSED
, j
->unique_offset
, &o
);
2830 /* Let's do the type check by hand, since we used 0 context above. */
2831 if (o
->object
.type
!= OBJECT_DATA
) {
2832 log_debug("%s:offset " OFSfmt
": object has type %d, expected %d",
2833 j
->unique_file
->path
, j
->unique_offset
,
2834 o
->object
.type
, OBJECT_DATA
);
2838 r
= return_data(j
, j
->unique_file
, o
, &odata
, &ol
);
2842 /* Check if we have at least the field name and "=". */
2844 log_debug("%s:offset " OFSfmt
": object has size %zu, expected at least %zu",
2845 j
->unique_file
->path
, j
->unique_offset
,
2850 if (memcmp(odata
, j
->unique_field
, k
) || ((const char*) odata
)[k
] != '=') {
2851 log_debug("%s:offset " OFSfmt
": object does not start with \"%s=\"",
2852 j
->unique_file
->path
, j
->unique_offset
,
2857 /* OK, now let's see if we already returned this data
2858 * object by checking if it exists in the earlier
2859 * traversed files. */
2861 ORDERED_HASHMAP_FOREACH(of
, j
->files
, i
) {
2862 if (of
== j
->unique_file
)
2865 /* Skip this file it didn't have any fields indexed */
2866 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
2869 r
= journal_file_find_data_object_with_hash(of
, odata
, ol
, le64toh(o
->data
.hash
), NULL
, NULL
);
2881 r
= return_data(j
, j
->unique_file
, o
, data
, l
);
2889 _public_
void sd_journal_restart_unique(sd_journal
*j
) {
2893 j
->unique_file
= NULL
;
2894 j
->unique_offset
= 0;
2895 j
->unique_file_lost
= false;
2898 _public_
int sd_journal_enumerate_fields(sd_journal
*j
, const char **field
) {
2901 assert_return(j
, -EINVAL
);
2902 assert_return(!journal_pid_changed(j
), -ECHILD
);
2903 assert_return(field
, -EINVAL
);
2905 if (!j
->fields_file
) {
2906 if (j
->fields_file_lost
)
2909 j
->fields_file
= ordered_hashmap_first(j
->files
);
2910 if (!j
->fields_file
)
2913 j
->fields_hash_table_index
= 0;
2914 j
->fields_offset
= 0;
2918 JournalFile
*f
, *of
;
2927 if (j
->fields_offset
== 0) {
2930 /* We are not yet positioned at any field. Let's pick the first one */
2931 r
= journal_file_map_field_hash_table(f
);
2935 m
= le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
);
2937 if (j
->fields_hash_table_index
>= m
) {
2938 /* Reached the end of the hash table, go to the next file. */
2943 j
->fields_offset
= le64toh(f
->field_hash_table
[j
->fields_hash_table_index
].head_hash_offset
);
2945 if (j
->fields_offset
!= 0)
2948 /* Empty hash table bucket, go to next one */
2949 j
->fields_hash_table_index
++;
2953 /* Proceed with next file */
2954 j
->fields_file
= ordered_hashmap_next(j
->files
, f
->path
);
2955 if (!j
->fields_file
) {
2960 j
->fields_offset
= 0;
2961 j
->fields_hash_table_index
= 0;
2966 /* We are already positioned at a field. If so, let's figure out the next field from it */
2968 r
= journal_file_move_to_object(f
, OBJECT_FIELD
, j
->fields_offset
, &o
);
2972 j
->fields_offset
= le64toh(o
->field
.next_hash_offset
);
2973 if (j
->fields_offset
== 0) {
2974 /* Reached the end of the hash table chain */
2975 j
->fields_hash_table_index
++;
2980 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
2981 r
= journal_file_move_to_object(f
, OBJECT_UNUSED
, j
->fields_offset
, &o
);
2985 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
2986 if (o
->object
.type
!= OBJECT_FIELD
) {
2987 log_debug("%s:offset " OFSfmt
": object has type %i, expected %i", f
->path
, j
->fields_offset
, o
->object
.type
, OBJECT_FIELD
);
2991 sz
= le64toh(o
->object
.size
) - offsetof(Object
, field
.payload
);
2993 /* Let's see if we already returned this field name before. */
2995 ORDERED_HASHMAP_FOREACH(of
, j
->files
, i
) {
2999 /* Skip this file it didn't have any fields indexed */
3000 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
3003 r
= journal_file_find_field_object_with_hash(of
, o
->field
.payload
, sz
, le64toh(o
->field
.hash
), NULL
, NULL
);
3015 /* Check if this is really a valid string containing no NUL byte */
3016 if (memchr(o
->field
.payload
, 0, sz
))
3019 if (sz
> j
->data_threshold
)
3020 sz
= j
->data_threshold
;
3022 if (!GREEDY_REALLOC(j
->fields_buffer
, j
->fields_buffer_allocated
, sz
+ 1))
3025 memcpy(j
->fields_buffer
, o
->field
.payload
, sz
);
3026 j
->fields_buffer
[sz
] = 0;
3028 if (!field_is_valid(j
->fields_buffer
))
3031 *field
= j
->fields_buffer
;
3036 _public_
void sd_journal_restart_fields(sd_journal
*j
) {
3040 j
->fields_file
= NULL
;
3041 j
->fields_hash_table_index
= 0;
3042 j
->fields_offset
= 0;
3043 j
->fields_file_lost
= false;
3046 _public_
int sd_journal_reliable_fd(sd_journal
*j
) {
3047 assert_return(j
, -EINVAL
);
3048 assert_return(!journal_pid_changed(j
), -ECHILD
);
3050 return !j
->on_network
;
3053 static char *lookup_field(const char *field
, void *userdata
) {
3054 sd_journal
*j
= userdata
;
3062 r
= sd_journal_get_data(j
, field
, &data
, &size
);
3064 size
> REPLACE_VAR_MAX
)
3065 return strdup(field
);
3067 d
= strlen(field
) + 1;
3069 return strndup((const char*) data
+ d
, size
- d
);
3072 _public_
int sd_journal_get_catalog(sd_journal
*j
, char **ret
) {
3076 _cleanup_free_
char *text
= NULL
, *cid
= NULL
;
3080 assert_return(j
, -EINVAL
);
3081 assert_return(!journal_pid_changed(j
), -ECHILD
);
3082 assert_return(ret
, -EINVAL
);
3084 r
= sd_journal_get_data(j
, "MESSAGE_ID", &data
, &size
);
3088 cid
= strndup((const char*) data
+ 11, size
- 11);
3092 r
= sd_id128_from_string(cid
, &id
);
3096 r
= catalog_get(CATALOG_DATABASE
, id
, &text
);
3100 t
= replace_var(text
, lookup_field
, j
);
3108 _public_
int sd_journal_get_catalog_for_message_id(sd_id128_t id
, char **ret
) {
3109 assert_return(ret
, -EINVAL
);
3111 return catalog_get(CATALOG_DATABASE
, id
, ret
);
3114 _public_
int sd_journal_set_data_threshold(sd_journal
*j
, size_t sz
) {
3115 assert_return(j
, -EINVAL
);
3116 assert_return(!journal_pid_changed(j
), -ECHILD
);
3118 j
->data_threshold
= sz
;
3122 _public_
int sd_journal_get_data_threshold(sd_journal
*j
, size_t *sz
) {
3123 assert_return(j
, -EINVAL
);
3124 assert_return(!journal_pid_changed(j
), -ECHILD
);
3125 assert_return(sz
, -EINVAL
);
3127 *sz
= j
->data_threshold
;
3131 _public_
int sd_journal_has_runtime_files(sd_journal
*j
) {
3132 assert_return(j
, -EINVAL
);
3134 return j
->has_runtime_files
;
3137 _public_
int sd_journal_has_persistent_files(sd_journal
*j
) {
3138 assert_return(j
, -EINVAL
);
3140 return j
->has_persistent_files
;