1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <linux/magic.h>
27 #include <sys/inotify.h>
31 #include "sd-journal.h"
33 #include "alloc-util.h"
36 #include "dirent-util.h"
39 #include "format-util.h"
42 #include "hostname-util.h"
43 #include "id128-util.h"
45 #include "journal-def.h"
46 #include "journal-file.h"
47 #include "journal-internal.h"
51 #include "path-util.h"
52 #include "process-util.h"
53 #include "replace-var.h"
54 #include "stat-util.h"
55 #include "stat-util.h"
56 #include "stdio-util.h"
57 #include "string-util.h"
60 #define JOURNAL_FILES_MAX 7168
62 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
64 #define REPLACE_VAR_MAX 256
66 #define DEFAULT_DATA_THRESHOLD (64*1024)
68 static void remove_file_real(sd_journal
*j
, JournalFile
*f
);
70 static bool journal_pid_changed(sd_journal
*j
) {
73 /* We don't support people creating a journal object and
74 * keeping it around over a fork(). Let's complain. */
76 return j
->original_pid
!= getpid_cached();
79 static int journal_put_error(sd_journal
*j
, int r
, const char *path
) {
83 /* Memorize an error we encountered, and store which
84 * file/directory it was generated from. Note that we store
85 * only *one* path per error code, as the error code is the
86 * key into the hashmap, and the path is the value. This means
87 * we keep track only of all error kinds, but not of all error
88 * locations. This has the benefit that the hashmap cannot
91 * We return an error here only if we didn't manage to
92 * memorize the real error. */
97 k
= hashmap_ensure_allocated(&j
->errors
, NULL
);
108 k
= hashmap_put(j
->errors
, INT_TO_PTR(r
), copy
);
121 static void detach_location(sd_journal
*j
) {
127 j
->current_file
= NULL
;
128 j
->current_field
= 0;
130 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
)
131 journal_file_reset_location(f
);
134 static void reset_location(sd_journal
*j
) {
138 zero(j
->current_location
);
141 static void init_location(Location
*l
, LocationType type
, JournalFile
*f
, Object
*o
) {
143 assert(IN_SET(type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
145 assert(o
->object
.type
== OBJECT_ENTRY
);
148 l
->seqnum
= le64toh(o
->entry
.seqnum
);
149 l
->seqnum_id
= f
->header
->seqnum_id
;
150 l
->realtime
= le64toh(o
->entry
.realtime
);
151 l
->monotonic
= le64toh(o
->entry
.monotonic
);
152 l
->boot_id
= o
->entry
.boot_id
;
153 l
->xor_hash
= le64toh(o
->entry
.xor_hash
);
155 l
->seqnum_set
= l
->realtime_set
= l
->monotonic_set
= l
->xor_hash_set
= true;
158 static void set_location(sd_journal
*j
, JournalFile
*f
, Object
*o
) {
163 init_location(&j
->current_location
, LOCATION_DISCRETE
, f
, o
);
166 j
->current_field
= 0;
168 /* Let f know its candidate entry was picked. */
169 assert(f
->location_type
== LOCATION_SEEK
);
170 f
->location_type
= LOCATION_DISCRETE
;
173 static int match_is_valid(const void *data
, size_t size
) {
181 if (startswith(data
, "__"))
185 for (p
= b
; p
< b
+ size
; p
++) {
193 if (*p
>= 'A' && *p
<= 'Z')
196 if (*p
>= '0' && *p
<= '9')
205 static bool same_field(const void *_a
, size_t s
, const void *_b
, size_t t
) {
206 const uint8_t *a
= _a
, *b
= _b
;
209 for (j
= 0; j
< s
&& j
< t
; j
++) {
218 assert_not_reached("\"=\" not found");
221 static Match
*match_new(Match
*p
, MatchType t
) {
232 LIST_PREPEND(matches
, p
->matches
, m
);
238 static void match_free(Match
*m
) {
242 match_free(m
->matches
);
245 LIST_REMOVE(matches
, m
->parent
->matches
, m
);
251 static void match_free_if_empty(Match
*m
) {
252 if (!m
|| m
->matches
)
258 _public_
int sd_journal_add_match(sd_journal
*j
, const void *data
, size_t size
) {
259 Match
*l3
, *l4
, *add_here
= NULL
, *m
;
262 assert_return(j
, -EINVAL
);
263 assert_return(!journal_pid_changed(j
), -ECHILD
);
264 assert_return(data
, -EINVAL
);
269 assert_return(match_is_valid(data
, size
), -EINVAL
);
275 * level 4: concrete matches */
278 j
->level0
= match_new(NULL
, MATCH_AND_TERM
);
284 j
->level1
= match_new(j
->level0
, MATCH_OR_TERM
);
290 j
->level2
= match_new(j
->level1
, MATCH_AND_TERM
);
295 assert(j
->level0
->type
== MATCH_AND_TERM
);
296 assert(j
->level1
->type
== MATCH_OR_TERM
);
297 assert(j
->level2
->type
== MATCH_AND_TERM
);
299 le_hash
= htole64(hash64(data
, size
));
301 LIST_FOREACH(matches
, l3
, j
->level2
->matches
) {
302 assert(l3
->type
== MATCH_OR_TERM
);
304 LIST_FOREACH(matches
, l4
, l3
->matches
) {
305 assert(l4
->type
== MATCH_DISCRETE
);
307 /* Exactly the same match already? Then ignore
309 if (l4
->le_hash
== le_hash
&&
311 memcmp(l4
->data
, data
, size
) == 0)
314 /* Same field? Then let's add this to this OR term */
315 if (same_field(data
, size
, l4
->data
, l4
->size
)) {
326 add_here
= match_new(j
->level2
, MATCH_OR_TERM
);
331 m
= match_new(add_here
, MATCH_DISCRETE
);
335 m
->le_hash
= le_hash
;
337 m
->data
= memdup(data
, size
);
346 match_free_if_empty(add_here
);
347 match_free_if_empty(j
->level2
);
348 match_free_if_empty(j
->level1
);
349 match_free_if_empty(j
->level0
);
354 _public_
int sd_journal_add_conjunction(sd_journal
*j
) {
355 assert_return(j
, -EINVAL
);
356 assert_return(!journal_pid_changed(j
), -ECHILD
);
364 if (!j
->level1
->matches
)
373 _public_
int sd_journal_add_disjunction(sd_journal
*j
) {
374 assert_return(j
, -EINVAL
);
375 assert_return(!journal_pid_changed(j
), -ECHILD
);
386 if (!j
->level2
->matches
)
393 static char *match_make_string(Match
*m
) {
396 bool enclose
= false;
399 return strdup("none");
401 if (m
->type
== MATCH_DISCRETE
)
402 return strndup(m
->data
, m
->size
);
404 LIST_FOREACH(matches
, i
, m
->matches
) {
407 t
= match_make_string(i
);
412 k
= strjoin(p
, m
->type
== MATCH_OR_TERM
? " OR " : " AND ", t
);
427 r
= strjoin("(", p
, ")");
435 char *journal_make_match_string(sd_journal
*j
) {
438 return match_make_string(j
->level0
);
441 _public_
void sd_journal_flush_matches(sd_journal
*j
) {
446 match_free(j
->level0
);
448 j
->level0
= j
->level1
= j
->level2
= NULL
;
453 _pure_
static int compare_with_location(JournalFile
*f
, Location
*l
) {
456 assert(f
->location_type
== LOCATION_SEEK
);
457 assert(IN_SET(l
->type
, LOCATION_DISCRETE
, LOCATION_SEEK
));
459 if (l
->monotonic_set
&&
460 sd_id128_equal(f
->current_boot_id
, l
->boot_id
) &&
462 f
->current_realtime
== l
->realtime
&&
464 f
->current_xor_hash
== l
->xor_hash
)
468 sd_id128_equal(f
->header
->seqnum_id
, l
->seqnum_id
)) {
470 if (f
->current_seqnum
< l
->seqnum
)
472 if (f
->current_seqnum
> l
->seqnum
)
476 if (l
->monotonic_set
&&
477 sd_id128_equal(f
->current_boot_id
, l
->boot_id
)) {
479 if (f
->current_monotonic
< l
->monotonic
)
481 if (f
->current_monotonic
> l
->monotonic
)
485 if (l
->realtime_set
) {
487 if (f
->current_realtime
< l
->realtime
)
489 if (f
->current_realtime
> l
->realtime
)
493 if (l
->xor_hash_set
) {
495 if (f
->current_xor_hash
< l
->xor_hash
)
497 if (f
->current_xor_hash
> l
->xor_hash
)
504 static int next_for_match(
508 uint64_t after_offset
,
509 direction_t direction
,
521 if (m
->type
== MATCH_DISCRETE
) {
524 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, le64toh(m
->le_hash
), NULL
, &dp
);
528 return journal_file_move_to_entry_by_offset_for_data(f
, dp
, after_offset
, direction
, ret
, offset
);
530 } else if (m
->type
== MATCH_OR_TERM
) {
533 /* Find the earliest match beyond after_offset */
535 LIST_FOREACH(matches
, i
, m
->matches
) {
538 r
= next_for_match(j
, i
, f
, after_offset
, direction
, NULL
, &cp
);
542 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
< np
: cp
> np
))
550 } else if (m
->type
== MATCH_AND_TERM
) {
551 Match
*i
, *last_moved
;
553 /* Always jump to the next matching entry and repeat
554 * this until we find an offset that matches for all
560 r
= next_for_match(j
, m
->matches
, f
, after_offset
, direction
, NULL
, &np
);
564 assert(direction
== DIRECTION_DOWN
? np
>= after_offset
: np
<= after_offset
);
565 last_moved
= m
->matches
;
567 LIST_LOOP_BUT_ONE(matches
, i
, m
->matches
, last_moved
) {
570 r
= next_for_match(j
, i
, f
, np
, direction
, NULL
, &cp
);
574 assert(direction
== DIRECTION_DOWN
? cp
>= np
: cp
<= np
);
575 if (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
) {
584 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, &n
);
596 static int find_location_for_match(
600 direction_t direction
,
610 if (m
->type
== MATCH_DISCRETE
) {
613 r
= journal_file_find_data_object_with_hash(f
, m
->data
, m
->size
, le64toh(m
->le_hash
), NULL
, &dp
);
617 /* FIXME: missing: find by monotonic */
619 if (j
->current_location
.type
== LOCATION_HEAD
)
620 return journal_file_next_entry_for_data(f
, NULL
, 0, dp
, DIRECTION_DOWN
, ret
, offset
);
621 if (j
->current_location
.type
== LOCATION_TAIL
)
622 return journal_file_next_entry_for_data(f
, NULL
, 0, dp
, DIRECTION_UP
, ret
, offset
);
623 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
624 return journal_file_move_to_entry_by_seqnum_for_data(f
, dp
, j
->current_location
.seqnum
, direction
, ret
, offset
);
625 if (j
->current_location
.monotonic_set
) {
626 r
= journal_file_move_to_entry_by_monotonic_for_data(f
, dp
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, offset
);
630 if (j
->current_location
.realtime_set
)
631 return journal_file_move_to_entry_by_realtime_for_data(f
, dp
, j
->current_location
.realtime
, direction
, ret
, offset
);
633 return journal_file_next_entry_for_data(f
, NULL
, 0, dp
, direction
, ret
, offset
);
635 } else if (m
->type
== MATCH_OR_TERM
) {
640 /* Find the earliest match */
642 LIST_FOREACH(matches
, i
, m
->matches
) {
645 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
649 if (np
== 0 || (direction
== DIRECTION_DOWN
? np
> cp
: np
< cp
))
657 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, np
, &n
);
672 assert(m
->type
== MATCH_AND_TERM
);
674 /* First jump to the last match, and then find the
675 * next one where all matches match */
680 LIST_FOREACH(matches
, i
, m
->matches
) {
683 r
= find_location_for_match(j
, i
, f
, direction
, NULL
, &cp
);
687 if (np
== 0 || (direction
== DIRECTION_DOWN
? cp
> np
: cp
< np
))
691 return next_for_match(j
, m
, f
, np
, direction
, ret
, offset
);
695 static int find_location_with_matches(
698 direction_t direction
,
710 /* No matches is simple */
712 if (j
->current_location
.type
== LOCATION_HEAD
)
713 return journal_file_next_entry(f
, 0, DIRECTION_DOWN
, ret
, offset
);
714 if (j
->current_location
.type
== LOCATION_TAIL
)
715 return journal_file_next_entry(f
, 0, DIRECTION_UP
, ret
, offset
);
716 if (j
->current_location
.seqnum_set
&& sd_id128_equal(j
->current_location
.seqnum_id
, f
->header
->seqnum_id
))
717 return journal_file_move_to_entry_by_seqnum(f
, j
->current_location
.seqnum
, direction
, ret
, offset
);
718 if (j
->current_location
.monotonic_set
) {
719 r
= journal_file_move_to_entry_by_monotonic(f
, j
->current_location
.boot_id
, j
->current_location
.monotonic
, direction
, ret
, offset
);
723 if (j
->current_location
.realtime_set
)
724 return journal_file_move_to_entry_by_realtime(f
, j
->current_location
.realtime
, direction
, ret
, offset
);
726 return journal_file_next_entry(f
, 0, direction
, ret
, offset
);
728 return find_location_for_match(j
, j
->level0
, f
, direction
, ret
, offset
);
731 static int next_with_matches(
734 direction_t direction
,
743 /* No matches is easy. We simple advance the file
746 return journal_file_next_entry(f
, f
->current_offset
, direction
, ret
, offset
);
748 /* If we have a match then we look for the next matching entry
749 * with an offset at least one step larger */
750 return next_for_match(j
, j
->level0
, f
,
751 direction
== DIRECTION_DOWN
? f
->current_offset
+ 1
752 : f
->current_offset
- 1,
753 direction
, ret
, offset
);
756 static int next_beyond_location(sd_journal
*j
, JournalFile
*f
, direction_t direction
) {
758 uint64_t cp
, n_entries
;
764 n_entries
= le64toh(f
->header
->n_entries
);
766 /* If we hit EOF before, we don't need to look into this file again
767 * unless direction changed or new entries appeared. */
768 if (f
->last_direction
== direction
&& f
->location_type
== LOCATION_TAIL
&&
769 n_entries
== f
->last_n_entries
)
772 f
->last_n_entries
= n_entries
;
774 if (f
->last_direction
== direction
&& f
->current_offset
> 0) {
775 /* LOCATION_SEEK here means we did the work in a previous
776 * iteration and the current location already points to a
777 * candidate entry. */
778 if (f
->location_type
!= LOCATION_SEEK
) {
779 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
783 journal_file_save_location(f
, c
, cp
);
786 f
->last_direction
= direction
;
788 r
= find_location_with_matches(j
, f
, direction
, &c
, &cp
);
792 journal_file_save_location(f
, c
, cp
);
795 /* OK, we found the spot, now let's advance until an entry
796 * that is actually different from what we were previously
797 * looking at. This is necessary to handle entries which exist
798 * in two (or more) journal files, and which shall all be
799 * suppressed but one. */
804 if (j
->current_location
.type
== LOCATION_DISCRETE
) {
807 k
= compare_with_location(f
, &j
->current_location
);
809 found
= direction
== DIRECTION_DOWN
? k
> 0 : k
< 0;
816 r
= next_with_matches(j
, f
, direction
, &c
, &cp
);
820 journal_file_save_location(f
, c
, cp
);
824 static int real_journal_next(sd_journal
*j
, direction_t direction
) {
825 JournalFile
*new_file
= NULL
;
831 assert_return(j
, -EINVAL
);
832 assert_return(!journal_pid_changed(j
), -ECHILD
);
834 r
= iterated_cache_get(j
->files_cache
, NULL
, &files
, &n_files
);
838 for (i
= 0; i
< n_files
; i
++) {
839 JournalFile
*f
= (JournalFile
*)files
[i
];
842 r
= next_beyond_location(j
, f
, direction
);
844 log_debug_errno(r
, "Can't iterate through %s, ignoring: %m", f
->path
);
845 remove_file_real(j
, f
);
848 f
->location_type
= LOCATION_TAIL
;
857 k
= journal_file_compare_locations(f
, new_file
);
859 found
= direction
== DIRECTION_DOWN
? k
< 0 : k
> 0;
869 r
= journal_file_move_to_object(new_file
, OBJECT_ENTRY
, new_file
->current_offset
, &o
);
873 set_location(j
, new_file
, o
);
878 _public_
int sd_journal_next(sd_journal
*j
) {
879 return real_journal_next(j
, DIRECTION_DOWN
);
882 _public_
int sd_journal_previous(sd_journal
*j
) {
883 return real_journal_next(j
, DIRECTION_UP
);
886 static int real_journal_next_skip(sd_journal
*j
, direction_t direction
, uint64_t skip
) {
889 assert_return(j
, -EINVAL
);
890 assert_return(!journal_pid_changed(j
), -ECHILD
);
893 /* If this is not a discrete skip, then at least
894 * resolve the current location */
895 if (j
->current_location
.type
!= LOCATION_DISCRETE
) {
896 r
= real_journal_next(j
, direction
);
905 r
= real_journal_next(j
, direction
);
919 _public_
int sd_journal_next_skip(sd_journal
*j
, uint64_t skip
) {
920 return real_journal_next_skip(j
, DIRECTION_DOWN
, skip
);
923 _public_
int sd_journal_previous_skip(sd_journal
*j
, uint64_t skip
) {
924 return real_journal_next_skip(j
, DIRECTION_UP
, skip
);
927 _public_
int sd_journal_get_cursor(sd_journal
*j
, char **cursor
) {
930 char bid
[33], sid
[33];
932 assert_return(j
, -EINVAL
);
933 assert_return(!journal_pid_changed(j
), -ECHILD
);
934 assert_return(cursor
, -EINVAL
);
936 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
937 return -EADDRNOTAVAIL
;
939 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
943 sd_id128_to_string(j
->current_file
->header
->seqnum_id
, sid
);
944 sd_id128_to_string(o
->entry
.boot_id
, bid
);
947 "s=%s;i=%"PRIx64
";b=%s;m=%"PRIx64
";t=%"PRIx64
";x=%"PRIx64
,
948 sid
, le64toh(o
->entry
.seqnum
),
949 bid
, le64toh(o
->entry
.monotonic
),
950 le64toh(o
->entry
.realtime
),
951 le64toh(o
->entry
.xor_hash
)) < 0)
957 _public_
int sd_journal_seek_cursor(sd_journal
*j
, const char *cursor
) {
958 const char *word
, *state
;
960 unsigned long long seqnum
, monotonic
, realtime
, xor_hash
;
962 seqnum_id_set
= false,
965 monotonic_set
= false,
966 realtime_set
= false,
967 xor_hash_set
= false;
968 sd_id128_t seqnum_id
, boot_id
;
970 assert_return(j
, -EINVAL
);
971 assert_return(!journal_pid_changed(j
), -ECHILD
);
972 assert_return(!isempty(cursor
), -EINVAL
);
974 FOREACH_WORD_SEPARATOR(word
, l
, cursor
, ";", state
) {
978 if (l
< 2 || word
[1] != '=')
981 item
= strndup(word
, l
);
988 seqnum_id_set
= true;
989 k
= sd_id128_from_string(item
+2, &seqnum_id
);
994 if (sscanf(item
+2, "%llx", &seqnum
) != 1)
1000 k
= sd_id128_from_string(item
+2, &boot_id
);
1004 monotonic_set
= true;
1005 if (sscanf(item
+2, "%llx", &monotonic
) != 1)
1010 realtime_set
= true;
1011 if (sscanf(item
+2, "%llx", &realtime
) != 1)
1016 xor_hash_set
= true;
1017 if (sscanf(item
+2, "%llx", &xor_hash
) != 1)
1028 if ((!seqnum_set
|| !seqnum_id_set
) &&
1029 (!monotonic_set
|| !boot_id_set
) &&
1035 j
->current_location
.type
= LOCATION_SEEK
;
1038 j
->current_location
.realtime
= (uint64_t) realtime
;
1039 j
->current_location
.realtime_set
= true;
1042 if (seqnum_set
&& seqnum_id_set
) {
1043 j
->current_location
.seqnum
= (uint64_t) seqnum
;
1044 j
->current_location
.seqnum_id
= seqnum_id
;
1045 j
->current_location
.seqnum_set
= true;
1048 if (monotonic_set
&& boot_id_set
) {
1049 j
->current_location
.monotonic
= (uint64_t) monotonic
;
1050 j
->current_location
.boot_id
= boot_id
;
1051 j
->current_location
.monotonic_set
= true;
1055 j
->current_location
.xor_hash
= (uint64_t) xor_hash
;
1056 j
->current_location
.xor_hash_set
= true;
1062 _public_
int sd_journal_test_cursor(sd_journal
*j
, const char *cursor
) {
1066 assert_return(j
, -EINVAL
);
1067 assert_return(!journal_pid_changed(j
), -ECHILD
);
1068 assert_return(!isempty(cursor
), -EINVAL
);
1070 if (!j
->current_file
|| j
->current_file
->current_offset
<= 0)
1071 return -EADDRNOTAVAIL
;
1073 r
= journal_file_move_to_object(j
->current_file
, OBJECT_ENTRY
, j
->current_file
->current_offset
, &o
);
1078 _cleanup_free_
char *item
= NULL
;
1079 unsigned long long ll
;
1083 r
= extract_first_word(&cursor
, &item
, ";", EXTRACT_DONT_COALESCE_SEPARATORS
);
1090 if (strlen(item
) < 2 || item
[1] != '=')
1096 k
= sd_id128_from_string(item
+2, &id
);
1099 if (!sd_id128_equal(id
, j
->current_file
->header
->seqnum_id
))
1104 if (sscanf(item
+2, "%llx", &ll
) != 1)
1106 if (ll
!= le64toh(o
->entry
.seqnum
))
1111 k
= sd_id128_from_string(item
+2, &id
);
1114 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
1119 if (sscanf(item
+2, "%llx", &ll
) != 1)
1121 if (ll
!= le64toh(o
->entry
.monotonic
))
1126 if (sscanf(item
+2, "%llx", &ll
) != 1)
1128 if (ll
!= le64toh(o
->entry
.realtime
))
1133 if (sscanf(item
+2, "%llx", &ll
) != 1)
1135 if (ll
!= le64toh(o
->entry
.xor_hash
))
1144 _public_
int sd_journal_seek_monotonic_usec(sd_journal
*j
, sd_id128_t boot_id
, uint64_t usec
) {
1145 assert_return(j
, -EINVAL
);
1146 assert_return(!journal_pid_changed(j
), -ECHILD
);
1149 j
->current_location
.type
= LOCATION_SEEK
;
1150 j
->current_location
.boot_id
= boot_id
;
1151 j
->current_location
.monotonic
= usec
;
1152 j
->current_location
.monotonic_set
= true;
1157 _public_
int sd_journal_seek_realtime_usec(sd_journal
*j
, uint64_t usec
) {
1158 assert_return(j
, -EINVAL
);
1159 assert_return(!journal_pid_changed(j
), -ECHILD
);
1162 j
->current_location
.type
= LOCATION_SEEK
;
1163 j
->current_location
.realtime
= usec
;
1164 j
->current_location
.realtime_set
= true;
1169 _public_
int sd_journal_seek_head(sd_journal
*j
) {
1170 assert_return(j
, -EINVAL
);
1171 assert_return(!journal_pid_changed(j
), -ECHILD
);
1174 j
->current_location
.type
= LOCATION_HEAD
;
1179 _public_
int sd_journal_seek_tail(sd_journal
*j
) {
1180 assert_return(j
, -EINVAL
);
1181 assert_return(!journal_pid_changed(j
), -ECHILD
);
1184 j
->current_location
.type
= LOCATION_TAIL
;
1189 static void check_network(sd_journal
*j
, int fd
) {
1195 j
->on_network
= fd_is_network_fs(fd
);
1198 static bool file_has_type_prefix(const char *prefix
, const char *filename
) {
1199 const char *full
, *tilded
, *atted
;
1201 full
= strjoina(prefix
, ".journal");
1202 tilded
= strjoina(full
, "~");
1203 atted
= strjoina(prefix
, "@");
1205 return streq(filename
, full
) ||
1206 streq(filename
, tilded
) ||
1207 startswith(filename
, atted
);
1210 static bool file_type_wanted(int flags
, const char *filename
) {
1213 if (!endswith(filename
, ".journal") && !endswith(filename
, ".journal~"))
1216 /* no flags set → every type is OK */
1217 if (!(flags
& (SD_JOURNAL_SYSTEM
| SD_JOURNAL_CURRENT_USER
)))
1220 if (flags
& SD_JOURNAL_SYSTEM
&& file_has_type_prefix("system", filename
))
1223 if (flags
& SD_JOURNAL_CURRENT_USER
) {
1224 char prefix
[5 + DECIMAL_STR_MAX(uid_t
) + 1];
1226 xsprintf(prefix
, "user-"UID_FMT
, getuid());
1228 if (file_has_type_prefix(prefix
, filename
))
1235 static bool path_has_prefix(sd_journal
*j
, const char *path
, const char *prefix
) {
1240 if (j
->toplevel_fd
>= 0)
1243 return path_startswith(path
, prefix
);
1246 static const char *skip_slash(const char *p
) {
1257 static int add_any_file(sd_journal
*j
, int fd
, const char *path
) {
1258 JournalFile
*f
= NULL
;
1259 bool close_fd
= false;
1263 assert(fd
>= 0 || path
);
1266 f
= ordered_hashmap_get(j
->files
, path
);
1268 /* Mark this file as seen in this generation. This is used to GC old files in
1269 * process_q_overflow() to detect journal files that are still and discern them from those who
1271 f
->last_seen_generation
= j
->generation
;
1276 if (ordered_hashmap_size(j
->files
) >= JOURNAL_FILES_MAX
) {
1277 log_debug("Too many open journal files, not adding %s.", path
);
1282 if (fd
< 0 && j
->toplevel_fd
>= 0) {
1284 /* If there's a top-level fd defined, open the file relative to this now. (Make the path relative,
1285 * explicitly, since otherwise openat() ignores the first argument.) */
1287 fd
= openat(j
->toplevel_fd
, skip_slash(path
), O_RDONLY
|O_CLOEXEC
);
1289 r
= log_debug_errno(errno
, "Failed to open journal file %s: %m", path
);
1296 r
= journal_file_open(fd
, path
, O_RDONLY
, 0, false, false, NULL
, j
->mmap
, NULL
, NULL
, &f
);
1300 log_debug_errno(r
, "Failed to open journal file %s: %m", path
);
1304 /* journal_file_dump(f); */
1306 r
= ordered_hashmap_put(j
->files
, f
->path
, f
);
1308 f
->close_fd
= close_fd
;
1309 (void) journal_file_close(f
);
1313 f
->last_seen_generation
= j
->generation
;
1315 if (!j
->has_runtime_files
&& path_has_prefix(j
, f
->path
, "/run"))
1316 j
->has_runtime_files
= true;
1317 else if (!j
->has_persistent_files
&& path_has_prefix(j
, f
->path
, "/var"))
1318 j
->has_persistent_files
= true;
1320 log_debug("File %s added.", f
->path
);
1322 check_network(j
, f
->fd
);
1324 j
->current_invalidate_counter
++;
1329 k
= journal_put_error(j
, r
, path
);
1336 static int add_file(sd_journal
*j
, const char *prefix
, const char *filename
) {
1343 if (j
->no_new_files
)
1346 if (!file_type_wanted(j
->flags
, filename
))
1349 path
= strjoina(prefix
, "/", filename
);
1350 return add_any_file(j
, -1, path
);
1353 static void remove_file(sd_journal
*j
, const char *prefix
, const char *filename
) {
1361 path
= strjoina(prefix
, "/", filename
);
1362 f
= ordered_hashmap_get(j
->files
, path
);
1366 remove_file_real(j
, f
);
1369 static void remove_file_real(sd_journal
*j
, JournalFile
*f
) {
1373 ordered_hashmap_remove(j
->files
, f
->path
);
1375 log_debug("File %s removed.", f
->path
);
1377 if (j
->current_file
== f
) {
1378 j
->current_file
= NULL
;
1379 j
->current_field
= 0;
1382 if (j
->unique_file
== f
) {
1383 /* Jump to the next unique_file or NULL if that one was last */
1384 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
1385 j
->unique_offset
= 0;
1386 if (!j
->unique_file
)
1387 j
->unique_file_lost
= true;
1390 if (j
->fields_file
== f
) {
1391 j
->fields_file
= ordered_hashmap_next(j
->files
, j
->fields_file
->path
);
1392 j
->fields_offset
= 0;
1393 if (!j
->fields_file
)
1394 j
->fields_file_lost
= true;
1397 (void) journal_file_close(f
);
1399 j
->current_invalidate_counter
++;
1402 static int dirname_is_machine_id(const char *fn
) {
1403 sd_id128_t id
, machine
;
1406 r
= sd_id128_get_machine(&machine
);
1410 r
= sd_id128_from_string(fn
, &id
);
1414 return sd_id128_equal(id
, machine
);
1417 static bool dirent_is_journal_file(const struct dirent
*de
) {
1420 if (!IN_SET(de
->d_type
, DT_REG
, DT_LNK
, DT_UNKNOWN
))
1423 return endswith(de
->d_name
, ".journal") ||
1424 endswith(de
->d_name
, ".journal~");
1427 static bool dirent_is_id128_subdir(const struct dirent
*de
) {
1430 if (!IN_SET(de
->d_type
, DT_DIR
, DT_LNK
, DT_UNKNOWN
))
1433 return id128_is_valid(de
->d_name
);
1436 static int directory_open(sd_journal
*j
, const char *path
, DIR **ret
) {
1443 if (j
->toplevel_fd
< 0)
1446 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1447 * relative, by dropping the initial slash */
1448 d
= xopendirat(j
->toplevel_fd
, skip_slash(path
), 0);
1456 static int add_directory(sd_journal
*j
, const char *prefix
, const char *dirname
);
1458 static void directory_enumerate(sd_journal
*j
, Directory
*m
, DIR *d
) {
1465 FOREACH_DIRENT_ALL(de
, d
, goto fail
) {
1466 if (dirent_is_journal_file(de
))
1467 (void) add_file(j
, m
->path
, de
->d_name
);
1469 if (m
->is_root
&& dirent_is_id128_subdir(de
))
1470 (void) add_directory(j
, m
->path
, de
->d_name
);
1476 log_debug_errno(errno
, "Failed to enumerate directory %s, ignoring: %m", m
->path
);
1479 static void directory_watch(sd_journal
*j
, Directory
*m
, int fd
, uint32_t mask
) {
1486 /* Watch this directory if that's enabled and if it not being watched yet. */
1488 if (m
->wd
> 0) /* Already have a watch? */
1490 if (j
->inotify_fd
< 0) /* Not watching at all? */
1493 m
->wd
= inotify_add_watch_fd(j
->inotify_fd
, fd
, mask
);
1495 log_debug_errno(errno
, "Failed to watch journal directory '%s', ignoring: %m", m
->path
);
1499 r
= hashmap_put(j
->directories_by_wd
, INT_TO_PTR(m
->wd
), m
);
1501 log_debug_errno(r
, "Directory '%s' already being watched under a different path, ignoring: %m", m
->path
);
1503 log_debug_errno(r
, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m
->path
);
1504 (void) inotify_rm_watch(j
->inotify_fd
, m
->wd
);
1509 static int add_directory(sd_journal
*j
, const char *prefix
, const char *dirname
) {
1510 _cleanup_free_
char *path
= NULL
;
1511 _cleanup_closedir_
DIR *d
= NULL
;
1518 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1519 * and reenumerates directory contents */
1522 path
= strjoin(prefix
, "/", dirname
);
1524 path
= strdup(prefix
);
1530 log_debug("Considering directory '%s'.", path
);
1532 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1533 if ((j
->flags
& SD_JOURNAL_LOCAL_ONLY
) &&
1534 !((dirname
&& dirname_is_machine_id(dirname
) > 0) || path_has_prefix(j
, path
, "/run")))
1537 r
= directory_open(j
, path
, &d
);
1539 log_debug_errno(r
, "Failed to open directory '%s': %m", path
);
1543 m
= hashmap_get(j
->directories_by_path
, path
);
1545 m
= new0(Directory
, 1);
1554 if (hashmap_put(j
->directories_by_path
, m
->path
, m
) < 0) {
1560 path
= NULL
; /* avoid freeing in cleanup */
1561 j
->current_invalidate_counter
++;
1563 log_debug("Directory %s added.", m
->path
);
1565 } else if (m
->is_root
)
1566 return 0; /* Don't 'downgrade' from root directory */
1568 m
->last_seen_generation
= j
->generation
;
1570 directory_watch(j
, m
, dirfd(d
),
1571 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
1572 IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
|IN_MOVED_FROM
|
1575 if (!j
->no_new_files
)
1576 directory_enumerate(j
, m
, d
);
1578 check_network(j
, dirfd(d
));
1583 k
= journal_put_error(j
, r
, path
?: prefix
);
1590 static int add_root_directory(sd_journal
*j
, const char *p
, bool missing_ok
) {
1592 _cleanup_closedir_
DIR *d
= NULL
;
1598 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
1599 * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
1600 * populate the set, as well as to update it later. */
1603 /* If there's a path specified, use it. */
1605 log_debug("Considering root directory '%s'.", p
);
1607 if ((j
->flags
& SD_JOURNAL_RUNTIME_ONLY
) &&
1608 !path_has_prefix(j
, p
, "/run"))
1612 p
= strjoina(j
->prefix
, p
);
1614 r
= directory_open(j
, p
, &d
);
1615 if (r
== -ENOENT
&& missing_ok
)
1618 log_debug_errno(r
, "Failed to open root directory %s: %m", p
);
1624 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
1625 * opendir() will take possession of the fd, and close it, which we don't want. */
1627 p
= "."; /* store this as "." in the directories hashmap */
1629 dfd
= fcntl(j
->toplevel_fd
, F_DUPFD_CLOEXEC
, 3);
1645 m
= hashmap_get(j
->directories_by_path
, p
);
1647 m
= new0(Directory
, 1);
1655 m
->path
= strdup(p
);
1662 if (hashmap_put(j
->directories_by_path
, m
->path
, m
) < 0) {
1669 j
->current_invalidate_counter
++;
1671 log_debug("Root directory %s added.", m
->path
);
1673 } else if (!m
->is_root
)
1676 directory_watch(j
, m
, dirfd(d
),
1677 IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
|IN_DELETE
|
1680 if (!j
->no_new_files
)
1681 directory_enumerate(j
, m
, d
);
1683 check_network(j
, dirfd(d
));
1688 k
= journal_put_error(j
, r
, p
);
1695 static void remove_directory(sd_journal
*j
, Directory
*d
) {
1699 hashmap_remove(j
->directories_by_wd
, INT_TO_PTR(d
->wd
));
1701 if (j
->inotify_fd
>= 0)
1702 inotify_rm_watch(j
->inotify_fd
, d
->wd
);
1705 hashmap_remove(j
->directories_by_path
, d
->path
);
1708 log_debug("Root directory %s removed.", d
->path
);
1710 log_debug("Directory %s removed.", d
->path
);
1716 static int add_search_paths(sd_journal
*j
) {
1718 static const char search_paths
[] =
1719 "/run/log/journal\0"
1720 "/var/log/journal\0";
1725 /* We ignore most errors here, since the idea is to only open
1726 * what's actually accessible, and ignore the rest. */
1728 NULSTR_FOREACH(p
, search_paths
)
1729 (void) add_root_directory(j
, p
, true);
1731 if (!(j
->flags
& SD_JOURNAL_LOCAL_ONLY
))
1732 (void) add_root_directory(j
, "/var/log/journal/remote", true);
1737 static int add_current_paths(sd_journal
*j
) {
1742 assert(j
->no_new_files
);
1744 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
1745 * treat them as fatal. */
1747 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
1748 _cleanup_free_
char *dir
;
1751 dir
= dirname_malloc(f
->path
);
1755 r
= add_directory(j
, dir
, NULL
);
1763 static int allocate_inotify(sd_journal
*j
) {
1766 if (j
->inotify_fd
< 0) {
1767 j
->inotify_fd
= inotify_init1(IN_NONBLOCK
|IN_CLOEXEC
);
1768 if (j
->inotify_fd
< 0)
1772 return hashmap_ensure_allocated(&j
->directories_by_wd
, NULL
);
1775 static sd_journal
*journal_new(int flags
, const char *path
) {
1778 j
= new0(sd_journal
, 1);
1782 j
->original_pid
= getpid_cached();
1783 j
->toplevel_fd
= -1;
1786 j
->data_threshold
= DEFAULT_DATA_THRESHOLD
;
1795 if (flags
& SD_JOURNAL_OS_ROOT
)
1801 j
->files
= ordered_hashmap_new(&path_hash_ops
);
1805 j
->files_cache
= ordered_hashmap_iterated_cache_new(j
->files
);
1806 j
->directories_by_path
= hashmap_new(&path_hash_ops
);
1807 j
->mmap
= mmap_cache_new();
1808 if (!j
->files_cache
|| !j
->directories_by_path
|| !j
->mmap
)
1814 sd_journal_close(j
);
1818 #define OPEN_ALLOWED_FLAGS \
1819 (SD_JOURNAL_LOCAL_ONLY | \
1820 SD_JOURNAL_RUNTIME_ONLY | \
1821 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)
1823 _public_
int sd_journal_open(sd_journal
**ret
, int flags
) {
1827 assert_return(ret
, -EINVAL
);
1828 assert_return((flags
& ~OPEN_ALLOWED_FLAGS
) == 0, -EINVAL
);
1830 j
= journal_new(flags
, NULL
);
1834 r
= add_search_paths(j
);
1842 sd_journal_close(j
);
1847 #define OPEN_CONTAINER_ALLOWED_FLAGS \
1848 (SD_JOURNAL_LOCAL_ONLY | SD_JOURNAL_SYSTEM)
1850 _public_
int sd_journal_open_container(sd_journal
**ret
, const char *machine
, int flags
) {
1851 _cleanup_free_
char *root
= NULL
, *class = NULL
;
1856 /* This is pretty much deprecated, people should use machined's OpenMachineRootDirectory() call instead in
1857 * combination with sd_journal_open_directory_fd(). */
1859 assert_return(machine
, -EINVAL
);
1860 assert_return(ret
, -EINVAL
);
1861 assert_return((flags
& ~OPEN_CONTAINER_ALLOWED_FLAGS
) == 0, -EINVAL
);
1862 assert_return(machine_name_is_valid(machine
), -EINVAL
);
1864 p
= strjoina("/run/systemd/machines/", machine
);
1865 r
= parse_env_file(p
, NEWLINE
, "ROOT", &root
, "CLASS", &class, NULL
);
1873 if (!streq_ptr(class, "container"))
1876 j
= journal_new(flags
, root
);
1880 r
= add_search_paths(j
);
1888 sd_journal_close(j
);
1892 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
1893 (SD_JOURNAL_OS_ROOT | \
1894 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1896 _public_
int sd_journal_open_directory(sd_journal
**ret
, const char *path
, int flags
) {
1900 assert_return(ret
, -EINVAL
);
1901 assert_return(path
, -EINVAL
);
1902 assert_return((flags
& ~OPEN_DIRECTORY_ALLOWED_FLAGS
) == 0, -EINVAL
);
1904 j
= journal_new(flags
, path
);
1908 if (flags
& SD_JOURNAL_OS_ROOT
)
1909 r
= add_search_paths(j
);
1911 r
= add_root_directory(j
, path
, false);
1919 sd_journal_close(j
);
1923 _public_
int sd_journal_open_files(sd_journal
**ret
, const char **paths
, int flags
) {
1928 assert_return(ret
, -EINVAL
);
1929 assert_return(flags
== 0, -EINVAL
);
1931 j
= journal_new(flags
, NULL
);
1935 STRV_FOREACH(path
, paths
) {
1936 r
= add_any_file(j
, -1, *path
);
1941 j
->no_new_files
= true;
1947 sd_journal_close(j
);
1951 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
1952 (SD_JOURNAL_OS_ROOT | \
1953 SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER )
1955 _public_
int sd_journal_open_directory_fd(sd_journal
**ret
, int fd
, int flags
) {
1960 assert_return(ret
, -EINVAL
);
1961 assert_return(fd
>= 0, -EBADF
);
1962 assert_return((flags
& ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS
) == 0, -EINVAL
);
1964 if (fstat(fd
, &st
) < 0)
1967 if (!S_ISDIR(st
.st_mode
))
1970 j
= journal_new(flags
, NULL
);
1974 j
->toplevel_fd
= fd
;
1976 if (flags
& SD_JOURNAL_OS_ROOT
)
1977 r
= add_search_paths(j
);
1979 r
= add_root_directory(j
, NULL
, false);
1987 sd_journal_close(j
);
1991 _public_
int sd_journal_open_files_fd(sd_journal
**ret
, int fds
[], unsigned n_fds
, int flags
) {
1998 assert_return(ret
, -EINVAL
);
1999 assert_return(n_fds
> 0, -EBADF
);
2000 assert_return(flags
== 0, -EINVAL
);
2002 j
= journal_new(flags
, NULL
);
2006 for (i
= 0; i
< n_fds
; i
++) {
2014 if (fstat(fds
[i
], &st
) < 0) {
2019 if (!S_ISREG(st
.st_mode
)) {
2024 r
= add_any_file(j
, fds
[i
], NULL
);
2029 j
->no_new_files
= true;
2030 j
->no_inotify
= true;
2036 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2038 ORDERED_HASHMAP_FOREACH(f
, j
->files
, iterator
)
2039 f
->close_fd
= false;
2041 sd_journal_close(j
);
2045 _public_
void sd_journal_close(sd_journal
*j
) {
2051 sd_journal_flush_matches(j
);
2053 ordered_hashmap_free_with_destructor(j
->files
, journal_file_close
);
2054 iterated_cache_free(j
->files_cache
);
2056 while ((d
= hashmap_first(j
->directories_by_path
)))
2057 remove_directory(j
, d
);
2059 while ((d
= hashmap_first(j
->directories_by_wd
)))
2060 remove_directory(j
, d
);
2062 hashmap_free(j
->directories_by_path
);
2063 hashmap_free(j
->directories_by_wd
);
2065 safe_close(j
->inotify_fd
);
2068 log_debug("mmap cache statistics: %u hit, %u miss", mmap_cache_get_hit(j
->mmap
), mmap_cache_get_missed(j
->mmap
));
2069 mmap_cache_unref(j
->mmap
);
2072 hashmap_free_free(j
->errors
);
2076 free(j
->unique_field
);
2077 free(j
->fields_buffer
);
2081 _public_
int sd_journal_get_realtime_usec(sd_journal
*j
, uint64_t *ret
) {
2086 assert_return(j
, -EINVAL
);
2087 assert_return(!journal_pid_changed(j
), -ECHILD
);
2088 assert_return(ret
, -EINVAL
);
2090 f
= j
->current_file
;
2092 return -EADDRNOTAVAIL
;
2094 if (f
->current_offset
<= 0)
2095 return -EADDRNOTAVAIL
;
2097 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2101 *ret
= le64toh(o
->entry
.realtime
);
2105 _public_
int sd_journal_get_monotonic_usec(sd_journal
*j
, uint64_t *ret
, sd_id128_t
*ret_boot_id
) {
2111 assert_return(j
, -EINVAL
);
2112 assert_return(!journal_pid_changed(j
), -ECHILD
);
2114 f
= j
->current_file
;
2116 return -EADDRNOTAVAIL
;
2118 if (f
->current_offset
<= 0)
2119 return -EADDRNOTAVAIL
;
2121 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2126 *ret_boot_id
= o
->entry
.boot_id
;
2128 r
= sd_id128_get_boot(&id
);
2132 if (!sd_id128_equal(id
, o
->entry
.boot_id
))
2137 *ret
= le64toh(o
->entry
.monotonic
);
/* Checks whether a string is acceptable as a journal field name.
 *
 * A name is rejected when it is empty, when it starts with "__", or when it
 * contains any character other than '_', 'A'-'Z' and '0'-'9'. */
static bool field_is_valid(const char *field) {
        const char *c;

        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (c = field; *c != 0; c++) {
                bool allowed;

                allowed = *c == '_' ||
                          (*c >= 'A' && *c <= 'Z') ||
                          (*c >= '0' && *c <= '9');

                if (!allowed)
                        return false;
        }

        return true;
}
2170 _public_
int sd_journal_get_data(sd_journal
*j
, const char *field
, const void **data
, size_t *size
) {
2173 size_t field_length
;
2177 assert_return(j
, -EINVAL
);
2178 assert_return(!journal_pid_changed(j
), -ECHILD
);
2179 assert_return(field
, -EINVAL
);
2180 assert_return(data
, -EINVAL
);
2181 assert_return(size
, -EINVAL
);
2182 assert_return(field_is_valid(field
), -EINVAL
);
2184 f
= j
->current_file
;
2186 return -EADDRNOTAVAIL
;
2188 if (f
->current_offset
<= 0)
2189 return -EADDRNOTAVAIL
;
2191 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2195 field_length
= strlen(field
);
2197 n
= journal_file_entry_n_items(o
);
2198 for (i
= 0; i
< n
; i
++) {
2204 p
= le64toh(o
->entry
.items
[i
].object_offset
);
2205 le_hash
= o
->entry
.items
[i
].hash
;
2206 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
2210 if (le_hash
!= o
->data
.hash
)
2213 l
= le64toh(o
->object
.size
) - offsetof(Object
, data
.payload
);
2215 compression
= o
->object
.flags
& OBJECT_COMPRESSION_MASK
;
2217 #if HAVE_XZ || HAVE_LZ4
2218 r
= decompress_startswith(compression
,
2220 &f
->compress_buffer
, &f
->compress_buffer_size
,
2221 field
, field_length
, '=');
2223 log_debug_errno(r
, "Cannot decompress %s object of length %"PRIu64
" at offset "OFSfmt
": %m",
2224 object_compressed_to_string(compression
), l
, p
);
2229 r
= decompress_blob(compression
,
2231 &f
->compress_buffer
, &f
->compress_buffer_size
, &rsize
,
2236 *data
= f
->compress_buffer
;
2237 *size
= (size_t) rsize
;
2242 return -EPROTONOSUPPORT
;
2244 } else if (l
>= field_length
+1 &&
2245 memcmp(o
->data
.payload
, field
, field_length
) == 0 &&
2246 o
->data
.payload
[field_length
] == '=') {
2250 if ((uint64_t) t
!= l
)
2253 *data
= o
->data
.payload
;
2259 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2267 static int return_data(sd_journal
*j
, JournalFile
*f
, Object
*o
, const void **data
, size_t *size
) {
2272 l
= le64toh(o
->object
.size
) - offsetof(Object
, data
.payload
);
2275 /* We can't read objects larger than 4G on a 32bit machine */
2276 if ((uint64_t) t
!= l
)
2279 compression
= o
->object
.flags
& OBJECT_COMPRESSION_MASK
;
2281 #if HAVE_XZ || HAVE_LZ4
2285 r
= decompress_blob(compression
,
2286 o
->data
.payload
, l
, &f
->compress_buffer
,
2287 &f
->compress_buffer_size
, &rsize
, j
->data_threshold
);
2291 *data
= f
->compress_buffer
;
2292 *size
= (size_t) rsize
;
2294 return -EPROTONOSUPPORT
;
2297 *data
= o
->data
.payload
;
2304 _public_
int sd_journal_enumerate_data(sd_journal
*j
, const void **data
, size_t *size
) {
2311 assert_return(j
, -EINVAL
);
2312 assert_return(!journal_pid_changed(j
), -ECHILD
);
2313 assert_return(data
, -EINVAL
);
2314 assert_return(size
, -EINVAL
);
2316 f
= j
->current_file
;
2318 return -EADDRNOTAVAIL
;
2320 if (f
->current_offset
<= 0)
2321 return -EADDRNOTAVAIL
;
2323 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
2327 n
= journal_file_entry_n_items(o
);
2328 if (j
->current_field
>= n
)
2331 p
= le64toh(o
->entry
.items
[j
->current_field
].object_offset
);
2332 le_hash
= o
->entry
.items
[j
->current_field
].hash
;
2333 r
= journal_file_move_to_object(f
, OBJECT_DATA
, p
, &o
);
2337 if (le_hash
!= o
->data
.hash
)
2340 r
= return_data(j
, f
, o
, data
, size
);
2349 _public_
void sd_journal_restart_data(sd_journal
*j
) {
2353 j
->current_field
= 0;
2356 static int reiterate_all_paths(sd_journal
*j
) {
2359 if (j
->no_new_files
)
2360 return add_current_paths(j
);
2362 if (j
->flags
& SD_JOURNAL_OS_ROOT
)
2363 return add_search_paths(j
);
2365 if (j
->toplevel_fd
>= 0)
2366 return add_root_directory(j
, NULL
, false);
2369 return add_root_directory(j
, j
->path
, true);
2371 return add_search_paths(j
);
2374 _public_
int sd_journal_get_fd(sd_journal
*j
) {
2377 assert_return(j
, -EINVAL
);
2378 assert_return(!journal_pid_changed(j
), -ECHILD
);
2381 return -EMEDIUMTYPE
;
2383 if (j
->inotify_fd
>= 0)
2384 return j
->inotify_fd
;
2386 r
= allocate_inotify(j
);
2390 log_debug("Reiterating files to get inotify watches established.");
2392 /* Iterate through all dirs again, to add them to the inotify */
2393 r
= reiterate_all_paths(j
);
2397 return j
->inotify_fd
;
2400 _public_
int sd_journal_get_events(sd_journal
*j
) {
2403 assert_return(j
, -EINVAL
);
2404 assert_return(!journal_pid_changed(j
), -ECHILD
);
2406 fd
= sd_journal_get_fd(j
);
2413 _public_
int sd_journal_get_timeout(sd_journal
*j
, uint64_t *timeout_usec
) {
2416 assert_return(j
, -EINVAL
);
2417 assert_return(!journal_pid_changed(j
), -ECHILD
);
2418 assert_return(timeout_usec
, -EINVAL
);
2420 fd
= sd_journal_get_fd(j
);
2424 if (!j
->on_network
) {
2425 *timeout_usec
= (uint64_t) -1;
2429 /* If we are on the network we need to regularly check for
2430 * changes manually */
2432 *timeout_usec
= j
->last_process_usec
+ JOURNAL_FILES_RECHECK_USEC
;
2436 static void process_q_overflow(sd_journal
*j
) {
2443 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2444 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2445 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2446 * are subject for unloading. */
2448 log_debug("Inotify queue overrun, reiterating everything.");
2451 (void) reiterate_all_paths(j
);
2453 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2455 if (f
->last_seen_generation
== j
->generation
)
2458 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f
->path
);
2459 remove_file_real(j
, f
);
2462 HASHMAP_FOREACH(m
, j
->directories_by_path
, i
) {
2464 if (m
->last_seen_generation
== j
->generation
)
2467 if (m
->is_root
) /* Never GC root directories */
2470 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f
->path
);
2471 remove_directory(j
, m
);
2474 log_debug("Reiteration complete.");
2477 static void process_inotify_event(sd_journal
*j
, struct inotify_event
*e
) {
2483 if (e
->mask
& IN_Q_OVERFLOW
) {
2484 process_q_overflow(j
);
2488 /* Is this a subdirectory we watch? */
2489 d
= hashmap_get(j
->directories_by_wd
, INT_TO_PTR(e
->wd
));
2491 if (!(e
->mask
& IN_ISDIR
) && e
->len
> 0 &&
2492 (endswith(e
->name
, ".journal") ||
2493 endswith(e
->name
, ".journal~"))) {
2495 /* Event for a journal file */
2497 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
2498 (void) add_file(j
, d
->path
, e
->name
);
2499 else if (e
->mask
& (IN_DELETE
|IN_MOVED_FROM
|IN_UNMOUNT
))
2500 remove_file(j
, d
->path
, e
->name
);
2502 } else if (!d
->is_root
&& e
->len
== 0) {
2504 /* Event for a subdirectory */
2506 if (e
->mask
& (IN_DELETE_SELF
|IN_MOVE_SELF
|IN_UNMOUNT
))
2507 remove_directory(j
, d
);
2509 } else if (d
->is_root
&& (e
->mask
& IN_ISDIR
) && e
->len
> 0 && id128_is_valid(e
->name
)) {
2511 /* Event for root directory */
2513 if (e
->mask
& (IN_CREATE
|IN_MOVED_TO
|IN_MODIFY
|IN_ATTRIB
))
2514 (void) add_directory(j
, d
->path
, e
->name
);
2520 if (e
->mask
& IN_IGNORED
)
2523 log_debug("Unexpected inotify event.");
2526 static int determine_change(sd_journal
*j
) {
2531 b
= j
->current_invalidate_counter
!= j
->last_invalidate_counter
;
2532 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
2534 return b
? SD_JOURNAL_INVALIDATE
: SD_JOURNAL_APPEND
;
2537 _public_
int sd_journal_process(sd_journal
*j
) {
2538 bool got_something
= false;
2540 assert_return(j
, -EINVAL
);
2541 assert_return(!journal_pid_changed(j
), -ECHILD
);
2543 if (j
->inotify_fd
< 0) /* We have no inotify fd yet? Then there's nothing to process. */
2546 j
->last_process_usec
= now(CLOCK_MONOTONIC
);
2547 j
->last_invalidate_counter
= j
->current_invalidate_counter
;
2550 union inotify_event_buffer buffer
;
2551 struct inotify_event
*e
;
2554 l
= read(j
->inotify_fd
, &buffer
, sizeof(buffer
));
2556 if (IN_SET(errno
, EAGAIN
, EINTR
))
2557 return got_something
? determine_change(j
) : SD_JOURNAL_NOP
;
2562 got_something
= true;
2564 FOREACH_INOTIFY_EVENT(e
, buffer
, l
)
2565 process_inotify_event(j
, e
);
2569 _public_
int sd_journal_wait(sd_journal
*j
, uint64_t timeout_usec
) {
2573 assert_return(j
, -EINVAL
);
2574 assert_return(!journal_pid_changed(j
), -ECHILD
);
2576 if (j
->inotify_fd
< 0) {
2578 /* This is the first invocation, hence create the
2580 r
= sd_journal_get_fd(j
);
2584 /* The journal might have changed since the context
2585 * object was created and we weren't watching before,
2586 * hence don't wait for anything, and return
2588 return determine_change(j
);
2591 r
= sd_journal_get_timeout(j
, &t
);
2595 if (t
!= (uint64_t) -1) {
2598 n
= now(CLOCK_MONOTONIC
);
2599 t
= t
> n
? t
- n
: 0;
2601 if (timeout_usec
== (uint64_t) -1 || timeout_usec
> t
)
2606 r
= fd_wait_for_event(j
->inotify_fd
, POLLIN
, timeout_usec
);
2607 } while (r
== -EINTR
);
2612 return sd_journal_process(j
);
2615 _public_
int sd_journal_get_cutoff_realtime_usec(sd_journal
*j
, uint64_t *from
, uint64_t *to
) {
2619 uint64_t fmin
= 0, tmax
= 0;
2622 assert_return(j
, -EINVAL
);
2623 assert_return(!journal_pid_changed(j
), -ECHILD
);
2624 assert_return(from
|| to
, -EINVAL
);
2625 assert_return(from
!= to
, -EINVAL
);
2627 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2630 r
= journal_file_get_cutoff_realtime_usec(f
, &fr
, &t
);
2643 fmin
= MIN(fr
, fmin
);
2644 tmax
= MAX(t
, tmax
);
2653 return first
? 0 : 1;
2656 _public_
int sd_journal_get_cutoff_monotonic_usec(sd_journal
*j
, sd_id128_t boot_id
, uint64_t *from
, uint64_t *to
) {
2662 assert_return(j
, -EINVAL
);
2663 assert_return(!journal_pid_changed(j
), -ECHILD
);
2664 assert_return(from
|| to
, -EINVAL
);
2665 assert_return(from
!= to
, -EINVAL
);
2667 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2670 r
= journal_file_get_cutoff_monotonic_usec(f
, boot_id
, &fr
, &t
);
2680 *from
= MIN(fr
, *from
);
2695 void journal_print_header(sd_journal
*j
) {
2698 bool newline
= false;
2702 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2708 journal_file_print_header(f
);
2712 _public_
int sd_journal_get_usage(sd_journal
*j
, uint64_t *bytes
) {
2717 assert_return(j
, -EINVAL
);
2718 assert_return(!journal_pid_changed(j
), -ECHILD
);
2719 assert_return(bytes
, -EINVAL
);
2721 ORDERED_HASHMAP_FOREACH(f
, j
->files
, i
) {
2724 if (fstat(f
->fd
, &st
) < 0)
2727 sum
+= (uint64_t) st
.st_blocks
* 512ULL;
2734 _public_
int sd_journal_query_unique(sd_journal
*j
, const char *field
) {
2737 assert_return(j
, -EINVAL
);
2738 assert_return(!journal_pid_changed(j
), -ECHILD
);
2739 assert_return(!isempty(field
), -EINVAL
);
2740 assert_return(field_is_valid(field
), -EINVAL
);
2746 free(j
->unique_field
);
2747 j
->unique_field
= f
;
2748 j
->unique_file
= NULL
;
2749 j
->unique_offset
= 0;
2750 j
->unique_file_lost
= false;
2755 _public_
int sd_journal_enumerate_unique(sd_journal
*j
, const void **data
, size_t *l
) {
2758 assert_return(j
, -EINVAL
);
2759 assert_return(!journal_pid_changed(j
), -ECHILD
);
2760 assert_return(data
, -EINVAL
);
2761 assert_return(l
, -EINVAL
);
2762 assert_return(j
->unique_field
, -EINVAL
);
2764 k
= strlen(j
->unique_field
);
2766 if (!j
->unique_file
) {
2767 if (j
->unique_file_lost
)
2770 j
->unique_file
= ordered_hashmap_first(j
->files
);
2771 if (!j
->unique_file
)
2774 j
->unique_offset
= 0;
2786 /* Proceed to next data object in the field's linked list */
2787 if (j
->unique_offset
== 0) {
2788 r
= journal_file_find_field_object(j
->unique_file
, j
->unique_field
, k
, &o
, NULL
);
2792 j
->unique_offset
= r
> 0 ? le64toh(o
->field
.head_data_offset
) : 0;
2794 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_DATA
, j
->unique_offset
, &o
);
2798 j
->unique_offset
= le64toh(o
->data
.next_field_offset
);
2801 /* We reached the end of the list? Then start again, with the next file */
2802 if (j
->unique_offset
== 0) {
2803 j
->unique_file
= ordered_hashmap_next(j
->files
, j
->unique_file
->path
);
2804 if (!j
->unique_file
)
2810 /* We do not use OBJECT_DATA context here, but OBJECT_UNUSED
2811 * instead, so that we can look at this data object at the same
2812 * time as one on another file */
2813 r
= journal_file_move_to_object(j
->unique_file
, OBJECT_UNUSED
, j
->unique_offset
, &o
);
2817 /* Let's do the type check by hand, since we used 0 context above. */
2818 if (o
->object
.type
!= OBJECT_DATA
) {
2819 log_debug("%s:offset " OFSfmt
": object has type %d, expected %d",
2820 j
->unique_file
->path
, j
->unique_offset
,
2821 o
->object
.type
, OBJECT_DATA
);
2825 r
= return_data(j
, j
->unique_file
, o
, &odata
, &ol
);
2829 /* Check if we have at least the field name and "=". */
2831 log_debug("%s:offset " OFSfmt
": object has size %zu, expected at least %zu",
2832 j
->unique_file
->path
, j
->unique_offset
,
2837 if (memcmp(odata
, j
->unique_field
, k
) || ((const char*) odata
)[k
] != '=') {
2838 log_debug("%s:offset " OFSfmt
": object does not start with \"%s=\"",
2839 j
->unique_file
->path
, j
->unique_offset
,
2844 /* OK, now let's see if we already returned this data
2845 * object by checking if it exists in the earlier
2846 * traversed files. */
2848 ORDERED_HASHMAP_FOREACH(of
, j
->files
, i
) {
2849 if (of
== j
->unique_file
)
2852 /* Skip this file it didn't have any fields indexed */
2853 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
2856 r
= journal_file_find_data_object_with_hash(of
, odata
, ol
, le64toh(o
->data
.hash
), NULL
, NULL
);
2868 r
= return_data(j
, j
->unique_file
, o
, data
, l
);
2876 _public_
void sd_journal_restart_unique(sd_journal
*j
) {
2880 j
->unique_file
= NULL
;
2881 j
->unique_offset
= 0;
2882 j
->unique_file_lost
= false;
2885 _public_
int sd_journal_enumerate_fields(sd_journal
*j
, const char **field
) {
2888 assert_return(j
, -EINVAL
);
2889 assert_return(!journal_pid_changed(j
), -ECHILD
);
2890 assert_return(field
, -EINVAL
);
2892 if (!j
->fields_file
) {
2893 if (j
->fields_file_lost
)
2896 j
->fields_file
= ordered_hashmap_first(j
->files
);
2897 if (!j
->fields_file
)
2900 j
->fields_hash_table_index
= 0;
2901 j
->fields_offset
= 0;
2905 JournalFile
*f
, *of
;
2914 if (j
->fields_offset
== 0) {
2917 /* We are not yet positioned at any field. Let's pick the first one */
2918 r
= journal_file_map_field_hash_table(f
);
2922 m
= le64toh(f
->header
->field_hash_table_size
) / sizeof(HashItem
);
2924 if (j
->fields_hash_table_index
>= m
) {
2925 /* Reached the end of the hash table, go to the next file. */
2930 j
->fields_offset
= le64toh(f
->field_hash_table
[j
->fields_hash_table_index
].head_hash_offset
);
2932 if (j
->fields_offset
!= 0)
2935 /* Empty hash table bucket, go to next one */
2936 j
->fields_hash_table_index
++;
2940 /* Proceed with next file */
2941 j
->fields_file
= ordered_hashmap_next(j
->files
, f
->path
);
2942 if (!j
->fields_file
) {
2947 j
->fields_offset
= 0;
2948 j
->fields_hash_table_index
= 0;
2953 /* We are already positioned at a field. If so, let's figure out the next field from it */
2955 r
= journal_file_move_to_object(f
, OBJECT_FIELD
, j
->fields_offset
, &o
);
2959 j
->fields_offset
= le64toh(o
->field
.next_hash_offset
);
2960 if (j
->fields_offset
== 0) {
2961 /* Reached the end of the hash table chain */
2962 j
->fields_hash_table_index
++;
2967 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
2968 r
= journal_file_move_to_object(f
, OBJECT_UNUSED
, j
->fields_offset
, &o
);
2972 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
2973 if (o
->object
.type
!= OBJECT_FIELD
) {
2974 log_debug("%s:offset " OFSfmt
": object has type %i, expected %i", f
->path
, j
->fields_offset
, o
->object
.type
, OBJECT_FIELD
);
2978 sz
= le64toh(o
->object
.size
) - offsetof(Object
, field
.payload
);
2980 /* Let's see if we already returned this field name before. */
2982 ORDERED_HASHMAP_FOREACH(of
, j
->files
, i
) {
2986 /* Skip this file it didn't have any fields indexed */
2987 if (JOURNAL_HEADER_CONTAINS(of
->header
, n_fields
) && le64toh(of
->header
->n_fields
) <= 0)
2990 r
= journal_file_find_field_object_with_hash(of
, o
->field
.payload
, sz
, le64toh(o
->field
.hash
), NULL
, NULL
);
3002 /* Check if this is really a valid string containing no NUL byte */
3003 if (memchr(o
->field
.payload
, 0, sz
))
3006 if (sz
> j
->data_threshold
)
3007 sz
= j
->data_threshold
;
3009 if (!GREEDY_REALLOC(j
->fields_buffer
, j
->fields_buffer_allocated
, sz
+ 1))
3012 memcpy(j
->fields_buffer
, o
->field
.payload
, sz
);
3013 j
->fields_buffer
[sz
] = 0;
3015 if (!field_is_valid(j
->fields_buffer
))
3018 *field
= j
->fields_buffer
;
3023 _public_
void sd_journal_restart_fields(sd_journal
*j
) {
3027 j
->fields_file
= NULL
;
3028 j
->fields_hash_table_index
= 0;
3029 j
->fields_offset
= 0;
3030 j
->fields_file_lost
= false;
3033 _public_
int sd_journal_reliable_fd(sd_journal
*j
) {
3034 assert_return(j
, -EINVAL
);
3035 assert_return(!journal_pid_changed(j
), -ECHILD
);
3037 return !j
->on_network
;
3040 static char *lookup_field(const char *field
, void *userdata
) {
3041 sd_journal
*j
= userdata
;
3049 r
= sd_journal_get_data(j
, field
, &data
, &size
);
3051 size
> REPLACE_VAR_MAX
)
3052 return strdup(field
);
3054 d
= strlen(field
) + 1;
3056 return strndup((const char*) data
+ d
, size
- d
);
3059 _public_
int sd_journal_get_catalog(sd_journal
*j
, char **ret
) {
3063 _cleanup_free_
char *text
= NULL
, *cid
= NULL
;
3067 assert_return(j
, -EINVAL
);
3068 assert_return(!journal_pid_changed(j
), -ECHILD
);
3069 assert_return(ret
, -EINVAL
);
3071 r
= sd_journal_get_data(j
, "MESSAGE_ID", &data
, &size
);
3075 cid
= strndup((const char*) data
+ 11, size
- 11);
3079 r
= sd_id128_from_string(cid
, &id
);
3083 r
= catalog_get(CATALOG_DATABASE
, id
, &text
);
3087 t
= replace_var(text
, lookup_field
, j
);
3095 _public_
int sd_journal_get_catalog_for_message_id(sd_id128_t id
, char **ret
) {
3096 assert_return(ret
, -EINVAL
);
3098 return catalog_get(CATALOG_DATABASE
, id
, ret
);
3101 _public_
int sd_journal_set_data_threshold(sd_journal
*j
, size_t sz
) {
3102 assert_return(j
, -EINVAL
);
3103 assert_return(!journal_pid_changed(j
), -ECHILD
);
3105 j
->data_threshold
= sz
;
3109 _public_
int sd_journal_get_data_threshold(sd_journal
*j
, size_t *sz
) {
3110 assert_return(j
, -EINVAL
);
3111 assert_return(!journal_pid_changed(j
), -ECHILD
);
3112 assert_return(sz
, -EINVAL
);
3114 *sz
= j
->data_threshold
;
3118 _public_
int sd_journal_has_runtime_files(sd_journal
*j
) {
3119 assert_return(j
, -EINVAL
);
3121 return j
->has_runtime_files
;
3124 _public_
int sd_journal_has_persistent_files(sd_journal
*j
) {
3125 assert_return(j
, -EINVAL
);
3127 return j
->has_persistent_files
;