]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/libsystemd/sd-journal/sd-journal.c
Merge pull request #31754 from YHNdnzj/journal-fd-namespace
[thirdparty/systemd.git] / src / libsystemd / sd-journal / sd-journal.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <inttypes.h>
6 #include <linux/magic.h>
7 #include <poll.h>
8 #include <stddef.h>
9 #include <sys/inotify.h>
10 #include <sys/vfs.h>
11 #include <unistd.h>
12
13 #include "sd-journal.h"
14
15 #include "alloc-util.h"
16 #include "catalog.h"
17 #include "compress.h"
18 #include "dirent-util.h"
19 #include "env-file.h"
20 #include "escape.h"
21 #include "fd-util.h"
22 #include "fileio.h"
23 #include "format-util.h"
24 #include "fs-util.h"
25 #include "hashmap.h"
26 #include "hostname-util.h"
27 #include "id128-util.h"
28 #include "inotify-util.h"
29 #include "io-util.h"
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-internal.h"
33 #include "list.h"
34 #include "lookup3.h"
35 #include "nulstr-util.h"
36 #include "origin-id.h"
37 #include "path-util.h"
38 #include "prioq.h"
39 #include "process-util.h"
40 #include "replace-var.h"
41 #include "sort-util.h"
42 #include "stat-util.h"
43 #include "stdio-util.h"
44 #include "string-util.h"
45 #include "strv.h"
46 #include "syslog-util.h"
47 #include "uid-classification.h"
48
/* Interval constant for re-checking journal files; it is not referenced in the visible part of this file.
 * NOTE(review): presumably two seconds expressed in microseconds, confirm against USEC_PER_SEC. */
49 #define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)
50
51 /* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as
52 * we want to be able to show all officially valid paths at least */
53 #define REPLACE_VAR_MAX PATH_MAX
54
/* Default data threshold, 64*1024 = 65536.
 * NOTE(review): presumably a byte count; its users are outside this view, confirm there. */
55 #define DEFAULT_DATA_THRESHOLD (64*1024)
56
/* NOTE(review): presumably generates the journal_origin_changed() helper that the public entry points
 * below call, confirm in origin-id.h. */
57 DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_journal, journal);
58
/* Forward declarations of helpers that are referenced in this file before their definitions appear. */
59 static void remove_file_real(sd_journal *j, JournalFile *f);
60 static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f);
61 static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f);
62
63 static int journal_put_error(sd_journal *j, int r, const char *path) {
64 _cleanup_free_ char *copy = NULL;
65 int k;
66
67 /* Memorize an error we encountered, and store which
68 * file/directory it was generated from. Note that we store
69 * only *one* path per error code, as the error code is the
70 * key into the hashmap, and the path is the value. This means
71 * we keep track only of all error kinds, but not of all error
72 * locations. This has the benefit that the hashmap cannot
73 * grow beyond bounds.
74 *
75 * We return an error here only if we didn't manage to
76 * memorize the real error. */
77
78 if (r >= 0)
79 return r;
80
81 if (path) {
82 copy = strdup(path);
83 if (!copy)
84 return -ENOMEM;
85 }
86
87 k = hashmap_ensure_put(&j->errors, NULL, INT_TO_PTR(r), copy);
88 if (k < 0) {
89 if (k == -EEXIST)
90 return 0;
91
92 return k;
93 }
94
95 TAKE_PTR(copy);
96 return 0;
97 }
98
99 static void detach_location(sd_journal *j) {
100 JournalFile *f;
101
102 assert(j);
103
104 j->current_file = NULL;
105 j->current_field = 0;
106
107 ORDERED_HASHMAP_FOREACH(f, j->files)
108 journal_file_reset_location(f);
109 }
110
111 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
112 assert(l);
113 assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
114 assert(f);
115
116 *l = (Location) {
117 .type = type,
118 .seqnum = le64toh(o->entry.seqnum),
119 .seqnum_id = f->header->seqnum_id,
120 .realtime = le64toh(o->entry.realtime),
121 .monotonic = le64toh(o->entry.monotonic),
122 .boot_id = o->entry.boot_id,
123 .xor_hash = le64toh(o->entry.xor_hash),
124 .seqnum_set = true,
125 .realtime_set = true,
126 .monotonic_set = true,
127 .xor_hash_set = true,
128 };
129 }
130
131 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
132 assert(j);
133 assert(f);
134 assert(o);
135
136 init_location(&j->current_location, LOCATION_DISCRETE, f, o);
137
138 j->current_file = f;
139 j->current_field = 0;
140
141 /* Let f know its candidate entry was picked. */
142 assert(f->location_type == LOCATION_SEEK);
143 f->location_type = LOCATION_DISCRETE;
144 }
145
static int match_is_valid(const void *data, size_t size) {
        const char *b = data;

        /* Checks whether the 'size' bytes at 'data' form an acceptable match expression: a non-empty
         * field name made only of 'A'-'Z', '0'-'9' and '_', not beginning with two underscores,
         * followed by '='. Whatever follows the first '=' is not inspected. Returns non-zero if valid,
         * zero otherwise. */

        assert(data);

        if (size < 2)
                return false;

        if (b[0] == '_' && b[1] == '_')
                return false;

        for (size_t i = 0; i < size; i++) {
                char c = b[i];

                /* The first '=' ends the field name, which must not be empty. */
                if (c == '=')
                        return i > 0;

                bool allowed = c == '_' ||
                               (c >= 'A' && c <= 'Z') ||
                               (c >= '0' && c <= '9');
                if (!allowed)
                        return false;
        }

        /* Ran out of data without ever seeing a '='. */
        return false;
}
174
static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
        const uint8_t *lhs = _a, *rhs = _b;
        size_t limit = s < t ? s : t;

        /* Tells whether two match expressions refer to the same field, i.e. whether they are identical
         * up to and including their first '='. Walks both buffers in lockstep over their common length. */

        for (size_t i = 0; i < limit; i++) {
                if (lhs[i] != rhs[i])
                        return false;

                if (lhs[i] == '=')
                        return true;
        }

        /* Reaching the end of the common length with neither a difference nor a '=' is treated as
         * impossible. */
        assert_not_reached();
}
189
190 static Match *match_new(Match *p, MatchType t) {
191 Match *m;
192
193 m = new(Match, 1);
194 if (!m)
195 return NULL;
196
197 *m = (Match) {
198 .type = t,
199 .parent = p,
200 };
201
202 if (p)
203 LIST_PREPEND(matches, p->matches, m);
204
205 return m;
206 }
207
208 static Match *match_free(Match *m) {
209 assert(m);
210
211 while (m->matches)
212 match_free(m->matches);
213
214 if (m->parent)
215 LIST_REMOVE(matches, m->parent->matches, m);
216
217 free(m->data);
218 return mfree(m);
219 }
220
221 static Match *match_free_if_empty(Match *m) {
222 if (!m || m->matches)
223 return m;
224
225 return match_free(m);
226 }
227
228 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
229 Match *add_here = NULL, *m = NULL;
230 uint64_t hash;
231
232 assert_return(j, -EINVAL);
233 assert_return(!journal_origin_changed(j), -ECHILD);
234 assert_return(data, -EINVAL);
235
236 if (size == 0)
237 size = strlen(data);
238
239 if (!match_is_valid(data, size))
240 return -EINVAL;
241
242 /* level 0: AND term
243 * level 1: OR terms
244 * level 2: AND terms
245 * level 3: OR terms
246 * level 4: concrete matches */
247
248 if (!j->level0) {
249 j->level0 = match_new(NULL, MATCH_AND_TERM);
250 if (!j->level0)
251 return -ENOMEM;
252 }
253
254 if (!j->level1) {
255 j->level1 = match_new(j->level0, MATCH_OR_TERM);
256 if (!j->level1)
257 return -ENOMEM;
258 }
259
260 if (!j->level2) {
261 j->level2 = match_new(j->level1, MATCH_AND_TERM);
262 if (!j->level2)
263 return -ENOMEM;
264 }
265
266 assert(j->level0->type == MATCH_AND_TERM);
267 assert(j->level1->type == MATCH_OR_TERM);
268 assert(j->level2->type == MATCH_AND_TERM);
269
270 /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
271 * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
272 hash = jenkins_hash64(data, size);
273
274 LIST_FOREACH(matches, l3, j->level2->matches) {
275 assert(l3->type == MATCH_OR_TERM);
276
277 LIST_FOREACH(matches, l4, l3->matches) {
278 assert(l4->type == MATCH_DISCRETE);
279
280 /* Exactly the same match already? Then ignore
281 * this addition */
282 if (l4->hash == hash &&
283 l4->size == size &&
284 memcmp(l4->data, data, size) == 0)
285 return 0;
286
287 /* Same field? Then let's add this to this OR term */
288 if (same_field(data, size, l4->data, l4->size)) {
289 add_here = l3;
290 break;
291 }
292 }
293
294 if (add_here)
295 break;
296 }
297
298 if (!add_here) {
299 add_here = match_new(j->level2, MATCH_OR_TERM);
300 if (!add_here)
301 goto fail;
302 }
303
304 m = match_new(add_here, MATCH_DISCRETE);
305 if (!m)
306 goto fail;
307
308 m->hash = hash;
309 m->size = size;
310 m->data = memdup(data, size);
311 if (!m->data)
312 goto fail;
313
314 detach_location(j);
315
316 return 0;
317
318 fail:
319 match_free(m);
320 match_free_if_empty(add_here);
321 j->level2 = match_free_if_empty(j->level2);
322 j->level1 = match_free_if_empty(j->level1);
323 j->level0 = match_free_if_empty(j->level0);
324
325 return -ENOMEM;
326 }
327
328 _public_ int sd_journal_add_conjunction(sd_journal *j) {
329 assert_return(j, -EINVAL);
330 assert_return(!journal_origin_changed(j), -ECHILD);
331
332 if (!j->level0)
333 return 0;
334
335 if (!j->level1)
336 return 0;
337
338 if (!j->level1->matches)
339 return 0;
340
341 j->level1 = NULL;
342 j->level2 = NULL;
343
344 return 0;
345 }
346
347 _public_ int sd_journal_add_disjunction(sd_journal *j) {
348 assert_return(j, -EINVAL);
349 assert_return(!journal_origin_changed(j), -ECHILD);
350
351 if (!j->level0)
352 return 0;
353
354 if (!j->level1)
355 return 0;
356
357 if (!j->level2)
358 return 0;
359
360 if (!j->level2->matches)
361 return 0;
362
363 j->level2 = NULL;
364 return 0;
365 }
366
367 static char *match_make_string(Match *m) {
368 _cleanup_free_ char *p = NULL;
369 bool enclose = false;
370
371 if (!m)
372 return strdup("none");
373
374 if (m->type == MATCH_DISCRETE)
375 return cescape_length(m->data, m->size);
376
377 LIST_FOREACH(matches, i, m->matches) {
378 _cleanup_free_ char *t = NULL;
379
380 t = match_make_string(i);
381 if (!t)
382 return NULL;
383
384 if (p) {
385 if (!strextend(&p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t))
386 return NULL;
387
388 enclose = true;
389 } else
390 p = TAKE_PTR(t);
391 }
392
393 if (enclose)
394 return strjoin("(", p, ")");
395
396 return TAKE_PTR(p);
397 }
398
399 char *journal_make_match_string(sd_journal *j) {
400 assert(j);
401
402 return match_make_string(j->level0);
403 }
404
405 _public_ void sd_journal_flush_matches(sd_journal *j) {
406 if (!j || journal_origin_changed(j))
407 return;
408
409 if (j->level0)
410 match_free(j->level0);
411
412 j->level0 = j->level1 = j->level2 = NULL;
413
414 detach_location(j);
415 }
416
417 static int newest_by_boot_id_compare(const NewestByBootId *a, const NewestByBootId *b) {
418 return id128_compare_func(&a->boot_id, &b->boot_id);
419 }
420
421 static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f) {
422 NewestByBootId *found;
423
424 assert(j);
425 assert(f);
426
427 if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) /* not linked currently, hence this is a NOP */
428 return;
429
430 found = typesafe_bsearch(&(NewestByBootId) { .boot_id = f->newest_boot_id },
431 j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
432 assert(found);
433
434 assert_se(prioq_remove(found->prioq, f, &f->newest_boot_id_prioq_idx) > 0);
435 f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL;
436
437 /* The prioq may be empty, but that should not cause any issue. Let's keep it. */
438 }
439
440 static void journal_clear_newest_by_boot_id(sd_journal *j) {
441 FOREACH_ARRAY(i, j->newest_by_boot_id, j->n_newest_by_boot_id) {
442 JournalFile *f;
443
444 while ((f = prioq_peek(i->prioq)))
445 journal_file_unlink_newest_by_boot_id(j, f);
446
447 prioq_free(i->prioq);
448 }
449
450 j->newest_by_boot_id = mfree(j->newest_by_boot_id);
451 j->n_newest_by_boot_id = 0;
452 }
453
454 static int journal_file_newest_monotonic_compare(const void *a, const void *b) {
455 const JournalFile *x = a, *y = b;
456
457 return -CMP(x->newest_monotonic_usec, y->newest_monotonic_usec); /* Invert order, we want newest first! */
458 }
459
460 static int journal_file_reshuffle_newest_by_boot_id(sd_journal *j, JournalFile *f) {
461 NewestByBootId *found;
462 int r;
463
464 assert(j);
465 assert(f);
466
467 found = typesafe_bsearch(&(NewestByBootId) { .boot_id = f->newest_boot_id },
468 j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
469 if (found) {
470 /* There's already a priority queue for this boot ID */
471
472 if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) {
473 r = prioq_put(found->prioq, f, &f->newest_boot_id_prioq_idx); /* Insert if we aren't in there yet */
474 if (r < 0)
475 return r;
476 } else
477 prioq_reshuffle(found->prioq, f, &f->newest_boot_id_prioq_idx); /* Reshuffle otherwise */
478
479 } else {
480 _cleanup_(prioq_freep) Prioq *q = NULL;
481
482 /* No priority queue yet, then allocate one */
483
484 assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); /* we can't be a member either */
485
486 q = prioq_new(journal_file_newest_monotonic_compare);
487 if (!q)
488 return -ENOMEM;
489
490 r = prioq_put(q, f, &f->newest_boot_id_prioq_idx);
491 if (r < 0)
492 return r;
493
494 if (!GREEDY_REALLOC(j->newest_by_boot_id, j->n_newest_by_boot_id + 1)) {
495 f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL;
496 return -ENOMEM;
497 }
498
499 j->newest_by_boot_id[j->n_newest_by_boot_id++] = (NewestByBootId) {
500 .boot_id = f->newest_boot_id,
501 .prioq = TAKE_PTR(q),
502 };
503
504 typesafe_qsort(j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
505 }
506
507 return 0;
508 }
509
510 static int journal_file_find_newest_for_boot_id(
511 sd_journal *j,
512 sd_id128_t id,
513 JournalFile **ret) {
514
515 JournalFile *prev = NULL;
516 int r;
517
518 assert(j);
519 assert(ret);
520
521 /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq
522 * accordingly. We do this only a bunch of times, to not be caught in some update loop. */
523 for (unsigned n_tries = 0;; n_tries++) {
524 NewestByBootId *found;
525 JournalFile *f;
526
527 found = typesafe_bsearch(&(NewestByBootId) { .boot_id = id },
528 j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
529
530 f = found ? prioq_peek(found->prioq) : NULL;
531 if (!f)
532 return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
533 "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id));
534
535 if (f == prev || n_tries >= 5) {
536 /* This was already the best answer in the previous run, or we tried too often, use it */
537 *ret = f;
538 return 0;
539 }
540
541 prev = f;
542
543 /* Let's read the journal file's current timestamp once, before we return it, maybe it has changed. */
544 r = journal_file_read_tail_timestamp(j, f);
545 if (r < 0)
546 return log_debug_errno(r, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id));
547 if (r == 0) {
548 /* No new entry found. */
549 *ret = f;
550 return 0;
551 }
552
553 /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the
554 * prioq again and only use the information once we reached an equilibrium or hit a limit */
555 }
556 }
557
558 static int compare_boot_ids(sd_journal *j, sd_id128_t a, sd_id128_t b) {
559 JournalFile *x, *y;
560
561 assert(j);
562
563 /* Try to find the newest open journal file for the two boot ids */
564 if (journal_file_find_newest_for_boot_id(j, a, &x) < 0 ||
565 journal_file_find_newest_for_boot_id(j, b, &y) < 0)
566 return 0;
567
568 /* Only compare the boot id timestamps if they originate from the same machine. If they are from
569 * different machines, then we timestamps of the boot ids might be as off as the timestamps on the
570 * entries and hence not useful for comparing. */
571 if (!sd_id128_equal(x->newest_machine_id, y->newest_machine_id))
572 return 0;
573
574 return CMP(x->newest_realtime_usec, y->newest_realtime_usec);
575 }
576
577 static int compare_with_location(
578 sd_journal *j,
579 const JournalFile *f,
580 const Location *l,
581 const JournalFile *current_file) {
582 int r;
583
584 assert(j);
585 assert(f);
586 assert(l);
587 assert(f->location_type == LOCATION_SEEK);
588 assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
589
590 if (l->monotonic_set &&
591 sd_id128_equal(f->current_boot_id, l->boot_id) &&
592 l->realtime_set &&
593 f->current_realtime == l->realtime &&
594 l->xor_hash_set &&
595 f->current_xor_hash == l->xor_hash &&
596 l->seqnum_set &&
597 sd_id128_equal(f->header->seqnum_id, l->seqnum_id) &&
598 f->current_seqnum == l->seqnum &&
599 f != current_file)
600 return 0;
601
602 if (l->seqnum_set &&
603 sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
604 r = CMP(f->current_seqnum, l->seqnum);
605 if (r != 0)
606 return r;
607 }
608
609 if (l->monotonic_set) {
610 /* If both arguments have the same boot ID, then we can compare the monotonic timestamps. If
611 * they are distinct, then we might able to lookup the timestamps of those boot IDs (if they
612 * are from the same machine) and order by that. */
613 if (sd_id128_equal(f->current_boot_id, l->boot_id))
614 r = CMP(f->current_monotonic, l->monotonic);
615 else
616 r = compare_boot_ids(j, f->current_boot_id, l->boot_id);
617 if (r != 0)
618 return r;
619 }
620
621 if (l->realtime_set) {
622 r = CMP(f->current_realtime, l->realtime);
623 if (r != 0)
624 return r;
625 }
626
627 if (l->xor_hash_set) {
628 r = CMP(f->current_xor_hash, l->xor_hash);
629 if (r != 0)
630 return r;
631 }
632
633 return 0;
634 }
635
636 static int next_for_match(
637 sd_journal *j,
638 Match *m,
639 JournalFile *f,
640 uint64_t after_offset,
641 direction_t direction,
642 Object **ret,
643 uint64_t *offset) {
644
645 int r;
646 uint64_t np = 0;
647
648 assert(j);
649 assert(m);
650 assert(f);
651
652 if (m->type == MATCH_DISCRETE) {
653 Object *d;
654 uint64_t hash;
655
656 /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
657 * we can use what we pre-calculated. */
658 if (JOURNAL_HEADER_KEYED_HASH(f->header))
659 hash = journal_file_hash_data(f, m->data, m->size);
660 else
661 hash = m->hash;
662
663 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, NULL);
664 if (r <= 0)
665 return r;
666
667 return journal_file_move_to_entry_by_offset_for_data(f, d, after_offset, direction, ret, offset);
668
669 } else if (m->type == MATCH_OR_TERM) {
670
671 /* Find the earliest match beyond after_offset */
672
673 LIST_FOREACH(matches, i, m->matches) {
674 uint64_t cp;
675
676 r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
677 if (r < 0)
678 return r;
679 else if (r > 0) {
680 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
681 np = cp;
682 }
683 }
684
685 if (np == 0)
686 return 0;
687
688 } else if (m->type == MATCH_AND_TERM) {
689 Match *last_moved;
690
691 /* Always jump to the next matching entry and repeat
692 * this until we find an offset that matches for all
693 * matches. */
694
695 if (!m->matches)
696 return 0;
697
698 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
699 if (r <= 0)
700 return r;
701
702 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
703 last_moved = m->matches;
704
705 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
706 uint64_t cp;
707
708 r = next_for_match(j, i, f, np, direction, NULL, &cp);
709 if (r <= 0)
710 return r;
711
712 assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
713 if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
714 np = cp;
715 last_moved = i;
716 }
717 }
718 }
719
720 assert(np > 0);
721
722 if (ret) {
723 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret);
724 if (r < 0)
725 return r;
726 }
727
728 if (offset)
729 *offset = np;
730
731 return 1;
732 }
733
734 static int find_location_for_match(
735 sd_journal *j,
736 Match *m,
737 JournalFile *f,
738 direction_t direction,
739 Object **ret,
740 uint64_t *offset) {
741
742 int r;
743
744 assert(j);
745 assert(m);
746 assert(f);
747
748 if (m->type == MATCH_DISCRETE) {
749 Object *d;
750 uint64_t dp, hash;
751
752 if (JOURNAL_HEADER_KEYED_HASH(f->header))
753 hash = journal_file_hash_data(f, m->data, m->size);
754 else
755 hash = m->hash;
756
757 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, &dp);
758 if (r <= 0)
759 return r;
760
761 /* FIXME: missing: find by monotonic */
762
763 if (j->current_location.type == LOCATION_HEAD)
764 return direction == DIRECTION_DOWN ? journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, ret, offset) : 0;
765 if (j->current_location.type == LOCATION_TAIL)
766 return direction == DIRECTION_UP ? journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, ret, offset) : 0;
767 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
768 return journal_file_move_to_entry_by_seqnum_for_data(f, d, j->current_location.seqnum, direction, ret, offset);
769 if (j->current_location.monotonic_set) {
770 r = journal_file_move_to_entry_by_monotonic_for_data(f, d, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
771 if (r != 0)
772 return r;
773
774 /* The data object might have been invalidated. */
775 r = journal_file_move_to_object(f, OBJECT_DATA, dp, &d);
776 if (r < 0)
777 return r;
778 }
779 if (j->current_location.realtime_set)
780 return journal_file_move_to_entry_by_realtime_for_data(f, d, j->current_location.realtime, direction, ret, offset);
781
782 return journal_file_move_to_entry_for_data(f, d, direction, ret, offset);
783
784 } else if (m->type == MATCH_OR_TERM) {
785 uint64_t np = 0;
786
787 /* Find the earliest match */
788
789 LIST_FOREACH(matches, i, m->matches) {
790 uint64_t cp;
791
792 r = find_location_for_match(j, i, f, direction, NULL, &cp);
793 if (r < 0)
794 return r;
795 else if (r > 0) {
796 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
797 np = cp;
798 }
799 }
800
801 if (np == 0)
802 return 0;
803
804 if (ret) {
805 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret);
806 if (r < 0)
807 return r;
808 }
809
810 if (offset)
811 *offset = np;
812
813 return 1;
814
815 } else {
816 uint64_t np = 0;
817
818 assert(m->type == MATCH_AND_TERM);
819
820 /* First jump to the last match, and then find the
821 * next one where all matches match */
822
823 if (!m->matches)
824 return 0;
825
826 LIST_FOREACH(matches, i, m->matches) {
827 uint64_t cp;
828
829 r = find_location_for_match(j, i, f, direction, NULL, &cp);
830 if (r <= 0)
831 return r;
832
833 if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
834 np = cp;
835 }
836
837 return next_for_match(j, m, f, np, direction, ret, offset);
838 }
839 }
840
841 static int find_location_with_matches(
842 sd_journal *j,
843 JournalFile *f,
844 direction_t direction,
845 Object **ret,
846 uint64_t *offset) {
847
848 int r;
849
850 assert(j);
851 assert(f);
852 assert(ret);
853 assert(offset);
854
855 if (!j->level0) {
856 /* No matches is simple */
857
858 if (j->current_location.type == LOCATION_HEAD)
859 return direction == DIRECTION_DOWN ? journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, offset) : 0;
860 if (j->current_location.type == LOCATION_TAIL)
861 return direction == DIRECTION_UP ? journal_file_next_entry(f, 0, DIRECTION_UP, ret, offset) : 0;
862 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
863 return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, offset);
864 if (j->current_location.monotonic_set) {
865 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, offset);
866 if (r != 0)
867 return r;
868 }
869 if (j->current_location.realtime_set)
870 return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, offset);
871
872 return journal_file_next_entry(f, 0, direction, ret, offset);
873 } else
874 return find_location_for_match(j, j->level0, f, direction, ret, offset);
875 }
876
877 static int next_with_matches(
878 sd_journal *j,
879 JournalFile *f,
880 direction_t direction,
881 Object **ret,
882 uint64_t *offset) {
883
884 assert(j);
885 assert(f);
886 assert(ret);
887 assert(offset);
888
889 /* No matches is easy. We simple advance the file
890 * pointer by one. */
891 if (!j->level0)
892 return journal_file_next_entry(f, f->current_offset, direction, ret, offset);
893
894 /* If we have a match then we look for the next matching entry
895 * with an offset at least one step larger */
896 return next_for_match(j, j->level0, f,
897 direction == DIRECTION_DOWN ? f->current_offset + 1
898 : f->current_offset - 1,
899 direction, ret, offset);
900 }
901
902 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
903 Object *c;
904 uint64_t cp, n_entries;
905 int r;
906
907 assert(j);
908 assert(f);
909
910 (void) journal_file_read_tail_timestamp(j, f);
911
912 n_entries = le64toh(f->header->n_entries);
913
914 /* If we hit EOF before, we don't need to look into this file again
915 * unless direction changed or new entries appeared. */
916 if (f->last_direction == direction &&
917 f->location_type == (direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD) &&
918 n_entries == f->last_n_entries)
919 return 0;
920
921 f->last_n_entries = n_entries;
922
923 if (f->last_direction == direction && f->current_offset > 0) {
924 /* LOCATION_SEEK here means we did the work in a previous
925 * iteration and the current location already points to a
926 * candidate entry. */
927 if (f->location_type != LOCATION_SEEK) {
928 r = next_with_matches(j, f, direction, &c, &cp);
929 if (r <= 0)
930 return r;
931
932 journal_file_save_location(f, c, cp);
933 }
934 } else {
935 f->last_direction = direction;
936
937 r = find_location_with_matches(j, f, direction, &c, &cp);
938 if (r <= 0)
939 return r;
940
941 journal_file_save_location(f, c, cp);
942 }
943
944 /* OK, we found the spot, now let's advance until an entry
945 * that is actually different from what we were previously
946 * looking at. This is necessary to handle entries which exist
947 * in two (or more) journal files, and which shall all be
948 * suppressed but one. */
949
950 for (;;) {
951 bool found;
952
953 if (j->current_location.type == LOCATION_DISCRETE) {
954 int k;
955
956 k = compare_with_location(j, f, &j->current_location, j->current_file);
957
958 found = direction == DIRECTION_DOWN ? k > 0 : k < 0;
959 } else
960 found = true;
961
962 if (found)
963 return 1;
964
965 r = next_with_matches(j, f, direction, &c, &cp);
966 if (r <= 0)
967 return r;
968
969 journal_file_save_location(f, c, cp);
970 }
971 }
972
973 static int compare_locations(sd_journal *j, JournalFile *af, JournalFile *bf) {
974 int r;
975
976 assert(j);
977 assert(af);
978 assert(af->header);
979 assert(bf);
980 assert(bf->header);
981 assert(af->location_type == LOCATION_SEEK);
982 assert(bf->location_type == LOCATION_SEEK);
983
984 /* If contents, timestamps and seqnum match, these entries are identical. */
985 if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
986 af->current_monotonic == bf->current_monotonic &&
987 af->current_realtime == bf->current_realtime &&
988 af->current_xor_hash == bf->current_xor_hash &&
989 sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id) &&
990 af->current_seqnum == bf->current_seqnum)
991 return 0;
992
993 if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
994 /* If this is from the same seqnum source, compare seqnums */
995 r = CMP(af->current_seqnum, bf->current_seqnum);
996 if (r != 0)
997 return r;
998
999 /* Wow! This is weird, different data but the same seqnums? Something is borked, but let's
1000 * make the best of it and compare by time. */
1001 }
1002
1003 if (sd_id128_equal(af->current_boot_id, bf->current_boot_id))
1004 /* If the boot id matches, compare monotonic time */
1005 r = CMP(af->current_monotonic, bf->current_monotonic);
1006 else
1007 /* If they don't match try to compare boot IDs */
1008 r = compare_boot_ids(j, af->current_boot_id, bf->current_boot_id);
1009 if (r != 0)
1010 return r;
1011
1012 /* Otherwise, compare UTC time */
1013 r = CMP(af->current_realtime, bf->current_realtime);
1014 if (r != 0)
1015 return r;
1016
1017 /* Finally, compare by contents */
1018 return CMP(af->current_xor_hash, bf->current_xor_hash);
1019 }
1020
1021 static int real_journal_next(sd_journal *j, direction_t direction) {
1022 JournalFile *new_file = NULL;
1023 unsigned n_files;
1024 const void **files;
1025 Object *o;
1026 int r;
1027
1028 assert_return(j, -EINVAL);
1029 assert_return(!journal_origin_changed(j), -ECHILD);
1030
1031 r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
1032 if (r < 0)
1033 return r;
1034
1035 FOREACH_ARRAY(_f, files, n_files) {
1036 JournalFile *f = (JournalFile*) *_f;
1037 bool found;
1038
1039 r = next_beyond_location(j, f, direction);
1040 if (r < 0) {
1041 log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
1042 remove_file_real(j, f);
1043 continue;
1044 } else if (r == 0) {
1045 f->location_type = direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD;
1046 continue;
1047 }
1048
1049 if (!new_file)
1050 found = true;
1051 else {
1052 int k;
1053
1054 k = compare_locations(j, f, new_file);
1055
1056 found = direction == DIRECTION_DOWN ? k < 0 : k > 0;
1057 }
1058
1059 if (found)
1060 new_file = f;
1061 }
1062
1063 if (!new_file)
1064 return 0;
1065
1066 r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
1067 if (r < 0)
1068 return r;
1069
1070 set_location(j, new_file, o);
1071
1072 return 1;
1073 }
1074
1075 _public_ int sd_journal_next(sd_journal *j) {
1076 return real_journal_next(j, DIRECTION_DOWN);
1077 }
1078
1079 _public_ int sd_journal_previous(sd_journal *j) {
1080 return real_journal_next(j, DIRECTION_UP);
1081 }
1082
1083 _public_ int sd_journal_step_one(sd_journal *j, int advanced) {
1084 assert_return(j, -EINVAL);
1085
1086 if (j->current_location.type == LOCATION_HEAD)
1087 return sd_journal_next(j);
1088 if (j->current_location.type == LOCATION_TAIL)
1089 return sd_journal_previous(j);
1090 return real_journal_next(j, advanced ? DIRECTION_DOWN : DIRECTION_UP);
1091 }
1092
1093 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
1094 int c = 0, r;
1095
1096 assert_return(j, -EINVAL);
1097 assert_return(!journal_origin_changed(j), -ECHILD);
1098 assert_return(skip <= INT_MAX, -ERANGE);
1099
1100 if (skip == 0) {
1101 /* If this is not a discrete skip, then at least
1102 * resolve the current location */
1103 if (j->current_location.type != LOCATION_DISCRETE) {
1104 r = real_journal_next(j, direction);
1105 if (r < 0)
1106 return r;
1107 }
1108
1109 return 0;
1110 }
1111
1112 do {
1113 r = real_journal_next(j, direction);
1114 if (r < 0)
1115 return r;
1116
1117 if (r == 0)
1118 return c;
1119
1120 skip--;
1121 c++;
1122 } while (skip > 0);
1123
1124 return c;
1125 }
1126
1127 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
1128 return real_journal_next_skip(j, DIRECTION_DOWN, skip);
1129 }
1130
1131 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
1132 return real_journal_next_skip(j, DIRECTION_UP, skip);
1133 }
1134
1135 _public_ int sd_journal_get_cursor(sd_journal *j, char **cursor) {
1136 Object *o;
1137 int r;
1138
1139 assert_return(j, -EINVAL);
1140 assert_return(!journal_origin_changed(j), -ECHILD);
1141 assert_return(cursor, -EINVAL);
1142
1143 if (!j->current_file || j->current_file->current_offset <= 0)
1144 return -EADDRNOTAVAIL;
1145
1146 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1147 if (r < 0)
1148 return r;
1149
1150 if (asprintf(cursor,
1151 "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
1152 SD_ID128_TO_STRING(j->current_file->header->seqnum_id), le64toh(o->entry.seqnum),
1153 SD_ID128_TO_STRING(o->entry.boot_id), le64toh(o->entry.monotonic),
1154 le64toh(o->entry.realtime),
1155 le64toh(o->entry.xor_hash)) < 0)
1156 return -ENOMEM;
1157
1158 return 0;
1159 }
1160
1161 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
1162 unsigned long long seqnum, monotonic, realtime, xor_hash;
1163 bool seqnum_id_set = false,
1164 seqnum_set = false,
1165 boot_id_set = false,
1166 monotonic_set = false,
1167 realtime_set = false,
1168 xor_hash_set = false;
1169 sd_id128_t seqnum_id, boot_id;
1170 int r;
1171
1172 assert_return(j, -EINVAL);
1173 assert_return(!journal_origin_changed(j), -ECHILD);
1174 assert_return(!isempty(cursor), -EINVAL);
1175
1176 for (const char *p = cursor;;) {
1177 _cleanup_free_ char *word = NULL;
1178
1179 r = extract_first_word(&p, &word, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1180 if (r < 0)
1181 return r;
1182 if (r == 0)
1183 break;
1184
1185 if (word[0] == '\0' || word[1] != '=')
1186 return -EINVAL;
1187
1188 switch (word[0]) {
1189 case 's':
1190 seqnum_id_set = true;
1191 r = sd_id128_from_string(word + 2, &seqnum_id);
1192 if (r < 0)
1193 return r;
1194 break;
1195
1196 case 'i':
1197 seqnum_set = true;
1198 if (sscanf(word + 2, "%llx", &seqnum) != 1)
1199 return -EINVAL;
1200 break;
1201
1202 case 'b':
1203 boot_id_set = true;
1204 r = sd_id128_from_string(word + 2, &boot_id);
1205 if (r < 0)
1206 return r;
1207 break;
1208
1209 case 'm':
1210 monotonic_set = true;
1211 if (sscanf(word + 2, "%llx", &monotonic) != 1)
1212 return -EINVAL;
1213 break;
1214
1215 case 't':
1216 realtime_set = true;
1217 if (sscanf(word + 2, "%llx", &realtime) != 1)
1218 return -EINVAL;
1219 break;
1220
1221 case 'x':
1222 xor_hash_set = true;
1223 if (sscanf(word + 2, "%llx", &xor_hash) != 1)
1224 return -EINVAL;
1225 break;
1226 }
1227 }
1228
1229 if ((!seqnum_set || !seqnum_id_set) &&
1230 (!monotonic_set || !boot_id_set) &&
1231 !realtime_set)
1232 return -EINVAL;
1233
1234 detach_location(j);
1235 j->current_location = (Location) {
1236 .type = LOCATION_SEEK,
1237 };
1238
1239 if (realtime_set) {
1240 j->current_location.realtime = (uint64_t) realtime;
1241 j->current_location.realtime_set = true;
1242 }
1243
1244 if (seqnum_set && seqnum_id_set) {
1245 j->current_location.seqnum = (uint64_t) seqnum;
1246 j->current_location.seqnum_id = seqnum_id;
1247 j->current_location.seqnum_set = true;
1248 }
1249
1250 if (monotonic_set && boot_id_set) {
1251 j->current_location.monotonic = (uint64_t) monotonic;
1252 j->current_location.boot_id = boot_id;
1253 j->current_location.monotonic_set = true;
1254 }
1255
1256 if (xor_hash_set) {
1257 j->current_location.xor_hash = (uint64_t) xor_hash;
1258 j->current_location.xor_hash_set = true;
1259 }
1260
1261 return 0;
1262 }
1263
1264 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1265 int r;
1266 Object *o;
1267
1268 assert_return(j, -EINVAL);
1269 assert_return(!journal_origin_changed(j), -ECHILD);
1270 assert_return(!isempty(cursor), -EINVAL);
1271
1272 if (!j->current_file || j->current_file->current_offset <= 0)
1273 return -EADDRNOTAVAIL;
1274
1275 r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1276 if (r < 0)
1277 return r;
1278
1279 for (;;) {
1280 _cleanup_free_ char *item = NULL;
1281 unsigned long long ll;
1282 sd_id128_t id;
1283 int k = 0;
1284
1285 r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1286 if (r < 0)
1287 return r;
1288
1289 if (r == 0)
1290 break;
1291
1292 if (strlen(item) < 2 || item[1] != '=')
1293 return -EINVAL;
1294
1295 switch (item[0]) {
1296
1297 case 's':
1298 k = sd_id128_from_string(item+2, &id);
1299 if (k < 0)
1300 return k;
1301 if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1302 return 0;
1303 break;
1304
1305 case 'i':
1306 if (sscanf(item+2, "%llx", &ll) != 1)
1307 return -EINVAL;
1308 if (ll != le64toh(o->entry.seqnum))
1309 return 0;
1310 break;
1311
1312 case 'b':
1313 k = sd_id128_from_string(item+2, &id);
1314 if (k < 0)
1315 return k;
1316 if (!sd_id128_equal(id, o->entry.boot_id))
1317 return 0;
1318 break;
1319
1320 case 'm':
1321 if (sscanf(item+2, "%llx", &ll) != 1)
1322 return -EINVAL;
1323 if (ll != le64toh(o->entry.monotonic))
1324 return 0;
1325 break;
1326
1327 case 't':
1328 if (sscanf(item+2, "%llx", &ll) != 1)
1329 return -EINVAL;
1330 if (ll != le64toh(o->entry.realtime))
1331 return 0;
1332 break;
1333
1334 case 'x':
1335 if (sscanf(item+2, "%llx", &ll) != 1)
1336 return -EINVAL;
1337 if (ll != le64toh(o->entry.xor_hash))
1338 return 0;
1339 break;
1340 }
1341 }
1342
1343 return 1;
1344 }
1345
1346 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1347 assert_return(j, -EINVAL);
1348 assert_return(!journal_origin_changed(j), -ECHILD);
1349
1350 detach_location(j);
1351
1352 j->current_location = (Location) {
1353 .type = LOCATION_SEEK,
1354 .boot_id = boot_id,
1355 .monotonic = usec,
1356 .monotonic_set = true,
1357 };
1358
1359 return 0;
1360 }
1361
1362 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1363 assert_return(j, -EINVAL);
1364 assert_return(!journal_origin_changed(j), -ECHILD);
1365
1366 detach_location(j);
1367
1368 j->current_location = (Location) {
1369 .type = LOCATION_SEEK,
1370 .realtime = usec,
1371 .realtime_set = true,
1372 };
1373
1374 return 0;
1375 }
1376
1377 _public_ int sd_journal_seek_head(sd_journal *j) {
1378 assert_return(j, -EINVAL);
1379 assert_return(!journal_origin_changed(j), -ECHILD);
1380
1381 detach_location(j);
1382
1383 j->current_location = (Location) {
1384 .type = LOCATION_HEAD,
1385 };
1386
1387 return 0;
1388 }
1389
1390 _public_ int sd_journal_seek_tail(sd_journal *j) {
1391 assert_return(j, -EINVAL);
1392 assert_return(!journal_origin_changed(j), -ECHILD);
1393
1394 detach_location(j);
1395
1396 j->current_location = (Location) {
1397 .type = LOCATION_TAIL,
1398 };
1399
1400 return 0;
1401 }
1402
1403 static void check_network(sd_journal *j, int fd) {
1404 assert(j);
1405
1406 if (j->on_network)
1407 return;
1408
1409 j->on_network = fd_is_network_fs(fd);
1410 }
1411
/* Returns true if 'filename' is exactly "<prefix>.journal" or "<prefix>.journal~", or if it begins with
 * "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *plain, *with_tilde, *at_prefix;

        plain = strjoina(prefix, ".journal");
        with_tilde = strjoina(plain, "~");
        at_prefix = strjoina(prefix, "@");

        if (STR_IN_SET(filename, plain, with_tilde))
                return true;

        return startswith(filename, at_prefix);
}
1422
1423 static bool file_type_wanted(int flags, const char *filename) {
1424 assert(filename);
1425
1426 if (!ENDSWITH_SET(filename, ".journal", ".journal~"))
1427 return false;
1428
1429 /* no flags set → every type is OK */
1430 if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1431 return true;
1432
1433 if (FLAGS_SET(flags, SD_JOURNAL_CURRENT_USER)) {
1434 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1435
1436 xsprintf(prefix, "user-" UID_FMT, getuid());
1437
1438 if (file_has_type_prefix(prefix, filename))
1439 return true;
1440
1441 /* If SD_JOURNAL_CURRENT_USER is specified and we are invoked under a system UID, then
1442 * automatically enable SD_JOURNAL_SYSTEM too, because journald will actually put system user
1443 * data into the system journal. */
1444
1445 if (uid_for_system_journal(getuid()))
1446 flags |= SD_JOURNAL_SYSTEM;
1447 }
1448
1449 if (FLAGS_SET(flags, SD_JOURNAL_SYSTEM) && file_has_type_prefix("system", filename))
1450 return true;
1451
1452 return false;
1453 }
1454
1455 static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
1456 assert(j);
1457 assert(path);
1458 assert(prefix);
1459
1460 if (j->toplevel_fd >= 0)
1461 return false;
1462
1463 return path_startswith(path, prefix);
1464 }
1465
1466 static void track_file_disposition(sd_journal *j, JournalFile *f) {
1467 assert(j);
1468 assert(f);
1469
1470 if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
1471 j->has_runtime_files = true;
1472 else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
1473 j->has_persistent_files = true;
1474 }
1475
1476 static int add_any_file(
1477 sd_journal *j,
1478 int fd,
1479 const char *path) {
1480
1481 _cleanup_close_ int our_fd = -EBADF;
1482 JournalFile *f;
1483 struct stat st;
1484 int r;
1485
1486 assert(j);
1487 assert(fd >= 0 || path);
1488
1489 if (fd < 0) {
1490 assert(path); /* For gcc. */
1491 if (j->toplevel_fd >= 0)
1492 /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1493 * openat() ignores the first argument. */
1494
1495 fd = our_fd = openat(j->toplevel_fd, skip_leading_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1496 else
1497 fd = our_fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1498 if (fd < 0) {
1499 r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
1500 goto error;
1501 }
1502
1503 r = fd_nonblock(fd, false);
1504 if (r < 0) {
1505 r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
1506 goto error;
1507 }
1508 }
1509
1510 if (fstat(fd, &st) < 0) {
1511 r = log_debug_errno(errno, "Failed to fstat %s: %m", path ?: "fd");
1512 goto error;
1513 }
1514
1515 r = stat_verify_regular(&st);
1516 if (r < 0) {
1517 log_debug_errno(r, "Refusing to open %s: %m", path ?: "fd");
1518 goto error;
1519 }
1520
1521 if (path) {
1522 f = ordered_hashmap_get(j->files, path);
1523 if (f) {
1524 if (stat_inode_same(&f->last_stat, &st)) {
1525 /* We already track this file, under the same path and with the same
1526 * device/inode numbers, it's hence really the same. Mark this file as seen
1527 * in this generation. This is used to GC old files in process_q_overflow()
1528 * to detect journal files that are still there and discern them from those
1529 * which are gone. */
1530
1531 f->last_seen_generation = j->generation;
1532 (void) journal_file_read_tail_timestamp(j, f);
1533 return 0;
1534 }
1535
1536 /* So we tracked a file under this name, but it has a different inode/device. In that
1537 * case, it got replaced (probably due to rotation?), let's drop it hence from our
1538 * list. */
1539 remove_file_real(j, f);
1540 f = NULL;
1541 }
1542 }
1543
1544 if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1545 r = log_debug_errno(SYNTHETIC_ERRNO(ETOOMANYREFS),
1546 "Too many open journal files, not adding %s.", path ?: "fd");
1547 goto error;
1548 }
1549
1550 r = journal_file_open(fd, path, O_RDONLY, 0, 0, 0, NULL, j->mmap, NULL, &f);
1551 if (r < 0) {
1552 log_debug_errno(r, "Failed to open journal file %s: %m", path ?: "from fd");
1553 goto error;
1554 }
1555
1556 /* journal_file_dump(f); */
1557
1558 /* journal_file_open() generates an replacement fname if necessary, so we can use f->path. */
1559 r = ordered_hashmap_put(j->files, f->path, f);
1560 if (r < 0) {
1561 f->close_fd = false; /* Make sure journal_file_close() doesn't close the caller's fd
1562 * (or our own). The caller or we will do that ourselves. */
1563 (void) journal_file_close(f);
1564 goto error;
1565 }
1566
1567 TAKE_FD(our_fd); /* the fd is now owned by the JournalFile object */
1568
1569 f->last_seen_generation = j->generation;
1570
1571 track_file_disposition(j, f);
1572 check_network(j, f->fd);
1573 (void) journal_file_read_tail_timestamp(j, f);
1574
1575 j->current_invalidate_counter++;
1576
1577 log_debug("File %s added.", f->path);
1578
1579 return 0;
1580
1581 error:
1582 (void) journal_put_error(j, r, path); /* path==NULL is OK. */
1583 return r;
1584 }
1585
1586 int journal_get_directories(sd_journal *j, char ***ret) {
1587 _cleanup_strv_free_ char **paths = NULL;
1588 JournalFile *f;
1589 const char *p;
1590 size_t n = SIZE_MAX;
1591 int r;
1592
1593 assert(j);
1594 assert(ret);
1595
1596 /* This returns parent directories of opened journal files. */
1597
1598 ORDERED_HASHMAP_FOREACH_KEY(f, p, j->files) {
1599 _cleanup_free_ char *d = NULL;
1600
1601 /* Ignore paths generated from fd. */
1602 if (path_startswith(p, "/proc/"))
1603 continue;
1604
1605 r = path_extract_directory(p, &d);
1606 if (r < 0)
1607 return r;
1608
1609 if (path_strv_contains(paths, d))
1610 continue;
1611
1612 r = strv_extend_with_size(&paths, &n, d);
1613 if (r < 0)
1614 return r;
1615 }
1616
1617 *ret = TAKE_PTR(paths);
1618 return 0;
1619 }
1620
1621 static int add_file_by_name(
1622 sd_journal *j,
1623 const char *prefix,
1624 const char *filename) {
1625
1626 _cleanup_free_ char *path = NULL;
1627
1628 assert(j);
1629 assert(prefix);
1630 assert(filename);
1631
1632 if (j->no_new_files)
1633 return 0;
1634
1635 if (!file_type_wanted(j->flags, filename))
1636 return 0;
1637
1638 path = path_join(prefix, filename);
1639 if (!path)
1640 return -ENOMEM;
1641
1642 return add_any_file(j, -1, path);
1643 }
1644
1645 static int remove_file_by_name(
1646 sd_journal *j,
1647 const char *prefix,
1648 const char *filename) {
1649
1650 _cleanup_free_ char *path = NULL;
1651 JournalFile *f;
1652
1653 assert(j);
1654 assert(prefix);
1655 assert(filename);
1656
1657 path = path_join(prefix, filename);
1658 if (!path)
1659 return -ENOMEM;
1660
1661 f = ordered_hashmap_get(j->files, path);
1662 if (!f)
1663 return 0;
1664
1665 remove_file_real(j, f);
1666 return 1;
1667 }
1668
1669 static void remove_file_real(sd_journal *j, JournalFile *f) {
1670 assert(j);
1671 assert(f);
1672
1673 (void) ordered_hashmap_remove(j->files, f->path);
1674
1675 log_debug("File %s removed.", f->path);
1676
1677 if (j->current_file == f) {
1678 j->current_file = NULL;
1679 j->current_field = 0;
1680 }
1681
1682 if (j->unique_file == f) {
1683 /* Jump to the next unique_file or NULL if that one was last */
1684 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1685 j->unique_offset = 0;
1686 if (!j->unique_file)
1687 j->unique_file_lost = true;
1688 }
1689
1690 if (j->fields_file == f) {
1691 j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
1692 j->fields_offset = 0;
1693 if (!j->fields_file)
1694 j->fields_file_lost = true;
1695 }
1696
1697 journal_file_unlink_newest_by_boot_id(j, f);
1698 (void) journal_file_close(f);
1699
1700 j->current_invalidate_counter++;
1701 }
1702
1703 static int dirname_is_machine_id(const char *fn) {
1704 sd_id128_t id, machine;
1705 const char *e;
1706 int r;
1707
1708 /* Returns true if the specified directory name matches the local machine ID */
1709
1710 r = sd_id128_get_machine(&machine);
1711 if (r < 0)
1712 return r;
1713
1714 e = strchr(fn, '.');
1715 if (e) {
1716 const char *k;
1717
1718 /* Looks like it has a namespace suffix. Verify that. */
1719 if (!log_namespace_name_valid(e + 1))
1720 return false;
1721
1722 k = strndupa_safe(fn, e - fn);
1723 r = sd_id128_from_string(k, &id);
1724 } else
1725 r = sd_id128_from_string(fn, &id);
1726 if (r < 0)
1727 return r;
1728
1729 return sd_id128_equal(id, machine);
1730 }
1731
/* Checks whether a directory name belongs to the specified namespace. With namespace == NULL the name
 * must be a plain valid 128-bit ID without any suffix; otherwise it must be "<id>.<namespace>" with a
 * valid ID in front of the dot. Returns true/false. */
static int dirname_has_namespace(const char *fn, const char *namespace) {
        const char *dot, *id_part;

        dot = strchr(fn, '.');
        if (!dot) {
                /* No suffix: matches only if no namespace was asked for */
                if (namespace)
                        return false;

                return id128_is_valid(fn);
        }

        if (!namespace || !streq(dot + 1, namespace))
                return false;

        id_part = strndupa_safe(fn, dot - fn);
        return id128_is_valid(id_part);
}
1756
1757 static bool dirent_is_journal_file(const struct dirent *de) {
1758 assert(de);
1759
1760 /* Returns true if the specified directory entry looks like a journal file we might be interested in */
1761
1762 if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
1763 return false;
1764
1765 return endswith(de->d_name, ".journal") ||
1766 endswith(de->d_name, ".journal~");
1767 }
1768
1769 static bool dirent_is_journal_subdir(const struct dirent *de) {
1770 const char *e, *n;
1771 assert(de);
1772
1773 /* returns true if the specified directory entry looks like a directory that might contain journal
1774 * files we might be interested in, i.e. is either a 128-bit ID or a 128-bit ID suffixed by a
1775 * namespace. */
1776
1777 if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
1778 return false;
1779
1780 e = strchr(de->d_name, '.');
1781 if (!e)
1782 return id128_is_valid(de->d_name); /* No namespace */
1783
1784 n = strndupa_safe(de->d_name, e - de->d_name);
1785 if (!id128_is_valid(n))
1786 return false;
1787
1788 return log_namespace_name_valid(e + 1);
1789 }
1790
1791 static int directory_open(sd_journal *j, const char *path, DIR **ret) {
1792 DIR *d;
1793
1794 assert(j);
1795 assert(path);
1796 assert(ret);
1797
1798 if (j->toplevel_fd < 0)
1799 d = opendir(path);
1800 else
1801 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1802 * relative, by dropping the initial slash */
1803 d = xopendirat(j->toplevel_fd, skip_leading_slash(path), 0);
1804 if (!d)
1805 return -errno;
1806
1807 *ret = d;
1808 return 0;
1809 }
1810
1811 static Directory* directory_free(Directory *d) {
1812 if (!d)
1813 return NULL;
1814
1815 if (d->journal) {
1816 if (d->wd > 0 &&
1817 hashmap_remove_value(d->journal->directories_by_wd, INT_TO_PTR(d->wd), d) &&
1818 d->journal->inotify_fd >= 0)
1819 (void) inotify_rm_watch(d->journal->inotify_fd, d->wd);
1820
1821 if (d->path)
1822 hashmap_remove_value(d->journal->directories_by_path, d->path, d);
1823 }
1824
1825 if (d->path) {
1826 if (d->is_root)
1827 log_debug("Root directory %s removed.", d->path);
1828 else
1829 log_debug("Directory %s removed.", d->path);
1830
1831 free(d->path);
1832 }
1833
1834 return mfree(d);
1835 }
1836
1837 DEFINE_TRIVIAL_CLEANUP_FUNC(Directory*, directory_free);
1838
1839 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1840 directories_by_path_hash_ops,
1841 char,
1842 path_hash_func,
1843 path_compare,
1844 Directory,
1845 directory_free);
1846
1847 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1848 directories_by_wd_hash_ops,
1849 void,
1850 trivial_hash_func,
1851 trivial_compare_func,
1852 Directory,
1853 directory_free);
1854
1855 static int add_directory_impl(sd_journal *j, const char *path, bool is_root, Directory **ret) {
1856 _cleanup_(directory_freep) Directory *m = NULL;
1857 Directory *existing;
1858 int r;
1859
1860 assert(j);
1861 assert(path);
1862 assert(ret);
1863
1864 existing = hashmap_get(j->directories_by_path, path);
1865 if (existing) {
1866 if (existing->is_root != is_root) {
1867 /* Don't 'downgrade' from root directory */
1868 *ret = NULL;
1869 return 0;
1870 }
1871
1872 *ret = existing;
1873 return 1;
1874 }
1875
1876 m = new(Directory, 1);
1877 if (!m)
1878 return -ENOMEM;
1879
1880 *m = (Directory) {
1881 .journal = j,
1882 .is_root = is_root,
1883 .path = strdup(path),
1884 .wd = -1,
1885 };
1886
1887 if (!m->path)
1888 return -ENOMEM;
1889
1890 r = hashmap_ensure_put(&j->directories_by_path, &directories_by_path_hash_ops, m->path, m);
1891 if (r < 0)
1892 return r;
1893
1894 j->current_invalidate_counter++;
1895
1896 if (is_root)
1897 log_debug("Root directory %s added.", m->path);
1898 else
1899 log_debug("Directory %s added.", m->path);
1900
1901 *ret = TAKE_PTR(m);
1902 return 1;
1903 }
1904
1905 static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
1906
1907 static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
1908 assert(j);
1909 assert(m);
1910 assert(d);
1911
1912 FOREACH_DIRENT_ALL(de, d, goto fail) {
1913 if (dirent_is_journal_file(de))
1914 (void) add_file_by_name(j, m->path, de->d_name);
1915
1916 if (m->is_root && dirent_is_journal_subdir(de))
1917 (void) add_directory(j, m->path, de->d_name);
1918 }
1919
1920 return;
1921 fail:
1922 log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
1923 }
1924
1925 static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
1926 int r;
1927
1928 assert(j);
1929 assert(m);
1930 assert(fd >= 0);
1931
1932 /* Watch this directory if that's enabled and if it not being watched yet. */
1933
1934 if (m->wd > 0) /* Already have a watch? */
1935 return;
1936 if (j->inotify_fd < 0) /* Not watching at all? */
1937 return;
1938
1939 m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
1940 if (m->wd < 0) {
1941 log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
1942 return;
1943 }
1944
1945 r = hashmap_ensure_put(&j->directories_by_wd, &directories_by_wd_hash_ops, INT_TO_PTR(m->wd), m);
1946 if (r < 0) {
1947 if (r == -EEXIST)
1948 log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
1949 else {
1950 log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
1951 (void) inotify_rm_watch(j->inotify_fd, m->wd);
1952 }
1953 m->wd = -1;
1954 }
1955 }
1956
1957 static int add_directory(
1958 sd_journal *j,
1959 const char *prefix,
1960 const char *dirname) {
1961
1962 _cleanup_free_ char *path = NULL;
1963 _cleanup_closedir_ DIR *d = NULL;
1964 Directory *m;
1965 int r, k;
1966
1967 assert(j);
1968 assert(prefix);
1969
1970 /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
1971 * and reenumerates directory contents */
1972
1973 path = path_join(prefix, dirname);
1974 if (!path) {
1975 r = -ENOMEM;
1976 goto fail;
1977 }
1978
1979 log_debug("Considering directory '%s'.", path);
1980
1981 /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
1982 if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
1983 !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
1984 return 0;
1985
1986 if (dirname &&
1987 (!(FLAGS_SET(j->flags, SD_JOURNAL_ALL_NAMESPACES) ||
1988 dirname_has_namespace(dirname, j->namespace) > 0 ||
1989 (FLAGS_SET(j->flags, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) && dirname_has_namespace(dirname, NULL) > 0))))
1990 return 0;
1991
1992 r = directory_open(j, path, &d);
1993 if (r < 0) {
1994 log_debug_errno(r, "Failed to open directory '%s': %m", path);
1995 goto fail;
1996 }
1997
1998 r = add_directory_impl(j, path, /* is_root = */ false, &m);
1999 if (r < 0)
2000 goto fail;
2001 if (r == 0)
2002 return 0;
2003
2004 m->last_seen_generation = j->generation;
2005
2006 directory_watch(j, m, dirfd(d),
2007 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
2008 IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
2009 IN_ONLYDIR);
2010
2011 if (!j->no_new_files)
2012 directory_enumerate(j, m, d);
2013
2014 check_network(j, dirfd(d));
2015
2016 return 0;
2017
2018 fail:
2019 k = journal_put_error(j, r, path ?: prefix);
2020 if (k < 0)
2021 return k;
2022
2023 return r;
2024 }
2025
2026 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
2027
2028 _cleanup_closedir_ DIR *d = NULL;
2029 Directory *m;
2030 int r, k;
2031
2032 assert(j);
2033
2034 /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
2035 * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
2036 * populate the set, as well as to update it later. */
2037
2038 if (p) {
2039 /* If there's a path specified, use it. */
2040
2041 log_debug("Considering root directory '%s'.", p);
2042
2043 if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
2044 !path_has_prefix(j, p, "/run"))
2045 return -EINVAL;
2046
2047 if (j->prefix)
2048 p = strjoina(j->prefix, p);
2049
2050 r = directory_open(j, p, &d);
2051 if (r == -ENOENT && missing_ok)
2052 return 0;
2053 if (r < 0) {
2054 log_debug_errno(r, "Failed to open root directory %s: %m", p);
2055 goto fail;
2056 }
2057 } else {
2058 _cleanup_close_ int dfd = -EBADF;
2059
2060 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
2061 * opendir() will take possession of the fd, and close it, which we don't want. */
2062
2063 p = "."; /* store this as "." in the directories hashmap */
2064
2065 dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
2066 if (dfd < 0) {
2067 r = -errno;
2068 goto fail;
2069 }
2070
2071 d = take_fdopendir(&dfd);
2072 if (!d) {
2073 r = -errno;
2074 goto fail;
2075 }
2076
2077 rewinddir(d);
2078 }
2079
2080 r = add_directory_impl(j, p, /* is_root = */ true, &m);
2081 if (r < 0)
2082 goto fail;
2083 if (r == 0)
2084 return 0;
2085
2086 directory_watch(j, m, dirfd(d),
2087 IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
2088 IN_ONLYDIR);
2089
2090 if (!j->no_new_files)
2091 directory_enumerate(j, m, d);
2092
2093 check_network(j, dirfd(d));
2094
2095 return 0;
2096
2097 fail:
2098 k = journal_put_error(j, r, p);
2099 if (k < 0)
2100 return k;
2101
2102 return r;
2103 }
2104
2105 static int add_search_paths(sd_journal *j) {
2106
2107 static const char search_paths[] =
2108 "/run/log/journal\0"
2109 "/var/log/journal\0";
2110
2111 assert(j);
2112
2113 /* We ignore most errors here, since the idea is to only open
2114 * what's actually accessible, and ignore the rest. */
2115
2116 NULSTR_FOREACH(p, search_paths)
2117 (void) add_root_directory(j, p, true);
2118
2119 if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
2120 (void) add_root_directory(j, "/var/log/journal/remote", true);
2121
2122 return 0;
2123 }
2124
2125 static int add_current_paths(sd_journal *j) {
2126 JournalFile *f;
2127
2128 assert(j);
2129 assert(j->no_new_files);
2130
2131 /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
2132 * treat them as fatal. */
2133
2134 ORDERED_HASHMAP_FOREACH(f, j->files) {
2135 _cleanup_free_ char *dir = NULL;
2136 int r;
2137
2138 r = path_extract_directory(f->path, &dir);
2139 if (r < 0)
2140 return r;
2141
2142 r = add_directory(j, dir, NULL);
2143 if (r < 0)
2144 return r;
2145 }
2146
2147 return 0;
2148 }
2149
2150 static int allocate_inotify(sd_journal *j) {
2151 assert(j);
2152
2153 if (j->inotify_fd < 0) {
2154 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
2155 if (j->inotify_fd < 0)
2156 return -errno;
2157 }
2158
2159 return 0;
2160 }
2161
2162 static sd_journal *journal_new(int flags, const char *path, const char *namespace) {
2163 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2164
2165 j = new(sd_journal, 1);
2166 if (!j)
2167 return NULL;
2168
2169 *j = (sd_journal) {
2170 .origin_id = origin_id_query(),
2171 .toplevel_fd = -EBADF,
2172 .inotify_fd = -EBADF,
2173 .flags = flags,
2174 .data_threshold = DEFAULT_DATA_THRESHOLD,
2175 };
2176
2177 if (path) {
2178 char *t;
2179
2180 t = strdup(path);
2181 if (!t)
2182 return NULL;
2183
2184 if (flags & SD_JOURNAL_OS_ROOT)
2185 j->prefix = t;
2186 else
2187 j->path = t;
2188 }
2189
2190 if (namespace) {
2191 j->namespace = strdup(namespace);
2192 if (!j->namespace)
2193 return NULL;
2194 }
2195
2196 j->files = ordered_hashmap_new(&path_hash_ops);
2197 if (!j->files)
2198 return NULL;
2199
2200 j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
2201 j->mmap = mmap_cache_new();
2202 if (!j->files_cache || !j->mmap)
2203 return NULL;
2204
2205 return TAKE_PTR(j);
2206 }
2207
2208 #define OPEN_ALLOWED_FLAGS \
2209 (SD_JOURNAL_LOCAL_ONLY | \
2210 SD_JOURNAL_RUNTIME_ONLY | \
2211 SD_JOURNAL_SYSTEM | \
2212 SD_JOURNAL_CURRENT_USER | \
2213 SD_JOURNAL_ALL_NAMESPACES | \
2214 SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE | \
2215 SD_JOURNAL_ASSUME_IMMUTABLE)
2216
2217 _public_ int sd_journal_open_namespace(sd_journal **ret, const char *namespace, int flags) {
2218 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2219 int r;
2220
2221 assert_return(ret, -EINVAL);
2222 assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
2223
2224 j = journal_new(flags, NULL, namespace);
2225 if (!j)
2226 return -ENOMEM;
2227
2228 r = add_search_paths(j);
2229 if (r < 0)
2230 return r;
2231
2232 *ret = TAKE_PTR(j);
2233 return 0;
2234 }
2235
2236 _public_ int sd_journal_open(sd_journal **ret, int flags) {
2237 return sd_journal_open_namespace(ret, NULL, flags);
2238 }
2239
2240 #define OPEN_CONTAINER_ALLOWED_FLAGS \
2241 (SD_JOURNAL_LOCAL_ONLY | \
2242 SD_JOURNAL_SYSTEM | \
2243 SD_JOURNAL_ASSUME_IMMUTABLE)
2244
2245 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
2246 _cleanup_free_ char *root = NULL, *class = NULL;
2247 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2248 char *p;
2249 int r;
2250
2251 /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in
2252 * combination with sd_journal_open_directory_fd(). */
2253
2254 assert_return(machine, -EINVAL);
2255 assert_return(ret, -EINVAL);
2256 assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
2257 assert_return(hostname_is_valid(machine, 0), -EINVAL);
2258
2259 p = strjoina("/run/systemd/machines/", machine);
2260 r = parse_env_file(NULL, p,
2261 "ROOT", &root,
2262 "CLASS", &class);
2263 if (r == -ENOENT)
2264 return -EHOSTDOWN;
2265 if (r < 0)
2266 return r;
2267 if (!root)
2268 return -ENODATA;
2269
2270 if (!streq_ptr(class, "container"))
2271 return -EIO;
2272
2273 j = journal_new(flags, root, NULL);
2274 if (!j)
2275 return -ENOMEM;
2276
2277 r = add_search_paths(j);
2278 if (r < 0)
2279 return r;
2280
2281 *ret = TAKE_PTR(j);
2282 return 0;
2283 }
2284
2285 #define OPEN_DIRECTORY_ALLOWED_FLAGS \
2286 (SD_JOURNAL_OS_ROOT | \
2287 SD_JOURNAL_SYSTEM | \
2288 SD_JOURNAL_CURRENT_USER | \
2289 SD_JOURNAL_ASSUME_IMMUTABLE)
2290
2291 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
2292 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2293 int r;
2294
2295 assert_return(ret, -EINVAL);
2296 assert_return(path, -EINVAL);
2297 assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
2298
2299 j = journal_new(flags, path, NULL);
2300 if (!j)
2301 return -ENOMEM;
2302
2303 if (flags & SD_JOURNAL_OS_ROOT)
2304 r = add_search_paths(j);
2305 else
2306 r = add_root_directory(j, path, false);
2307 if (r < 0)
2308 return r;
2309
2310 *ret = TAKE_PTR(j);
2311 return 0;
2312 }
2313
2314 #define OPEN_FILES_ALLOWED_FLAGS \
2315 (SD_JOURNAL_ASSUME_IMMUTABLE)
2316
2317 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
2318 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2319 int r;
2320
2321 assert_return(ret, -EINVAL);
2322 assert_return((flags & ~OPEN_FILES_ALLOWED_FLAGS) == 0, -EINVAL);
2323
2324 j = journal_new(flags, NULL, NULL);
2325 if (!j)
2326 return -ENOMEM;
2327
2328 STRV_FOREACH(path, paths) {
2329 r = add_any_file(j, -1, *path);
2330 if (r < 0)
2331 return r;
2332 }
2333
2334 j->no_new_files = true;
2335
2336 *ret = TAKE_PTR(j);
2337 return 0;
2338 }
2339
2340 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS \
2341 (SD_JOURNAL_OS_ROOT | \
2342 SD_JOURNAL_SYSTEM | \
2343 SD_JOURNAL_CURRENT_USER | \
2344 SD_JOURNAL_TAKE_DIRECTORY_FD | \
2345 SD_JOURNAL_ASSUME_IMMUTABLE)
2346
2347 _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
2348 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2349 struct stat st;
2350 bool take_fd;
2351 int r;
2352
2353 assert_return(ret, -EINVAL);
2354 assert_return(fd >= 0, -EBADF);
2355 assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
2356
2357 if (fstat(fd, &st) < 0)
2358 return -errno;
2359
2360 if (!S_ISDIR(st.st_mode))
2361 return -EBADFD;
2362
2363 take_fd = FLAGS_SET(flags, SD_JOURNAL_TAKE_DIRECTORY_FD);
2364 j = journal_new(flags & ~SD_JOURNAL_TAKE_DIRECTORY_FD, NULL, NULL);
2365 if (!j)
2366 return -ENOMEM;
2367
2368 j->toplevel_fd = fd;
2369
2370 if (flags & SD_JOURNAL_OS_ROOT)
2371 r = add_search_paths(j);
2372 else
2373 r = add_root_directory(j, NULL, false);
2374 if (r < 0)
2375 return r;
2376
2377 SET_FLAG(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD, take_fd);
2378
2379 *ret = TAKE_PTR(j);
2380 return 0;
2381 }
2382
2383 #define OPEN_FILES_FD_ALLOWED_FLAGS \
2384 (SD_JOURNAL_ASSUME_IMMUTABLE)
2385
2386 _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
2387 JournalFile *f;
2388 _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2389 int r;
2390
2391 assert_return(ret, -EINVAL);
2392 assert_return(n_fds > 0, -EBADF);
2393 assert_return((flags & ~OPEN_FILES_FD_ALLOWED_FLAGS) == 0, -EINVAL);
2394
2395 j = journal_new(flags, NULL, NULL);
2396 if (!j)
2397 return -ENOMEM;
2398
2399 for (unsigned i = 0; i < n_fds; i++) {
2400 struct stat st;
2401
2402 if (fds[i] < 0) {
2403 r = -EBADF;
2404 goto fail;
2405 }
2406
2407 if (fstat(fds[i], &st) < 0) {
2408 r = -errno;
2409 goto fail;
2410 }
2411
2412 r = stat_verify_regular(&st);
2413 if (r < 0)
2414 goto fail;
2415
2416 r = add_any_file(j, fds[i], NULL);
2417 if (r < 0)
2418 goto fail;
2419 }
2420
2421 j->no_new_files = true;
2422 j->no_inotify = true;
2423
2424 *ret = TAKE_PTR(j);
2425 return 0;
2426
2427 fail:
2428 /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2429 * remain open */
2430 ORDERED_HASHMAP_FOREACH(f, j->files)
2431 f->close_fd = false;
2432
2433 return r;
2434 }
2435
2436 _public_ void sd_journal_close(sd_journal *j) {
2437 if (!j || journal_origin_changed(j))
2438 return;
2439
2440 journal_clear_newest_by_boot_id(j);
2441
2442 sd_journal_flush_matches(j);
2443
2444 ordered_hashmap_free_with_destructor(j->files, journal_file_close);
2445 iterated_cache_free(j->files_cache);
2446
2447 hashmap_free(j->directories_by_path);
2448 hashmap_free(j->directories_by_wd);
2449
2450 if (FLAGS_SET(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD))
2451 safe_close(j->toplevel_fd);
2452
2453 safe_close(j->inotify_fd);
2454
2455 if (j->mmap) {
2456 mmap_cache_stats_log_debug(j->mmap);
2457 mmap_cache_unref(j->mmap);
2458 }
2459
2460 hashmap_free_free(j->errors);
2461
2462 set_free(j->exclude_syslog_identifiers);
2463
2464 free(j->path);
2465 free(j->prefix);
2466 free(j->namespace);
2467 free(j->unique_field);
2468 free(j->fields_buffer);
2469 free(j);
2470 }
2471
2472 static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) {
2473 uint64_t offset, mo, rt;
2474 sd_id128_t id;
2475 ObjectType type;
2476 Object *o;
2477 int r;
2478
2479 assert(j);
2480 assert(f);
2481 assert(f->header);
2482
2483 /* Tries to read the timestamp of the most recently written entry. */
2484
2485 if (FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE) && f->newest_entry_offset != 0)
2486 return 0; /* We have already read the file, and we assume that the file is immutable. */
2487
2488 if (f->header->state == f->newest_state &&
2489 f->header->state == STATE_ARCHIVED &&
2490 f->newest_entry_offset != 0)
2491 return 0; /* We have already read archived file. */
2492
2493 if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) {
2494 offset = le64toh(READ_NOW(f->header->tail_entry_offset));
2495 type = OBJECT_ENTRY;
2496 } else {
2497 offset = le64toh(READ_NOW(f->header->tail_object_offset));
2498 type = OBJECT_UNUSED;
2499 }
2500 if (offset == 0)
2501 return -ENODATA; /* not a single object/entry, hence no tail timestamp */
2502 if (offset == f->newest_entry_offset)
2503 return 0; /* No new entry is added after we read last time. */
2504
2505 /* Move to the last object in the journal file, in the hope it is an entry (which it usually will
2506 * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED
2507 * here, since we cannot be sure what the last object will be, and want no noisy logging if it isn't
2508 * an entry. We instead check after figuring out the pointer. */
2509 r = journal_file_move_to_object(f, type, offset, &o);
2510 if (r < 0) {
2511 log_debug_errno(r, "Failed to move to last object in journal file, ignoring: %m");
2512 o = NULL;
2513 offset = 0;
2514 }
2515 if (o && o->object.type == OBJECT_ENTRY) {
2516 /* Yay, last object is an entry, let's use the data. */
2517 id = o->entry.boot_id;
2518 mo = le64toh(o->entry.monotonic);
2519 rt = le64toh(o->entry.realtime);
2520 } else {
2521 /* So the object is not an entry or we couldn't access it? In that case, let's read the most
2522 * recent entry timestamps from the header. It's equally good. Unfortunately though, in old
2523 * versions of the journal the boot ID in the header doesn't have to match the monotonic
2524 * timestamp of the header. Let's check the header flag that indicates whether this strictly
2525 * matches first hence, before using the data. */
2526
2527 if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && f->header->state == STATE_ARCHIVED) {
2528 mo = le64toh(f->header->tail_entry_monotonic);
2529 rt = le64toh(f->header->tail_entry_realtime);
2530 id = f->header->tail_entry_boot_id;
2531 offset = UINT64_MAX;
2532 } else {
2533 /* Otherwise let's find the last entry manually (this possibly means traversing the
2534 * chain of entry arrays, till the end */
2535 r = journal_file_next_entry(f, 0, DIRECTION_UP, &o, offset == 0 ? &offset : NULL);
2536 if (r < 0)
2537 return r;
2538 if (r == 0)
2539 return -ENODATA;
2540
2541 id = o->entry.boot_id;
2542 mo = le64toh(o->entry.monotonic);
2543 rt = le64toh(o->entry.realtime);
2544 }
2545 }
2546
2547 if (mo > rt) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */
2548 return -ENODATA;
2549
2550 if (offset == f->newest_entry_offset) {
2551 /* Cached data and the current one should be equivalent. */
2552 if (!sd_id128_equal(f->newest_machine_id, f->header->machine_id) ||
2553 !sd_id128_equal(f->newest_boot_id, id) ||
2554 f->newest_monotonic_usec != mo ||
2555 f->newest_realtime_usec != rt)
2556 return -EBADMSG;
2557
2558 return 0; /* No new entry is added after we read last time. */
2559 }
2560
2561 if (!sd_id128_equal(f->newest_boot_id, id))
2562 journal_file_unlink_newest_by_boot_id(j, f);
2563
2564 f->newest_boot_id = id;
2565 f->newest_monotonic_usec = mo;
2566 f->newest_realtime_usec = rt;
2567 f->newest_machine_id = f->header->machine_id;
2568 f->newest_entry_offset = offset;
2569 f->newest_state = f->header->state;
2570
2571 r = journal_file_reshuffle_newest_by_boot_id(j, f);
2572 if (r < 0)
2573 return r;
2574
2575 return 1; /* Updated. */
2576 }
2577
2578 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
2579 JournalFile *f;
2580 Object *o;
2581 int r;
2582
2583 assert_return(j, -EINVAL);
2584 assert_return(!journal_origin_changed(j), -ECHILD);
2585
2586 f = j->current_file;
2587 if (!f)
2588 return -EADDRNOTAVAIL;
2589 if (f->current_offset <= 0)
2590 return -EADDRNOTAVAIL;
2591
2592 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2593 if (r < 0)
2594 return r;
2595
2596 uint64_t t = le64toh(o->entry.realtime);
2597 if (!VALID_REALTIME(t))
2598 return -EBADMSG;
2599
2600 if (ret)
2601 *ret = t;
2602
2603 return 0;
2604 }
2605
2606 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id) {
2607 JournalFile *f;
2608 Object *o;
2609 int r;
2610
2611 assert_return(j, -EINVAL);
2612 assert_return(!journal_origin_changed(j), -ECHILD);
2613
2614 f = j->current_file;
2615 if (!f)
2616 return -EADDRNOTAVAIL;
2617 if (f->current_offset <= 0)
2618 return -EADDRNOTAVAIL;
2619
2620 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2621 if (r < 0)
2622 return r;
2623
2624 if (ret_boot_id)
2625 *ret_boot_id = o->entry.boot_id;
2626 else {
2627 sd_id128_t id;
2628
2629 r = sd_id128_get_boot(&id);
2630 if (r < 0)
2631 return r;
2632
2633 if (!sd_id128_equal(id, o->entry.boot_id))
2634 return -ESTALE;
2635 }
2636
2637 uint64_t t = le64toh(o->entry.monotonic);
2638 if (!VALID_MONOTONIC(t))
2639 return -EBADMSG;
2640
2641 if (ret)
2642 *ret = t;
2643
2644 return 0;
2645 }
2646
2647 _public_ int sd_journal_get_seqnum(
2648 sd_journal *j,
2649 uint64_t *ret_seqnum,
2650 sd_id128_t *ret_seqnum_id) {
2651
2652 JournalFile *f;
2653 Object *o;
2654 int r;
2655
2656 assert_return(j, -EINVAL);
2657 assert_return(!journal_origin_changed(j), -ECHILD);
2658
2659 f = j->current_file;
2660 if (!f)
2661 return -EADDRNOTAVAIL;
2662
2663 if (f->current_offset <= 0)
2664 return -EADDRNOTAVAIL;
2665
2666 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2667 if (r < 0)
2668 return r;
2669
2670 if (ret_seqnum_id)
2671 *ret_seqnum_id = f->header->seqnum_id;
2672 if (ret_seqnum)
2673 *ret_seqnum = le64toh(o->entry.seqnum);
2674
2675 return 0;
2676 }
2677
/* Checks whether 'field' is acceptable as a field name here: non-empty, not starting with "__", and made
 * up exclusively of underscores, upper-case ASCII letters and ASCII digits. */
static bool field_is_valid(const char *field) {
        assert(field);

        if (isempty(field) || startswith(field, "__"))
                return false;

        for (const char *c = field; *c != '\0'; c++) {
                bool allowed =
                        *c == '_' ||
                        (*c >= 'A' && *c <= 'Z') ||
                        ascii_isdigit(*c);

                if (!allowed)
                        return false;
        }

        return true;
}
2703
2704 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
2705 JournalFile *f;
2706 size_t field_length;
2707 Object *o;
2708 int r;
2709
2710 assert_return(j, -EINVAL);
2711 assert_return(!journal_origin_changed(j), -ECHILD);
2712 assert_return(field, -EINVAL);
2713 assert_return(data, -EINVAL);
2714 assert_return(size, -EINVAL);
2715 assert_return(field_is_valid(field), -EINVAL);
2716
2717 f = j->current_file;
2718 if (!f)
2719 return -EADDRNOTAVAIL;
2720
2721 if (f->current_offset <= 0)
2722 return -EADDRNOTAVAIL;
2723
2724 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2725 if (r < 0)
2726 return r;
2727
2728 field_length = strlen(field);
2729
2730 uint64_t n = journal_file_entry_n_items(f, o);
2731 for (uint64_t i = 0; i < n; i++) {
2732 uint64_t p;
2733 void *d;
2734 size_t l;
2735
2736 p = journal_file_entry_item_object_offset(f, o, i);
2737 r = journal_file_data_payload(f, NULL, p, field, field_length, j->data_threshold, &d, &l);
2738 if (r == 0)
2739 continue;
2740 if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
2741 log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
2742 continue;
2743 }
2744 if (r < 0)
2745 return r;
2746
2747 *data = d;
2748 *size = l;
2749
2750 return 0;
2751 }
2752
2753 return -ENOENT;
2754 }
2755
2756 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2757 JournalFile *f;
2758 Object *o;
2759 int r;
2760
2761 assert_return(j, -EINVAL);
2762 assert_return(!journal_origin_changed(j), -ECHILD);
2763 assert_return(data, -EINVAL);
2764 assert_return(size, -EINVAL);
2765
2766 f = j->current_file;
2767 if (!f)
2768 return -EADDRNOTAVAIL;
2769
2770 if (f->current_offset <= 0)
2771 return -EADDRNOTAVAIL;
2772
2773 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2774 if (r < 0)
2775 return r;
2776
2777 for (uint64_t n = journal_file_entry_n_items(f, o); j->current_field < n; j->current_field++) {
2778 uint64_t p;
2779 void *d;
2780 size_t l;
2781
2782 p = journal_file_entry_item_object_offset(f, o, j->current_field);
2783 r = journal_file_data_payload(f, NULL, p, NULL, 0, j->data_threshold, &d, &l);
2784 if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
2785 log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field);
2786 continue;
2787 }
2788 if (r < 0)
2789 return r;
2790 assert(r > 0);
2791
2792 *data = d;
2793 *size = l;
2794
2795 j->current_field++;
2796
2797 return 1;
2798 }
2799
2800 return 0;
2801 }
2802
2803 _public_ int sd_journal_enumerate_available_data(sd_journal *j, const void **data, size_t *size) {
2804 for (;;) {
2805 int r;
2806
2807 r = sd_journal_enumerate_data(j, data, size);
2808 if (r >= 0)
2809 return r;
2810 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
2811 return r;
2812 j->current_field++; /* Try with the next field */
2813 }
2814 }
2815
2816 _public_ void sd_journal_restart_data(sd_journal *j) {
2817 if (!j || journal_origin_changed(j))
2818 return;
2819
2820 j->current_field = 0;
2821 }
2822
2823 static int reiterate_all_paths(sd_journal *j) {
2824 assert(j);
2825
2826 if (j->no_new_files)
2827 return add_current_paths(j);
2828
2829 if (j->flags & SD_JOURNAL_OS_ROOT)
2830 return add_search_paths(j);
2831
2832 if (j->toplevel_fd >= 0)
2833 return add_root_directory(j, NULL, false);
2834
2835 if (j->path)
2836 return add_root_directory(j, j->path, true);
2837
2838 return add_search_paths(j);
2839 }
2840
2841 _public_ int sd_journal_get_fd(sd_journal *j) {
2842 int r;
2843
2844 assert_return(j, -EINVAL);
2845 assert_return(!journal_origin_changed(j), -ECHILD);
2846 assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
2847
2848 if (j->no_inotify)
2849 return -EMEDIUMTYPE;
2850
2851 if (j->inotify_fd >= 0)
2852 return j->inotify_fd;
2853
2854 r = allocate_inotify(j);
2855 if (r < 0)
2856 return r;
2857
2858 log_debug("Reiterating files to get inotify watches established.");
2859
2860 /* Iterate through all dirs again, to add them to the inotify */
2861 r = reiterate_all_paths(j);
2862 if (r < 0)
2863 return r;
2864
2865 return j->inotify_fd;
2866 }
2867
2868 _public_ int sd_journal_get_events(sd_journal *j) {
2869 int fd;
2870
2871 assert_return(j, -EINVAL);
2872 assert_return(!journal_origin_changed(j), -ECHILD);
2873 assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
2874
2875 fd = sd_journal_get_fd(j);
2876 if (fd < 0)
2877 return fd;
2878
2879 return POLLIN;
2880 }
2881
2882 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2883 int fd;
2884
2885 assert_return(j, -EINVAL);
2886 assert_return(!journal_origin_changed(j), -ECHILD);
2887 assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
2888 assert_return(timeout_usec, -EINVAL);
2889
2890 fd = sd_journal_get_fd(j);
2891 if (fd < 0)
2892 return fd;
2893
2894 if (!j->on_network) {
2895 *timeout_usec = UINT64_MAX;
2896 return 0;
2897 }
2898
2899 /* If we are on the network we need to regularly check for
2900 * changes manually */
2901
2902 *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
2903 return 1;
2904 }
2905
2906 static void process_q_overflow(sd_journal *j) {
2907 JournalFile *f;
2908 Directory *m;
2909
2910 assert(j);
2911
2912 /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
2913 * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
2914 * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
2915 * are subject for unloading. */
2916
2917 log_debug("Inotify queue overrun, reiterating everything.");
2918
2919 j->generation++;
2920 (void) reiterate_all_paths(j);
2921
2922 ORDERED_HASHMAP_FOREACH(f, j->files) {
2923
2924 if (f->last_seen_generation == j->generation)
2925 continue;
2926
2927 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
2928 remove_file_real(j, f);
2929 }
2930
2931 HASHMAP_FOREACH(m, j->directories_by_path) {
2932
2933 if (m->last_seen_generation == j->generation)
2934 continue;
2935
2936 if (m->is_root) /* Never GC root directories */
2937 continue;
2938
2939 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
2940 directory_free(m);
2941 }
2942
2943 log_debug("Reiteration complete.");
2944 }
2945
2946 static void process_inotify_event(sd_journal *j, const struct inotify_event *e) {
2947 Directory *d;
2948
2949 assert(j);
2950 assert(e);
2951
2952 if (e->mask & IN_Q_OVERFLOW) {
2953 process_q_overflow(j);
2954 return;
2955 }
2956
2957 /* Is this a subdirectory we watch? */
2958 d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
2959 if (d) {
2960 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
2961 (endswith(e->name, ".journal") ||
2962 endswith(e->name, ".journal~"))) {
2963
2964 /* Event for a journal file */
2965
2966 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2967 (void) add_file_by_name(j, d->path, e->name);
2968 else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
2969 (void) remove_file_by_name(j, d->path, e->name);
2970
2971 } else if (!d->is_root && e->len == 0) {
2972
2973 /* Event for a subdirectory */
2974
2975 if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
2976 directory_free(d);
2977
2978 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
2979
2980 /* Event for root directory */
2981
2982 if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
2983 (void) add_directory(j, d->path, e->name);
2984 }
2985
2986 return;
2987 }
2988
2989 if (e->mask & IN_IGNORED)
2990 return;
2991
2992 log_debug("Unexpected inotify event.");
2993 }
2994
2995 static int determine_change(sd_journal *j) {
2996 bool b;
2997
2998 assert(j);
2999
3000 b = j->current_invalidate_counter != j->last_invalidate_counter;
3001 j->last_invalidate_counter = j->current_invalidate_counter;
3002
3003 return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
3004 }
3005
3006 _public_ int sd_journal_process(sd_journal *j) {
3007 bool got_something = false;
3008
3009 assert_return(j, -EINVAL);
3010 assert_return(!journal_origin_changed(j), -ECHILD);
3011
3012 if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
3013 return 0;
3014
3015 assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
3016
3017 j->last_process_usec = now(CLOCK_MONOTONIC);
3018 j->last_invalidate_counter = j->current_invalidate_counter;
3019
3020 for (;;) {
3021 union inotify_event_buffer buffer;
3022 ssize_t l;
3023
3024 l = read(j->inotify_fd, &buffer, sizeof(buffer));
3025 if (l < 0) {
3026 if (ERRNO_IS_TRANSIENT(errno))
3027 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
3028
3029 return -errno;
3030 }
3031
3032 got_something = true;
3033
3034 FOREACH_INOTIFY_EVENT(e, buffer, l)
3035 process_inotify_event(j, e);
3036 }
3037 }
3038
3039 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
3040 int r;
3041 uint64_t t;
3042
3043 assert_return(j, -EINVAL);
3044 assert_return(!journal_origin_changed(j), -ECHILD);
3045 assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
3046
3047 if (j->inotify_fd < 0) {
3048 JournalFile *f;
3049
3050 /* This is the first invocation, hence create the inotify watch */
3051 r = sd_journal_get_fd(j);
3052 if (r < 0)
3053 return r;
3054
3055 /* Server might have done some vacuuming while we weren't watching. Get rid of the deleted
3056 * files now so they don't stay around indefinitely. */
3057 ORDERED_HASHMAP_FOREACH(f, j->files) {
3058 r = journal_file_fstat(f);
3059 if (r == -EIDRM)
3060 remove_file_real(j, f);
3061 else if (r < 0)
3062 log_debug_errno(r, "Failed to fstat() journal file '%s', ignoring: %m", f->path);
3063 }
3064
3065 /* The journal might have changed since the context object was created and we weren't
3066 * watching before, hence don't wait for anything, and return immediately. */
3067 return determine_change(j);
3068 }
3069
3070 r = sd_journal_get_timeout(j, &t);
3071 if (r < 0)
3072 return r;
3073
3074 if (t != UINT64_MAX) {
3075 t = usec_sub_unsigned(t, now(CLOCK_MONOTONIC));
3076
3077 if (timeout_usec == UINT64_MAX || timeout_usec > t)
3078 timeout_usec = t;
3079 }
3080
3081 do {
3082 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
3083 } while (r == -EINTR);
3084
3085 if (r < 0)
3086 return r;
3087
3088 return sd_journal_process(j);
3089 }
3090
3091 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
3092 JournalFile *f;
3093 bool first = true;
3094 uint64_t fmin = 0, tmax = 0;
3095 int r;
3096
3097 assert_return(j, -EINVAL);
3098 assert_return(!journal_origin_changed(j), -ECHILD);
3099 assert_return(from || to, -EINVAL);
3100 assert_return(from != to, -EINVAL);
3101
3102 ORDERED_HASHMAP_FOREACH(f, j->files) {
3103 usec_t fr, t;
3104
3105 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
3106 if (r == -ENOENT)
3107 continue;
3108 if (r < 0)
3109 return r;
3110 if (r == 0)
3111 continue;
3112
3113 if (first) {
3114 fmin = fr;
3115 tmax = t;
3116 first = false;
3117 } else {
3118 fmin = MIN(fr, fmin);
3119 tmax = MAX(t, tmax);
3120 }
3121 }
3122
3123 if (from)
3124 *from = fmin;
3125 if (to)
3126 *to = tmax;
3127
3128 return first ? 0 : 1;
3129 }
3130
3131 _public_ int sd_journal_get_cutoff_monotonic_usec(
3132 sd_journal *j,
3133 sd_id128_t boot_id,
3134 uint64_t *ret_from,
3135 uint64_t *ret_to) {
3136
3137 uint64_t from = UINT64_MAX, to = UINT64_MAX;
3138 bool found = false;
3139 JournalFile *f;
3140 int r;
3141
3142 assert_return(j, -EINVAL);
3143 assert_return(!journal_origin_changed(j), -ECHILD);
3144 assert_return(ret_from != ret_to, -EINVAL);
3145
3146 ORDERED_HASHMAP_FOREACH(f, j->files) {
3147 usec_t ff, tt;
3148
3149 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &ff, &tt);
3150 if (r == -ENOENT)
3151 continue;
3152 if (r < 0)
3153 return r;
3154 if (r == 0)
3155 continue;
3156
3157 if (found) {
3158 from = MIN(ff, from);
3159 to = MAX(tt, to);
3160 } else {
3161 from = ff;
3162 to = tt;
3163 found = true;
3164 }
3165 }
3166
3167 if (ret_from)
3168 *ret_from = from;
3169 if (ret_to)
3170 *ret_to = to;
3171
3172 return found;
3173 }
3174
3175 void journal_print_header(sd_journal *j) {
3176 JournalFile *f;
3177 bool newline = false;
3178
3179 assert(j);
3180
3181 ORDERED_HASHMAP_FOREACH(f, j->files) {
3182 if (newline)
3183 putchar('\n');
3184 else
3185 newline = true;
3186
3187 journal_file_print_header(f);
3188 }
3189 }
3190
3191 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *ret) {
3192 JournalFile *f;
3193 uint64_t sum = 0;
3194
3195 assert_return(j, -EINVAL);
3196 assert_return(!journal_origin_changed(j), -ECHILD);
3197 assert_return(ret, -EINVAL);
3198
3199 ORDERED_HASHMAP_FOREACH(f, j->files) {
3200 struct stat st;
3201 uint64_t b;
3202
3203 if (fstat(f->fd, &st) < 0)
3204 return -errno;
3205
3206 b = (uint64_t) st.st_blocks;
3207 if (b > UINT64_MAX / 512)
3208 return -EOVERFLOW;
3209 b *= 512;
3210
3211 if (sum > UINT64_MAX - b)
3212 return -EOVERFLOW;
3213 sum += b;
3214 }
3215
3216 *ret = sum;
3217 return 0;
3218 }
3219
3220 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
3221 int r;
3222
3223 assert_return(j, -EINVAL);
3224 assert_return(!journal_origin_changed(j), -ECHILD);
3225
3226 if (!field_is_valid(field))
3227 return -EINVAL;
3228
3229 r = free_and_strdup(&j->unique_field, field);
3230 if (r < 0)
3231 return r;
3232
3233 j->unique_file = NULL;
3234 j->unique_offset = 0;
3235 j->unique_file_lost = false;
3236
3237 return 0;
3238 }
3239
3240 _public_ int sd_journal_enumerate_unique(
3241 sd_journal *j,
3242 const void **ret_data,
3243 size_t *ret_size) {
3244
3245 size_t k;
3246
3247 assert_return(j, -EINVAL);
3248 assert_return(!journal_origin_changed(j), -ECHILD);
3249 assert_return(j->unique_field, -EINVAL);
3250
3251 k = strlen(j->unique_field);
3252
3253 if (!j->unique_file) {
3254 if (j->unique_file_lost)
3255 return 0;
3256
3257 j->unique_file = ordered_hashmap_first(j->files);
3258 if (!j->unique_file)
3259 return 0;
3260
3261 j->unique_offset = 0;
3262 }
3263
3264 for (;;) {
3265 JournalFile *of;
3266 Object *o;
3267 void *odata;
3268 size_t ol;
3269 bool found;
3270 int r;
3271
3272 /* Proceed to next data object in the field's linked list */
3273 if (j->unique_offset == 0) {
3274 r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
3275 if (r < 0)
3276 return r;
3277
3278 j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
3279 } else {
3280 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
3281 if (r < 0)
3282 return r;
3283
3284 j->unique_offset = le64toh(o->data.next_field_offset);
3285 }
3286
3287 /* We reached the end of the list? Then start again, with the next file */
3288 if (j->unique_offset == 0) {
3289 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
3290 if (!j->unique_file)
3291 return 0;
3292
3293 continue;
3294 }
3295
3296 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
3297 if (r < 0)
3298 return r;
3299
3300 /* Let's pin the data object, so we can look at it at the same time as one on another file. */
3301 r = journal_file_pin_object(j->unique_file, o);
3302 if (r < 0)
3303 return r;
3304
3305 r = journal_file_data_payload(j->unique_file, o, j->unique_offset, NULL, 0,
3306 j->data_threshold, &odata, &ol);
3307 if (r < 0)
3308 return r;
3309
3310 /* Check if we have at least the field name and "=". */
3311 if (ol <= k)
3312 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
3313 "%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
3314 j->unique_file->path,
3315 j->unique_offset, ol, k + 1);
3316
3317 if (memcmp(odata, j->unique_field, k) != 0 || ((const char*) odata)[k] != '=')
3318 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
3319 "%s:offset " OFSfmt ": object does not start with \"%s=\"",
3320 j->unique_file->path,
3321 j->unique_offset,
3322 j->unique_field);
3323
3324 /* OK, now let's see if we already returned this data object by checking if it exists in the
3325 * earlier traversed files. */
3326 found = false;
3327 ORDERED_HASHMAP_FOREACH(of, j->files) {
3328 if (of == j->unique_file)
3329 break;
3330
3331 /* Skip this file it didn't have any fields indexed */
3332 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3333 continue;
3334
3335 /* We can reuse the hash from our current file only on old-style journal files
3336 * without keyed hashes. On new-style files we have to calculate the hash anew, to
3337 * take the per-file hash seed into consideration. */
3338 if (!JOURNAL_HEADER_KEYED_HASH(j->unique_file->header) && !JOURNAL_HEADER_KEYED_HASH(of->header))
3339 r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
3340 else
3341 r = journal_file_find_data_object(of, odata, ol, NULL, NULL);
3342 if (r < 0)
3343 return r;
3344 if (r > 0) {
3345 found = true;
3346 break;
3347 }
3348 }
3349
3350 if (found)
3351 continue;
3352
3353 *ret_data = odata;
3354 *ret_size = ol;
3355
3356 return 1;
3357 }
3358 }
3359
3360 _public_ int sd_journal_enumerate_available_unique(sd_journal *j, const void **data, size_t *size) {
3361 for (;;) {
3362 int r;
3363
3364 r = sd_journal_enumerate_unique(j, data, size);
3365 if (r >= 0)
3366 return r;
3367 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
3368 return r;
3369 /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try
3370 * we will access the next field. */
3371 }
3372 }
3373
3374 _public_ void sd_journal_restart_unique(sd_journal *j) {
3375 if (!j || journal_origin_changed(j))
3376 return;
3377
3378 j->unique_file = NULL;
3379 j->unique_offset = 0;
3380 j->unique_file_lost = false;
3381 }
3382
3383 _public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
3384 int r;
3385
3386 assert_return(j, -EINVAL);
3387 assert_return(!journal_origin_changed(j), -ECHILD);
3388 assert_return(field, -EINVAL);
3389
3390 if (!j->fields_file) {
3391 if (j->fields_file_lost)
3392 return 0;
3393
3394 j->fields_file = ordered_hashmap_first(j->files);
3395 if (!j->fields_file)
3396 return 0;
3397
3398 j->fields_hash_table_index = 0;
3399 j->fields_offset = 0;
3400 }
3401
3402 for (;;) {
3403 JournalFile *f, *of;
3404 uint64_t m;
3405 Object *o;
3406 size_t sz;
3407 bool found;
3408
3409 f = j->fields_file;
3410
3411 if (j->fields_offset == 0) {
3412 bool eof = false;
3413
3414 /* We are not yet positioned at any field. Let's pick the first one */
3415 r = journal_file_map_field_hash_table(f);
3416 if (r < 0)
3417 return r;
3418
3419 m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
3420 for (;;) {
3421 if (j->fields_hash_table_index >= m) {
3422 /* Reached the end of the hash table, go to the next file. */
3423 eof = true;
3424 break;
3425 }
3426
3427 j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
3428
3429 if (j->fields_offset != 0)
3430 break;
3431
3432 /* Empty hash table bucket, go to next one */
3433 j->fields_hash_table_index++;
3434 }
3435
3436 if (eof) {
3437 /* Proceed with next file */
3438 j->fields_file = ordered_hashmap_next(j->files, f->path);
3439 if (!j->fields_file) {
3440 *field = NULL;
3441 return 0;
3442 }
3443
3444 j->fields_offset = 0;
3445 j->fields_hash_table_index = 0;
3446 continue;
3447 }
3448
3449 } else {
3450 /* We are already positioned at a field. If so, let's figure out the next field from it */
3451
3452 r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
3453 if (r < 0)
3454 return r;
3455
3456 j->fields_offset = le64toh(o->field.next_hash_offset);
3457 if (j->fields_offset == 0) {
3458 /* Reached the end of the hash table chain */
3459 j->fields_hash_table_index++;
3460 continue;
3461 }
3462 }
3463
3464 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
3465 r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
3466 if (r < 0)
3467 return r;
3468
3469 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
3470 if (o->object.type != OBJECT_FIELD)
3471 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
3472 "%s:offset " OFSfmt ": object has type %i, expected %i",
3473 f->path, j->fields_offset,
3474 o->object.type, OBJECT_FIELD);
3475
3476 sz = le64toh(o->object.size) - offsetof(Object, field.payload);
3477
3478 /* Let's see if we already returned this field name before. */
3479 found = false;
3480 ORDERED_HASHMAP_FOREACH(of, j->files) {
3481 if (of == f)
3482 break;
3483
3484 /* Skip this file it didn't have any fields indexed */
3485 if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3486 continue;
3487
3488 if (!JOURNAL_HEADER_KEYED_HASH(f->header) && !JOURNAL_HEADER_KEYED_HASH(of->header))
3489 r = journal_file_find_field_object_with_hash(of, o->field.payload, sz,
3490 le64toh(o->field.hash), NULL, NULL);
3491 else
3492 r = journal_file_find_field_object(of, o->field.payload, sz, NULL, NULL);
3493 if (r < 0)
3494 return r;
3495 if (r > 0) {
3496 found = true;
3497 break;
3498 }
3499 }
3500
3501 if (found)
3502 continue;
3503
3504 /* Check if this is really a valid string containing no NUL byte */
3505 if (memchr(o->field.payload, 0, sz))
3506 return -EBADMSG;
3507
3508 if (j->data_threshold > 0 && sz > j->data_threshold)
3509 sz = j->data_threshold;
3510
3511 if (!GREEDY_REALLOC(j->fields_buffer, sz + 1))
3512 return -ENOMEM;
3513
3514 memcpy(j->fields_buffer, o->field.payload, sz);
3515 j->fields_buffer[sz] = 0;
3516
3517 if (!field_is_valid(j->fields_buffer))
3518 return -EBADMSG;
3519
3520 *field = j->fields_buffer;
3521 return 1;
3522 }
3523 }
3524
3525 _public_ void sd_journal_restart_fields(sd_journal *j) {
3526 if (!j || journal_origin_changed(j))
3527 return;
3528
3529 j->fields_file = NULL;
3530 j->fields_hash_table_index = 0;
3531 j->fields_offset = 0;
3532 j->fields_file_lost = false;
3533 }
3534
3535 _public_ int sd_journal_reliable_fd(sd_journal *j) {
3536 assert_return(j, -EINVAL);
3537 assert_return(!journal_origin_changed(j), -ECHILD);
3538
3539 return !j->on_network;
3540 }
3541
3542 static char *lookup_field(const char *field, void *userdata) {
3543 sd_journal *j = ASSERT_PTR(userdata);
3544 const void *data;
3545 size_t size, d;
3546 int r;
3547
3548 assert(field);
3549
3550 r = sd_journal_get_data(j, field, &data, &size);
3551 if (r < 0 ||
3552 size > REPLACE_VAR_MAX)
3553 return strdup(field);
3554
3555 d = strlen(field) + 1;
3556
3557 return strndup((const char*) data + d, size - d);
3558 }
3559
3560 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
3561 const void *data;
3562 size_t size;
3563 sd_id128_t id;
3564 _cleanup_free_ char *text = NULL, *cid = NULL;
3565 char *t;
3566 int r;
3567
3568 assert_return(j, -EINVAL);
3569 assert_return(!journal_origin_changed(j), -ECHILD);
3570 assert_return(ret, -EINVAL);
3571
3572 r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
3573 if (r < 0)
3574 return r;
3575
3576 cid = strndup((const char*) data + 11, size - 11);
3577 if (!cid)
3578 return -ENOMEM;
3579
3580 r = sd_id128_from_string(cid, &id);
3581 if (r < 0)
3582 return r;
3583
3584 r = catalog_get(secure_getenv("SYSTEMD_CATALOG") ?: CATALOG_DATABASE, id, &text);
3585 if (r < 0)
3586 return r;
3587
3588 t = replace_var(text, lookup_field, j);
3589 if (!t)
3590 return -ENOMEM;
3591
3592 *ret = t;
3593 return 0;
3594 }
3595
3596 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
3597 assert_return(ret, -EINVAL);
3598
3599 return catalog_get(CATALOG_DATABASE, id, ret);
3600 }
3601
3602 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
3603 assert_return(j, -EINVAL);
3604 assert_return(!journal_origin_changed(j), -ECHILD);
3605
3606 j->data_threshold = sz;
3607 return 0;
3608 }
3609
3610 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
3611 assert_return(j, -EINVAL);
3612 assert_return(!journal_origin_changed(j), -ECHILD);
3613 assert_return(sz, -EINVAL);
3614
3615 *sz = j->data_threshold;
3616 return 0;
3617 }
3618
3619 _public_ int sd_journal_has_runtime_files(sd_journal *j) {
3620 assert_return(j, -EINVAL);
3621
3622 return j->has_runtime_files;
3623 }
3624
3625 _public_ int sd_journal_has_persistent_files(sd_journal *j) {
3626 assert_return(j, -EINVAL);
3627
3628 return j->has_persistent_files;
3629 }