src/libsystemd/sd-journal/sd-journal.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <fcntl.h>
   4 #include <poll.h>
   5 #include <stdlib.h>
   6 #include <sys/inotify.h>
   7 #include <sys/vfs.h>
   8 #include <unistd.h>
   9
  10 #include "sd-journal.h"
  11
  12 #include "alloc-util.h"
  13 #include "catalog.h"
  14 #include "dirent-util.h"
  15 #include "env-file.h"
  16 #include "escape.h"
  17 #include "extract-word.h"
  18 #include "fd-util.h"
  19 #include "fileio.h"
  20 #include "format-util.h"
  21 #include "hashmap.h"
  22 #include "hostname-util.h"
  23 #include "id128-util.h"
  24 #include "inotify-util.h"
  25 #include "io-util.h"
  26 #include "journal-def.h"
  27 #include "journal-file.h"
  28 #include "journal-internal.h"
  29 #include "list.h"
  30 #include "log.h"
  31 #include "lookup3.h"
  32 #include "nulstr-util.h"
  33 #include "origin-id.h"
  34 #include "path-util.h"
  35 #include "prioq.h"
  36 #include "replace-var.h"
  37 #include "set.h"
  38 #include "sort-util.h"
  39 #include "stat-util.h"
  40 #include "stdio-util.h"
  41 #include "string-util.h"
  42 #include "strv.h"
  43 #include "syslog-util.h"
  44 #include "time-util.h"
  45 #include "uid-classification.h"
  46
/* Two seconds, expressed in microseconds. NOTE(review): presumably the interval after which the set of
 * journal files is checked again; the code using this constant is not nearby, confirm at the use site. */
#define JOURNAL_FILES_RECHECK_USEC (2 * USEC_PER_SEC)

/* The maximum size of variable values we'll expand in catalog entries. We bind this to PATH_MAX for now, as
 * we want to be able to show all officially valid paths at least */
#define REPLACE_VAR_MAX PATH_MAX

/* 64 KiB. NOTE(review): presumably the initial value of the journal's data threshold; confirm where the
 * sd_journal object is allocated. */
#define DEFAULT_DATA_THRESHOLD (64*1024)

/* NOTE(review): presumably instantiates the origin tracking helpers for sd_journal, among them the
 * journal_origin_changed() check used by the public entry points; confirm in origin-id.h. */
DEFINE_PRIVATE_ORIGIN_ID_HELPERS(sd_journal, journal);

/* Forward declarations of file-local (static) helpers, so that they can be called ahead of their
 * definitions. */
static void remove_file_real(sd_journal *j, JournalFile *f);
static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f);
static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f);
  60
  61 static int journal_put_error(sd_journal *j, int r, const char *path) {
  62         _cleanup_free_ char *copy = NULL;
  63
  64         /* Memorize an error we encountered, and store which
  65          * file/directory it was generated from. Note that we store
  66          * only *one* path per error code, as the error code is the
  67          * key into the hashmap, and the path is the value. This means
  68          * we keep track only of all error kinds, but not of all error
  69          * locations. This has the benefit that the hashmap cannot
  70          * grow beyond bounds.
  71          *
  72          * We return an error here only if we didn't manage to
  73          * memorize the real error. */
  74
  75         if (r >= 0)
  76                 return r;
  77
  78         if (path) {
  79                 copy = strdup(path);
  80                 if (!copy)
  81                         return -ENOMEM;
  82         }
  83
  84         r = hashmap_ensure_put(&j->errors, &trivial_hash_ops_value_free, INT_TO_PTR(r), copy);
  85         if (r == -EEXIST)
  86                 return 0;
  87         if (r < 0)
  88                 return r;
  89
  90         TAKE_PTR(copy);
  91         return 0;
  92 }
  93
  94 static void detach_location(sd_journal *j) {
  95         JournalFile *f;
  96
  97         assert(j);
  98
  99         j->current_file = NULL;
 100         j->current_field = 0;
 101
 102         ORDERED_HASHMAP_FOREACH(f, j->files)
 103                 journal_file_reset_location(f);
 104 }
 105
 106 static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
 107         assert(l);
 108         assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
 109         assert(f);
 110
 111         *l = (Location) {
 112                 .type = type,
 113                 .seqnum = le64toh(o->entry.seqnum),
 114                 .seqnum_id = f->header->seqnum_id,
 115                 .realtime = le64toh(o->entry.realtime),
 116                 .monotonic = le64toh(o->entry.monotonic),
 117                 .boot_id = o->entry.boot_id,
 118                 .xor_hash = le64toh(o->entry.xor_hash),
 119                 .seqnum_set = true,
 120                 .realtime_set = true,
 121                 .monotonic_set = true,
 122                 .xor_hash_set = true,
 123         };
 124 }
 125
 126 static void set_location(sd_journal *j, JournalFile *f, Object *o) {
 127         assert(j);
 128         assert(f);
 129         assert(o);
 130
 131         init_location(&j->current_location, LOCATION_DISCRETE, f, o);
 132
 133         j->current_file = f;
 134         j->current_field = 0;
 135
 136         /* Let f know its candidate entry was picked. */
 137         assert(f->location_type == LOCATION_SEEK);
 138         f->location_type = LOCATION_DISCRETE;
 139 }
 140
 141 static int match_is_valid(const void *data, size_t size) {
 142         const char *b = ASSERT_PTR(data);
 143
 144         if (size < 2)
 145                 return false;
 146
 147         if (((char*) data)[0] == '_' && ((char*) data)[1] == '_')
 148                 return false;
 149
 150         for (const char *p = b; p < b + size; p++) {
 151
 152                 if (*p == '=')
 153                         return p > b;
 154
 155                 if (*p == '_')
 156                         continue;
 157
 158                 if (*p >= 'A' && *p <= 'Z')
 159                         continue;
 160
 161                 if (ascii_isdigit(*p))
 162                         continue;
 163
 164                 return false;
 165         }
 166
 167         return false;
 168 }
 169
 170 static bool same_field(const void *_a, size_t s, const void *_b, size_t t) {
 171         const uint8_t *a = _a, *b = _b;
 172
 173         for (size_t j = 0; j < s && j < t; j++) {
 174
 175                 if (a[j] != b[j])
 176                         return false;
 177
 178                 if (a[j] == '=')
 179                         return true;
 180         }
 181
 182         assert_not_reached();
 183 }
 184
 185 static Match *match_new(Match *p, MatchType t) {
 186         Match *m;
 187
 188         m = new(Match, 1);
 189         if (!m)
 190                 return NULL;
 191
 192         *m = (Match) {
 193                 .type = t,
 194                 .parent = p,
 195         };
 196
 197         if (p)
 198                 LIST_PREPEND(matches, p->matches, m);
 199
 200         return m;
 201 }
 202
 203 static Match *match_free(Match *m) {
 204         assert(m);
 205
 206         while (m->matches)
 207                 match_free(m->matches);
 208
 209         if (m->parent)
 210                 LIST_REMOVE(matches, m->parent->matches, m);
 211
 212         free(m->data);
 213         return mfree(m);
 214 }
 215
 216 static Match *match_free_if_empty(Match *m) {
 217         if (!m || m->matches)
 218                 return m;
 219
 220         return match_free(m);
 221 }
 222
 223 _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) {
 224         Match *add_here = NULL, *m = NULL;
 225         uint64_t hash;
 226
 227         assert_return(j, -EINVAL);
 228         assert_return(!journal_origin_changed(j), -ECHILD);
 229         assert_return(data, -EINVAL);
 230
 231         /* If the size is unspecified, assume it's a string. Note: 0 is the public value we document for
 232          * this, for historical reasons. Internally, we pretty widely started using SIZE_MAX for this in
 233          * similar cases however, hence accept that too. And internally we actually prefer it, to make things
 234          * less surprising. */
 235         if (IN_SET(size, 0, SIZE_MAX))
 236                 size = strlen(data);
 237
 238         if (!match_is_valid(data, size))
 239                 return -EINVAL;
 240
 241         /* level 0: AND term
 242          * level 1: OR terms
 243          * level 2: AND terms
 244          * level 3: OR terms
 245          * level 4: concrete matches */
 246
 247         if (!j->level0) {
 248                 j->level0 = match_new(NULL, MATCH_AND_TERM);
 249                 if (!j->level0)
 250                         return -ENOMEM;
 251         }
 252
 253         if (!j->level1) {
 254                 j->level1 = match_new(j->level0, MATCH_OR_TERM);
 255                 if (!j->level1)
 256                         return -ENOMEM;
 257         }
 258
 259         if (!j->level2) {
 260                 j->level2 = match_new(j->level1, MATCH_AND_TERM);
 261                 if (!j->level2)
 262                         return -ENOMEM;
 263         }
 264
 265         assert(j->level0->type == MATCH_AND_TERM);
 266         assert(j->level1->type == MATCH_OR_TERM);
 267         assert(j->level2->type == MATCH_AND_TERM);
 268
 269         /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
 270          * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
 271         hash = jenkins_hash64(data, size);
 272
 273         LIST_FOREACH(matches, l3, j->level2->matches) {
 274                 assert(l3->type == MATCH_OR_TERM);
 275
 276                 LIST_FOREACH(matches, l4, l3->matches) {
 277                         assert(l4->type == MATCH_DISCRETE);
 278
 279                         /* Exactly the same match already? Then ignore
 280                          * this addition */
 281                         if (l4->hash == hash &&
 282                             l4->size == size &&
 283                             memcmp(l4->data, data, size) == 0)
 284                                 return 0;
 285
 286                         /* Same field? Then let's add this to this OR term */
 287                         if (same_field(data, size, l4->data, l4->size)) {
 288                                 add_here = l3;
 289                                 break;
 290                         }
 291                 }
 292
 293                 if (add_here)
 294                         break;
 295         }
 296
 297         if (!add_here) {
 298                 add_here = match_new(j->level2, MATCH_OR_TERM);
 299                 if (!add_here)
 300                         goto fail;
 301         }
 302
 303         m = match_new(add_here, MATCH_DISCRETE);
 304         if (!m)
 305                 goto fail;
 306
 307         m->hash = hash;
 308         m->size = size;
 309         m->data = memdup(data, size);
 310         if (!m->data)
 311                 goto fail;
 312
 313         detach_location(j);
 314
 315         return 0;
 316
 317 fail:
 318         match_free(m);
 319         match_free_if_empty(add_here);
 320         j->level2 = match_free_if_empty(j->level2);
 321         j->level1 = match_free_if_empty(j->level1);
 322         j->level0 = match_free_if_empty(j->level0);
 323
 324         return -ENOMEM;
 325 }
 326
 327 int journal_add_match_pair(sd_journal *j, const char *field, const char *value) {
 328         _cleanup_free_ char *s = NULL;
 329
 330         assert(j);
 331         assert(field);
 332         assert(value);
 333
 334         s = strjoin(field, "=", value);
 335         if (!s)
 336                 return -ENOMEM;
 337
 338         return sd_journal_add_match(j, s, SIZE_MAX);
 339 }
 340
 341 int journal_add_matchf(sd_journal *j, const char *format, ...) {
 342         _cleanup_free_ char *s = NULL;
 343         va_list ap;
 344         int r;
 345
 346         assert(j);
 347         assert(format);
 348
 349         va_start(ap, format);
 350         r = vasprintf(&s, format, ap);
 351         va_end(ap);
 352         if (r < 0)
 353                 return -ENOMEM;
 354
 355         return sd_journal_add_match(j, s, SIZE_MAX);
 356 }
 357
 358 _public_ int sd_journal_add_conjunction(sd_journal *j) {
 359         assert_return(j, -EINVAL);
 360         assert_return(!journal_origin_changed(j), -ECHILD);
 361
 362         if (!j->level0)
 363                 return 0;
 364
 365         if (!j->level1)
 366                 return 0;
 367
 368         if (!j->level1->matches)
 369                 return 0;
 370
 371         j->level1 = NULL;
 372         j->level2 = NULL;
 373
 374         return 0;
 375 }
 376
 377 _public_ int sd_journal_add_disjunction(sd_journal *j) {
 378         assert_return(j, -EINVAL);
 379         assert_return(!journal_origin_changed(j), -ECHILD);
 380
 381         if (!j->level0)
 382                 return 0;
 383
 384         if (!j->level1)
 385                 return 0;
 386
 387         if (!j->level2)
 388                 return 0;
 389
 390         if (!j->level2->matches)
 391                 return 0;
 392
 393         j->level2 = NULL;
 394         return 0;
 395 }
 396
 397 static char *match_make_string(Match *m) {
 398         _cleanup_free_ char *p = NULL;
 399         bool enclose = false;
 400
 401         if (!m)
 402                 return strdup("none");
 403
 404         if (m->type == MATCH_DISCRETE)
 405                 return cescape_length(m->data, m->size);
 406
 407         LIST_FOREACH(matches, i, m->matches) {
 408                 _cleanup_free_ char *t = NULL;
 409
 410                 t = match_make_string(i);
 411                 if (!t)
 412                         return NULL;
 413
 414                 if (p) {
 415                         if (!strextend(&p, m->type == MATCH_OR_TERM ? " OR " : " AND ", t))
 416                                 return NULL;
 417
 418                         enclose = true;
 419                 } else
 420                         p = TAKE_PTR(t);
 421         }
 422
 423         if (enclose)
 424                 return strjoin("(", p, ")");
 425
 426         return TAKE_PTR(p);
 427 }
 428
 429 char* journal_make_match_string(sd_journal *j) {
 430         assert(j);
 431
 432         return match_make_string(j->level0);
 433 }
 434
 435 _public_ void sd_journal_flush_matches(sd_journal *j) {
 436         if (!j || journal_origin_changed(j))
 437                 return;
 438
 439         if (j->level0)
 440                 match_free(j->level0);
 441
 442         j->level0 = j->level1 = j->level2 = NULL;
 443
 444         detach_location(j);
 445 }
 446
 447 static int newest_by_boot_id_compare(const NewestByBootId *a, const NewestByBootId *b) {
 448         return id128_compare_func(&a->boot_id, &b->boot_id);
 449 }
 450
 451 static void journal_file_unlink_newest_by_boot_id(sd_journal *j, JournalFile *f) {
 452         NewestByBootId *found;
 453
 454         assert(j);
 455         assert(f);
 456
 457         if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) /* not linked currently, hence this is a NOP */
 458                 return;
 459
 460         found = typesafe_bsearch(&(NewestByBootId) { .boot_id = f->newest_boot_id },
 461                                  j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
 462         assert(found);
 463
 464         assert_se(prioq_remove(found->prioq, f, &f->newest_boot_id_prioq_idx) > 0);
 465         f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL;
 466
 467         /* The prioq may be empty, but that should not cause any issue. Let's keep it. */
 468 }
 469
 470 static void journal_clear_newest_by_boot_id(sd_journal *j) {
 471         FOREACH_ARRAY(i, j->newest_by_boot_id, j->n_newest_by_boot_id) {
 472                 JournalFile *f;
 473
 474                 while ((f = prioq_peek(i->prioq)))
 475                         journal_file_unlink_newest_by_boot_id(j, f);
 476
 477                 prioq_free(i->prioq);
 478         }
 479
 480         j->newest_by_boot_id = mfree(j->newest_by_boot_id);
 481         j->n_newest_by_boot_id = 0;
 482 }
 483
 484 static int journal_file_newest_monotonic_compare(const void *a, const void *b) {
 485         const JournalFile *x = a, *y = b;
 486
 487         return -CMP(x->newest_monotonic_usec, y->newest_monotonic_usec); /* Invert order, we want newest first! */
 488 }
 489
 490 static int journal_file_reshuffle_newest_by_boot_id(sd_journal *j, JournalFile *f) {
 491         NewestByBootId *found;
 492         int r;
 493
 494         assert(j);
 495         assert(f);
 496
 497         found = typesafe_bsearch(&(NewestByBootId) { .boot_id = f->newest_boot_id },
 498                                  j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
 499         if (found) {
 500                 /* There's already a priority queue for this boot ID */
 501
 502                 if (f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL) {
 503                         r = prioq_put(found->prioq, f, &f->newest_boot_id_prioq_idx); /* Insert if we aren't in there yet */
 504                         if (r < 0)
 505                                 return r;
 506                 } else
 507                         prioq_reshuffle(found->prioq, f, &f->newest_boot_id_prioq_idx); /* Reshuffle otherwise */
 508
 509         } else {
 510                 _cleanup_(prioq_freep) Prioq *q = NULL;
 511
 512                 /* No priority queue yet, then allocate one */
 513
 514                 assert(f->newest_boot_id_prioq_idx == PRIOQ_IDX_NULL); /* we can't be a member either */
 515
 516                 q = prioq_new(journal_file_newest_monotonic_compare);
 517                 if (!q)
 518                         return -ENOMEM;
 519
 520                 r = prioq_put(q, f, &f->newest_boot_id_prioq_idx);
 521                 if (r < 0)
 522                         return r;
 523
 524                 if (!GREEDY_REALLOC(j->newest_by_boot_id, j->n_newest_by_boot_id + 1)) {
 525                         f->newest_boot_id_prioq_idx = PRIOQ_IDX_NULL;
 526                         return -ENOMEM;
 527                 }
 528
 529                 j->newest_by_boot_id[j->n_newest_by_boot_id++] = (NewestByBootId) {
 530                         .boot_id = f->newest_boot_id,
 531                         .prioq = TAKE_PTR(q),
 532                 };
 533
 534                 typesafe_qsort(j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
 535         }
 536
 537         return 0;
 538 }
 539
 540 static int journal_file_find_newest_for_boot_id(
 541                 sd_journal *j,
 542                 sd_id128_t id,
 543                 JournalFile **ret) {
 544
 545         JournalFile *prev = NULL;
 546         int r;
 547
 548         assert(j);
 549         assert(ret);
 550
 551         /* Before we use it, let's refresh the timestamp from the header, and reshuffle our prioq
 552          * accordingly. We do this only a bunch of times, to not be caught in some update loop. */
 553         for (unsigned n_tries = 0;; n_tries++) {
 554                 NewestByBootId *found;
 555                 JournalFile *f;
 556
 557                 found = typesafe_bsearch(&(NewestByBootId) { .boot_id = id },
 558                                          j->newest_by_boot_id, j->n_newest_by_boot_id, newest_by_boot_id_compare);
 559
 560                 f = found ? prioq_peek(found->prioq) : NULL;
 561                 if (!f)
 562                         return log_debug_errno(SYNTHETIC_ERRNO(ENODATA),
 563                                                "Requested delta for boot ID %s, but we have no information about that boot ID.", SD_ID128_TO_STRING(id));
 564
 565                 if (f == prev || n_tries >= 5) {
 566                         /* This was already the best answer in the previous run, or we tried too often, use it */
 567                         *ret = f;
 568                         return 0;
 569                 }
 570
 571                 prev = f;
 572
 573                 /* Let's read the journal file's current timestamp once, before we return it, maybe it has changed. */
 574                 r = journal_file_read_tail_timestamp(j, f);
 575                 if (r < 0)
 576                         return log_debug_errno(r, "Failed to read tail timestamp while trying to find newest journal file for boot ID %s.", SD_ID128_TO_STRING(id));
 577                 if (r == 0) {
 578                         /* No new entry found. */
 579                         *ret = f;
 580                         return 0;
 581                 }
 582
 583                 /* Refreshing the timestamp we read might have reshuffled the prioq, hence let's check the
 584                  * prioq again and only use the information once we reached an equilibrium or hit a limit */
 585         }
 586 }
 587
 588 static int compare_boot_ids(sd_journal *j, sd_id128_t a, sd_id128_t b) {
 589         JournalFile *x, *y;
 590
 591         assert(j);
 592
 593         /* Try to find the newest open journal file for the two boot ids */
 594         if (journal_file_find_newest_for_boot_id(j, a, &x) < 0 ||
 595             journal_file_find_newest_for_boot_id(j, b, &y) < 0)
 596                 return 0;
 597
 598         /* Only compare the boot id timestamps if they originate from the same machine. If they are from
 599          * different machines, then we timestamps of the boot ids might be as off as the timestamps on the
 600          * entries and hence not useful for comparing. */
 601         if (!sd_id128_equal(x->newest_machine_id, y->newest_machine_id))
 602                 return 0;
 603
 604         return CMP(x->newest_realtime_usec, y->newest_realtime_usec);
 605 }
 606
 607 static int compare_with_location(
 608                 sd_journal *j,
 609                 const JournalFile *f,
 610                 const Location *l,
 611                 const JournalFile *current_file) {
 612         int r;
 613
 614         assert(j);
 615         assert(f);
 616         assert(l);
 617         assert(f->location_type == LOCATION_SEEK);
 618         assert(IN_SET(l->type, LOCATION_DISCRETE, LOCATION_SEEK));
 619
 620         if (l->monotonic_set &&
 621             sd_id128_equal(f->current_boot_id, l->boot_id) &&
 622             l->realtime_set &&
 623             f->current_realtime == l->realtime &&
 624             l->xor_hash_set &&
 625             f->current_xor_hash == l->xor_hash &&
 626             l->seqnum_set &&
 627             sd_id128_equal(f->header->seqnum_id, l->seqnum_id) &&
 628             f->current_seqnum == l->seqnum &&
 629             f != current_file)
 630                 return 0;
 631
 632         if (l->seqnum_set &&
 633             sd_id128_equal(f->header->seqnum_id, l->seqnum_id)) {
 634                 r = CMP(f->current_seqnum, l->seqnum);
 635                 if (r != 0)
 636                         return r;
 637         }
 638
 639         if (l->monotonic_set) {
 640                 /* If both arguments have the same boot ID, then we can compare the monotonic timestamps. If
 641                  * they are distinct, then we might able to lookup the timestamps of those boot IDs (if they
 642                  * are from the same machine) and order by that. */
 643                 if (sd_id128_equal(f->current_boot_id, l->boot_id))
 644                         r = CMP(f->current_monotonic, l->monotonic);
 645                 else
 646                         r = compare_boot_ids(j, f->current_boot_id, l->boot_id);
 647                 if (r != 0)
 648                         return r;
 649         }
 650
 651         if (l->realtime_set) {
 652                 r = CMP(f->current_realtime, l->realtime);
 653                 if (r != 0)
 654                         return r;
 655         }
 656
 657         if (l->xor_hash_set) {
 658                 r = CMP(f->current_xor_hash, l->xor_hash);
 659                 if (r != 0)
 660                         return r;
 661         }
 662
 663         return 0;
 664 }
 665
 666 static int next_for_match(
 667                 sd_journal *j,
 668                 Match *m,
 669                 JournalFile *f,
 670                 uint64_t after_offset,
 671                 direction_t direction,
 672                 Object **ret,
 673                 uint64_t *ret_offset) {
 674
 675         int r;
 676         uint64_t np = 0;
 677
 678         assert(j);
 679         assert(m);
 680         assert(f);
 681
 682         if (m->type == MATCH_DISCRETE) {
 683                 Object *d;
 684                 uint64_t hash;
 685
 686                 /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
 687                  * we can use what we pre-calculated. */
 688                 if (JOURNAL_HEADER_KEYED_HASH(f->header))
 689                         hash = journal_file_hash_data(f, m->data, m->size);
 690                 else
 691                         hash = m->hash;
 692
 693                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, NULL);
 694                 if (r <= 0)
 695                         return r;
 696
 697                 return journal_file_move_to_entry_by_offset_for_data(f, d, after_offset, direction, ret, ret_offset);
 698
 699         } else if (m->type == MATCH_OR_TERM) {
 700
 701                 /* Find the earliest match beyond after_offset */
 702
 703                 LIST_FOREACH(matches, i, m->matches) {
 704                         uint64_t cp;
 705
 706                         r = next_for_match(j, i, f, after_offset, direction, NULL, &cp);
 707                         if (r < 0)
 708                                 return r;
 709                         else if (r > 0) {
 710                                 if (np == 0 || (direction == DIRECTION_DOWN ? cp < np : cp > np))
 711                                         np = cp;
 712                         }
 713                 }
 714
 715                 if (np == 0)
 716                         return 0;
 717
 718         } else if (m->type == MATCH_AND_TERM) {
 719                 Match *last_moved;
 720
 721                 /* Always jump to the next matching entry and repeat
 722                  * this until we find an offset that matches for all
 723                  * matches. */
 724
 725                 if (!m->matches)
 726                         return 0;
 727
 728                 r = next_for_match(j, m->matches, f, after_offset, direction, NULL, &np);
 729                 if (r <= 0)
 730                         return r;
 731
 732                 assert(direction == DIRECTION_DOWN ? np >= after_offset : np <= after_offset);
 733                 last_moved = m->matches;
 734
 735                 LIST_LOOP_BUT_ONE(matches, i, m->matches, last_moved) {
 736                         uint64_t cp;
 737
 738                         r = next_for_match(j, i, f, np, direction, NULL, &cp);
 739                         if (r <= 0)
 740                                 return r;
 741
 742                         assert(direction == DIRECTION_DOWN ? cp >= np : cp <= np);
 743                         if (direction == DIRECTION_DOWN ? cp > np : cp < np) {
 744                                 np = cp;
 745                                 last_moved = i;
 746                         }
 747                 }
 748         }
 749
 750         assert(np > 0);
 751
 752         if (ret) {
 753                 r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret);
 754                 if (r < 0)
 755                         return r;
 756         }
 757
 758         if (ret_offset)
 759                 *ret_offset = np;
 760
 761         return 1;
 762 }
 763
 764 static int move_by_boot_for_data(
 765                 sd_journal *j,
 766                 JournalFile *f,
 767                 direction_t direction,
 768                 sd_id128_t boot_id,
 769                 uint64_t data_offset,
 770                 Object **ret,
 771                 uint64_t *ret_offset) {
 772
 773         int r;
 774
 775         assert(j);
 776         assert(f);
 777         assert(IN_SET(direction, DIRECTION_DOWN, DIRECTION_UP));
 778
 779         for (;;) {
 780                 /* First, move to the last (or first when DIRECTION_UP) entry for the boot. */
 781                 uint64_t p = 0;
 782                 r = journal_file_move_to_entry_by_monotonic(f, boot_id,
 783                                                             direction == DIRECTION_DOWN ? USEC_INFINITY : 0,
 784                                                             direction == DIRECTION_DOWN ? DIRECTION_UP : DIRECTION_DOWN,
 785                                                             NULL, &p);
 786                 if (r <= 0)
 787                         return r;
 788
 789                 /* Then, move to the first entry of the next boot (or the last entry of the previous boot with DIRECTION_UP). */
 790                 Object *entry;
 791                 r = journal_file_next_entry(f, p, direction, &entry, NULL);
 792                 if (r <= 0) /* r == 0 means that no next (or previous) boot found. That is, we are at HEAD or TAIL now. */
 793                         return r;
 794
 795                 assert(entry->object.type == OBJECT_ENTRY);
 796                 boot_id = entry->entry.boot_id;
 797
 798                 /* Note, this object cannot be reused, as journal_file_move_to_entry_by_monotonic() may invalidate the object. */
 799                 Object *data;
 800                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data);
 801                 if (r < 0)
 802                         return r;
 803
 804                 /* Then, move to the matching entry. */
 805                 r = journal_file_move_to_entry_by_monotonic_for_data(f, data, boot_id,
 806                                                                      direction == DIRECTION_DOWN ? 0 : USEC_INFINITY, direction,
 807                                                                      ret, ret_offset);
 808                 if (r != 0) /* Here r == 0 is OK, as that means the boot contains no entry matching with the data. */
 809                         return r;
 810         }
 811 }
 812
 813 static int find_location_for_match(
 814                 sd_journal *j,
 815                 Match *m,
 816                 JournalFile *f,
 817                 direction_t direction,
 818                 Object **ret,
 819                 uint64_t *ret_offset) {
 820
 821         int r;
 822
 823         assert(j);
 824         assert(m);
 825         assert(f);
 826
 827         if (m->type == MATCH_DISCRETE) {
 828                 Object *d;
 829                 uint64_t dp, hash;
 830
 831                 if (JOURNAL_HEADER_KEYED_HASH(f->header))
 832                         hash = journal_file_hash_data(f, m->data, m->size);
 833                 else
 834                         hash = m->hash;
 835
 836                 r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, &d, &dp);
 837                 if (r <= 0)
 838                         return r;
 839
 840                 if (j->current_location.type == LOCATION_HEAD)
 841                         return direction == DIRECTION_DOWN ? journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, ret, ret_offset) : 0;
 842                 if (j->current_location.type == LOCATION_TAIL)
 843                         return direction == DIRECTION_UP ? journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, ret, ret_offset) : 0;
 844                 if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
 845                         return journal_file_move_to_entry_by_seqnum_for_data(f, d, j->current_location.seqnum, direction, ret, ret_offset);
 846                 if (j->current_location.monotonic_set) {
 847                         r = journal_file_move_to_entry_by_monotonic_for_data(f, d, j->current_location.boot_id, j->current_location.monotonic, direction, ret, ret_offset);
 848                         if (r != 0)
 849                                 return r;
 850
 851                         /* The data object might have been invalidated. */
 852                         r = journal_file_move_to_object(f, OBJECT_DATA, dp, &d);
 853                         if (r < 0)
 854                                 return r;
 855
 856                         /* If not found, fall back to realtime if set, or go to the first entry of the next boot
 857                          * (or the last entry of the previous boot when DIRECTION_UP). */
 858                 }
 859                 if (j->current_location.realtime_set)
 860                         return journal_file_move_to_entry_by_realtime_for_data(f, d, j->current_location.realtime, direction, ret, ret_offset);
 861
 862                 if (j->current_location.monotonic_set)
 863                         return move_by_boot_for_data(j, f, direction, j->current_location.boot_id, dp, ret, ret_offset);
 864
 865                 return journal_file_move_to_entry_for_data(f, d, direction, ret, ret_offset);
 866
 867         } else if (m->type == MATCH_OR_TERM) {
 868                 uint64_t np = 0;
 869
 870                 /* Find the earliest match */
 871
 872                 LIST_FOREACH(matches, i, m->matches) {
 873                         uint64_t cp;
 874
 875                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
 876                         if (r < 0)
 877                                 return r;
 878                         else if (r > 0) {
 879                                 if (np == 0 || (direction == DIRECTION_DOWN ? np > cp : np < cp))
 880                                         np = cp;
 881                         }
 882                 }
 883
 884                 if (np == 0)
 885                         return 0;
 886
 887                 if (ret) {
 888                         r = journal_file_move_to_object(f, OBJECT_ENTRY, np, ret);
 889                         if (r < 0)
 890                                 return r;
 891                 }
 892
 893                 if (ret_offset)
 894                         *ret_offset = np;
 895
 896                 return 1;
 897
 898         } else {
 899                 uint64_t np = 0;
 900
 901                 assert(m->type == MATCH_AND_TERM);
 902
 903                 /* First jump to the last match, and then find the
 904                  * next one where all matches match */
 905
 906                 if (!m->matches)
 907                         return 0;
 908
 909                 LIST_FOREACH(matches, i, m->matches) {
 910                         uint64_t cp;
 911
 912                         r = find_location_for_match(j, i, f, direction, NULL, &cp);
 913                         if (r <= 0)
 914                                 return r;
 915
 916                         if (np == 0 || (direction == DIRECTION_DOWN ? cp > np : cp < np))
 917                                 np = cp;
 918                 }
 919
 920                 return next_for_match(j, m, f, np, direction, ret, ret_offset);
 921         }
 922 }
 923
 924 static int find_location_with_matches(
 925                 sd_journal *j,
 926                 JournalFile *f,
 927                 direction_t direction,
 928                 Object **ret,
 929                 uint64_t *ret_offset) {
 930
 931         int r;
 932
 933         assert(j);
 934         assert(f);
 935
 936         if (j->level0)
 937                 return find_location_for_match(j, j->level0, f, direction, ret, ret_offset);
 938
 939         /* No matches is simple */
 940
 941         if (j->current_location.type == LOCATION_HEAD)
 942                 return direction == DIRECTION_DOWN ? journal_file_next_entry(f, 0, DIRECTION_DOWN, ret, ret_offset) : 0;
 943         if (j->current_location.type == LOCATION_TAIL)
 944                 return direction == DIRECTION_UP ? journal_file_next_entry(f, 0, DIRECTION_UP, ret, ret_offset) : 0;
 945         if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id))
 946                 return journal_file_move_to_entry_by_seqnum(f, j->current_location.seqnum, direction, ret, ret_offset);
 947         if (j->current_location.monotonic_set) {
 948                 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id, j->current_location.monotonic, direction, ret, ret_offset);
 949                 if (r != 0)
 950                         return r;
 951
 952                 /* If not found, fall back to realtime if set, or go to the first entry of the next boot
 953                  * (or the last entry of the previous boot when DIRECTION_UP). */
 954         }
 955         if (j->current_location.realtime_set)
 956                 return journal_file_move_to_entry_by_realtime(f, j->current_location.realtime, direction, ret, ret_offset);
 957
 958         if (j->current_location.monotonic_set) {
 959                 uint64_t p = 0;
 960
 961                 /* If not found in the above, first move to the last (or first when DIRECTION_UP) entry for the boot. */
 962                 r = journal_file_move_to_entry_by_monotonic(f, j->current_location.boot_id,
 963                                                             direction == DIRECTION_DOWN ? USEC_INFINITY : 0,
 964                                                             direction == DIRECTION_DOWN ? DIRECTION_UP : DIRECTION_DOWN,
 965                                                             NULL, &p);
 966                 if (r <= 0)
 967                         return r;
 968
 969                 /* Then, move to the next or previous boot. */
 970                 return journal_file_next_entry(f, p, direction, ret, ret_offset);
 971         }
 972
 973         return journal_file_next_entry(f, 0, direction, ret, ret_offset);
 974 }
 975
 976 static int next_with_matches(
 977                 sd_journal *j,
 978                 JournalFile *f,
 979                 direction_t direction,
 980                 Object **ret,
 981                 uint64_t *ret_offset) {
 982
 983         assert(j);
 984         assert(f);
 985
 986         /* No matches is easy. We simple advance the file
 987          * pointer by one. */
 988         if (!j->level0)
 989                 return journal_file_next_entry(f, f->current_offset, direction, ret, ret_offset);
 990
 991         /* If we have a match then we look for the next matching entry
 992          * with an offset at least one step larger */
 993         return next_for_match(j, j->level0, f,
 994                               direction == DIRECTION_DOWN ? f->current_offset + 1
 995                                                           : f->current_offset - 1,
 996                               direction, ret, ret_offset);
 997 }
 998
 999 static int next_beyond_location(sd_journal *j, JournalFile *f, direction_t direction) {
1000         Object *c;
1001         uint64_t cp, n_entries;
1002         int r;
1003
1004         assert(j);
1005         assert(f);
1006
1007         (void) journal_file_read_tail_timestamp(j, f);
1008
1009         n_entries = le64toh(f->header->n_entries);
1010
1011         /* If we hit EOF before, we don't need to look into this file again
1012          * unless direction changed or new entries appeared. */
1013         if (f->last_direction == direction &&
1014             f->location_type == (direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD) &&
1015             n_entries == f->last_n_entries)
1016                 return 0;
1017
1018         f->last_n_entries = n_entries;
1019
1020         if (f->last_direction == direction && f->current_offset > 0) {
1021                 /* LOCATION_SEEK here means we did the work in a previous
1022                  * iteration and the current location already points to a
1023                  * candidate entry. */
1024                 if (f->location_type != LOCATION_SEEK) {
1025                         r = next_with_matches(j, f, direction, &c, &cp);
1026                         if (r <= 0)
1027                                 return r;
1028
1029                         journal_file_save_location(f, c, cp);
1030                 }
1031         } else {
1032                 r = find_location_with_matches(j, f, direction, &c, &cp);
1033                 /* LOCATION_SEEK specified to j->current_location.type here means that this is called first
1034                  * after sd_journal_seek_monotonic_usec() or friends was called. In that case, this file may
1035                  * not contain any matching entries with the user-specified location, but another file may
1036                  * contain them. If so, the second call of this function will use the seqnum, and we may find
1037                  * an entry in _this_ file with the seqnum. To prevent the second call of this function exits
1038                  * earlier by the first 'if' block of this function, do not save the direction if the current
1039                  * location is LOCATION_SEEK. */
1040                 if (r > 0 || j->current_location.type != LOCATION_SEEK)
1041                         f->last_direction = direction;
1042                 else
1043                         assert(f->last_direction == _DIRECTION_INVALID);
1044                 if (r <= 0)
1045                         return r;
1046
1047                 journal_file_save_location(f, c, cp);
1048         }
1049
1050         /* OK, we found the spot, now let's advance until an entry
1051          * that is actually different from what we were previously
1052          * looking at. This is necessary to handle entries which exist
1053          * in two (or more) journal files, and which shall all be
1054          * suppressed but one. */
1055
1056         for (;;) {
1057                 bool found;
1058
1059                 if (j->current_location.type == LOCATION_DISCRETE) {
1060                         r = compare_with_location(j, f, &j->current_location, j->current_file);
1061                         found = direction == DIRECTION_DOWN ? r > 0 : r < 0;
1062                 } else
1063                         found = true;
1064
1065                 if (found)
1066                         return 1;
1067
1068                 r = next_with_matches(j, f, direction, &c, &cp);
1069                 if (r <= 0)
1070                         return r;
1071
1072                 journal_file_save_location(f, c, cp);
1073         }
1074 }
1075
1076 static int compare_locations(sd_journal *j, JournalFile *af, JournalFile *bf) {
1077         int r;
1078
1079         assert(j);
1080         assert(af);
1081         assert(af->header);
1082         assert(bf);
1083         assert(bf->header);
1084         assert(af->location_type == LOCATION_SEEK);
1085         assert(bf->location_type == LOCATION_SEEK);
1086
1087         /* If contents, timestamps and seqnum match, these entries are identical. */
1088         if (sd_id128_equal(af->current_boot_id, bf->current_boot_id) &&
1089             af->current_monotonic == bf->current_monotonic &&
1090             af->current_realtime == bf->current_realtime &&
1091             af->current_xor_hash == bf->current_xor_hash &&
1092             sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id) &&
1093             af->current_seqnum == bf->current_seqnum)
1094                 return 0;
1095
1096         if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id)) {
1097                 /* If this is from the same seqnum source, compare seqnums */
1098                 r = CMP(af->current_seqnum, bf->current_seqnum);
1099                 if (r != 0)
1100                         return r;
1101
1102                 /* Wow! This is weird, different data but the same seqnums? Something is borked, but let's
1103                  * make the best of it and compare by time. */
1104         }
1105
1106         if (sd_id128_equal(af->current_boot_id, bf->current_boot_id))
1107                 /* If the boot id matches, compare monotonic time */
1108                 r = CMP(af->current_monotonic, bf->current_monotonic);
1109         else
1110                 /* If they don't match try to compare boot IDs */
1111                 r = compare_boot_ids(j, af->current_boot_id, bf->current_boot_id);
1112         if (r != 0)
1113                 return r;
1114
1115         /* Otherwise, compare UTC time */
1116         r = CMP(af->current_realtime, bf->current_realtime);
1117         if (r != 0)
1118                 return r;
1119
1120         /* Finally, compare by contents */
1121         return CMP(af->current_xor_hash, bf->current_xor_hash);
1122 }
1123
1124 static int real_journal_next(sd_journal *j, direction_t direction) {
1125         JournalFile *new_file = NULL;
1126         unsigned n_files;
1127         const void **files;
1128         Object *o;
1129         int r;
1130
1131         assert_return(j, -EINVAL);
1132         assert_return(!journal_origin_changed(j), -ECHILD);
1133
1134         r = iterated_cache_get(j->files_cache, NULL, &files, &n_files);
1135         if (r < 0)
1136                 return r;
1137
1138         FOREACH_ARRAY(_f, files, n_files) {
1139                 JournalFile *f = (JournalFile*) *_f;
1140                 bool found;
1141
1142                 r = next_beyond_location(j, f, direction);
1143                 if (r < 0) {
1144                         log_debug_errno(r, "Can't iterate through %s, ignoring: %m", f->path);
1145                         remove_file_real(j, f);
1146                         continue;
1147                 } else if (r == 0) {
1148                         f->location_type = direction == DIRECTION_DOWN ? LOCATION_TAIL : LOCATION_HEAD;
1149                         continue;
1150                 }
1151
1152                 if (!new_file)
1153                         found = true;
1154                 else {
1155                         r = compare_locations(j, f, new_file);
1156                         found = direction == DIRECTION_DOWN ? r < 0 : r > 0;
1157                 }
1158
1159                 if (found)
1160                         new_file = f;
1161         }
1162
1163         if (!new_file)
1164                 return 0;
1165
1166         r = journal_file_move_to_object(new_file, OBJECT_ENTRY, new_file->current_offset, &o);
1167         if (r < 0)
1168                 return r;
1169
1170         set_location(j, new_file, o);
1171
1172         return 1;
1173 }
1174
1175 _public_ int sd_journal_next(sd_journal *j) {
1176         return real_journal_next(j, DIRECTION_DOWN);
1177 }
1178
1179 _public_ int sd_journal_previous(sd_journal *j) {
1180         return real_journal_next(j, DIRECTION_UP);
1181 }
1182
1183 _public_ int sd_journal_step_one(sd_journal *j, int advanced) {
1184         assert_return(j, -EINVAL);
1185
1186         if (j->current_location.type == LOCATION_HEAD)
1187                 return sd_journal_next(j);
1188         if (j->current_location.type == LOCATION_TAIL)
1189                 return sd_journal_previous(j);
1190         return real_journal_next(j, advanced ? DIRECTION_DOWN : DIRECTION_UP);
1191 }
1192
1193 static int real_journal_next_skip(sd_journal *j, direction_t direction, uint64_t skip) {
1194         int c = 0, r;
1195
1196         assert_return(j, -EINVAL);
1197         assert_return(!journal_origin_changed(j), -ECHILD);
1198         assert_return(skip <= INT_MAX, -ERANGE);
1199
1200         if (skip == 0) {
1201                 /* If this is not a discrete skip, then at least
1202                  * resolve the current location */
1203                 if (j->current_location.type != LOCATION_DISCRETE) {
1204                         r = real_journal_next(j, direction);
1205                         if (r < 0)
1206                                 return r;
1207                 }
1208
1209                 return 0;
1210         }
1211
1212         do {
1213                 r = real_journal_next(j, direction);
1214                 if (r < 0)
1215                         return r;
1216
1217                 if (r == 0)
1218                         return c;
1219
1220                 skip--;
1221                 c++;
1222         } while (skip > 0);
1223
1224         return c;
1225 }
1226
1227 _public_ int sd_journal_next_skip(sd_journal *j, uint64_t skip) {
1228         return real_journal_next_skip(j, DIRECTION_DOWN, skip);
1229 }
1230
1231 _public_ int sd_journal_previous_skip(sd_journal *j, uint64_t skip) {
1232         return real_journal_next_skip(j, DIRECTION_UP, skip);
1233 }
1234
1235 _public_ int sd_journal_get_cursor(sd_journal *j, char **ret_cursor) {
1236         Object *o;
1237         int r;
1238
1239         assert_return(j, -EINVAL);
1240         assert_return(!journal_origin_changed(j), -ECHILD);
1241
1242         if (!j->current_file || j->current_file->current_offset <= 0)
1243                 return -EADDRNOTAVAIL;
1244
1245         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1246         if (r < 0)
1247                 return r;
1248
1249         if (!ret_cursor)
1250                 return 0;
1251
1252         if (asprintf(ret_cursor,
1253                      "s=%s;i=%"PRIx64";b=%s;m=%"PRIx64";t=%"PRIx64";x=%"PRIx64,
1254                      SD_ID128_TO_STRING(j->current_file->header->seqnum_id), le64toh(o->entry.seqnum),
1255                      SD_ID128_TO_STRING(o->entry.boot_id), le64toh(o->entry.monotonic),
1256                      le64toh(o->entry.realtime),
1257                      le64toh(o->entry.xor_hash)) < 0)
1258                 return -ENOMEM;
1259
1260         return 0;
1261 }
1262
1263 _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
1264         unsigned long long seqnum, monotonic, realtime, xor_hash;
1265         bool seqnum_id_set = false,
1266              seqnum_set = false,
1267              boot_id_set = false,
1268              monotonic_set = false,
1269              realtime_set = false,
1270              xor_hash_set = false;
1271         sd_id128_t seqnum_id, boot_id;
1272         int r;
1273
1274         assert_return(j, -EINVAL);
1275         assert_return(!journal_origin_changed(j), -ECHILD);
1276         assert_return(!isempty(cursor), -EINVAL);
1277
1278         for (const char *p = cursor;;) {
1279                 _cleanup_free_ char *word = NULL;
1280
1281                 r = extract_first_word(&p, &word, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1282                 if (r < 0)
1283                         return r;
1284                 if (r == 0)
1285                         break;
1286
1287                 if (word[0] == '\0' || word[1] != '=')
1288                         return -EINVAL;
1289
1290                 switch (word[0]) {
1291                 case 's':
1292                         seqnum_id_set = true;
1293                         r = sd_id128_from_string(word + 2, &seqnum_id);
1294                         if (r < 0)
1295                                 return r;
1296                         break;
1297
1298                 case 'i':
1299                         seqnum_set = true;
1300                         if (sscanf(word + 2, "%llx", &seqnum) != 1)
1301                                 return -EINVAL;
1302                         break;
1303
1304                 case 'b':
1305                         boot_id_set = true;
1306                         r = sd_id128_from_string(word + 2, &boot_id);
1307                         if (r < 0)
1308                                 return r;
1309                         break;
1310
1311                 case 'm':
1312                         monotonic_set = true;
1313                         if (sscanf(word + 2, "%llx", &monotonic) != 1)
1314                                 return -EINVAL;
1315                         break;
1316
1317                 case 't':
1318                         realtime_set = true;
1319                         if (sscanf(word + 2, "%llx", &realtime) != 1)
1320                                 return -EINVAL;
1321                         break;
1322
1323                 case 'x':
1324                         xor_hash_set = true;
1325                         if (sscanf(word + 2, "%llx", &xor_hash) != 1)
1326                                 return -EINVAL;
1327                         break;
1328                 }
1329         }
1330
1331         if ((!seqnum_set || !seqnum_id_set) &&
1332             (!monotonic_set || !boot_id_set) &&
1333             !realtime_set)
1334                 return -EINVAL;
1335
1336         detach_location(j);
1337         j->current_location = (Location) {
1338                 .type = LOCATION_SEEK,
1339         };
1340
1341         if (realtime_set) {
1342                 j->current_location.realtime = (uint64_t) realtime;
1343                 j->current_location.realtime_set = true;
1344         }
1345
1346         if (seqnum_set && seqnum_id_set) {
1347                 j->current_location.seqnum = (uint64_t) seqnum;
1348                 j->current_location.seqnum_id = seqnum_id;
1349                 j->current_location.seqnum_set = true;
1350         }
1351
1352         if (monotonic_set && boot_id_set) {
1353                 j->current_location.monotonic = (uint64_t) monotonic;
1354                 j->current_location.boot_id = boot_id;
1355                 j->current_location.monotonic_set = true;
1356         }
1357
1358         if (xor_hash_set) {
1359                 j->current_location.xor_hash = (uint64_t) xor_hash;
1360                 j->current_location.xor_hash_set = true;
1361         }
1362
1363         return 0;
1364 }
1365
1366 _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) {
1367         int r;
1368         Object *o;
1369
1370         assert_return(j, -EINVAL);
1371         assert_return(!journal_origin_changed(j), -ECHILD);
1372         assert_return(!isempty(cursor), -EINVAL);
1373
1374         if (!j->current_file || j->current_file->current_offset <= 0)
1375                 return -EADDRNOTAVAIL;
1376
1377         r = journal_file_move_to_object(j->current_file, OBJECT_ENTRY, j->current_file->current_offset, &o);
1378         if (r < 0)
1379                 return r;
1380
1381         for (;;) {
1382                 _cleanup_free_ char *item = NULL;
1383                 unsigned long long ll;
1384                 sd_id128_t id;
1385
1386                 r = extract_first_word(&cursor, &item, ";", EXTRACT_DONT_COALESCE_SEPARATORS);
1387                 if (r < 0)
1388                         return r;
1389
1390                 if (r == 0)
1391                         break;
1392
1393                 if (strlen(item) < 2 || item[1] != '=')
1394                         return -EINVAL;
1395
1396                 switch (item[0]) {
1397
1398                 case 's':
1399                         r = sd_id128_from_string(item+2, &id);
1400                         if (r < 0)
1401                                 return r;
1402                         if (!sd_id128_equal(id, j->current_file->header->seqnum_id))
1403                                 return 0;
1404                         break;
1405
1406                 case 'i':
1407                         if (sscanf(item+2, "%llx", &ll) != 1)
1408                                 return -EINVAL;
1409                         if (ll != le64toh(o->entry.seqnum))
1410                                 return 0;
1411                         break;
1412
1413                 case 'b':
1414                         r = sd_id128_from_string(item+2, &id);
1415                         if (r < 0)
1416                                 return r;
1417                         if (!sd_id128_equal(id, o->entry.boot_id))
1418                                 return 0;
1419                         break;
1420
1421                 case 'm':
1422                         if (sscanf(item+2, "%llx", &ll) != 1)
1423                                 return -EINVAL;
1424                         if (ll != le64toh(o->entry.monotonic))
1425                                 return 0;
1426                         break;
1427
1428                 case 't':
1429                         if (sscanf(item+2, "%llx", &ll) != 1)
1430                                 return -EINVAL;
1431                         if (ll != le64toh(o->entry.realtime))
1432                                 return 0;
1433                         break;
1434
1435                 case 'x':
1436                         if (sscanf(item+2, "%llx", &ll) != 1)
1437                                 return -EINVAL;
1438                         if (ll != le64toh(o->entry.xor_hash))
1439                                 return 0;
1440                         break;
1441                 }
1442         }
1443
1444         return 1;
1445 }
1446
1447 _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) {
1448         assert_return(j, -EINVAL);
1449         assert_return(!journal_origin_changed(j), -ECHILD);
1450
1451         detach_location(j);
1452
1453         j->current_location = (Location) {
1454                 .type = LOCATION_SEEK,
1455                 .boot_id = boot_id,
1456                 .monotonic = usec,
1457                 .monotonic_set = true,
1458         };
1459
1460         return 0;
1461 }
1462
1463 _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
1464         assert_return(j, -EINVAL);
1465         assert_return(!journal_origin_changed(j), -ECHILD);
1466
1467         detach_location(j);
1468
1469         j->current_location = (Location) {
1470                 .type = LOCATION_SEEK,
1471                 .realtime = usec,
1472                 .realtime_set = true,
1473         };
1474
1475         return 0;
1476 }
1477
1478 _public_ int sd_journal_seek_head(sd_journal *j) {
1479         assert_return(j, -EINVAL);
1480         assert_return(!journal_origin_changed(j), -ECHILD);
1481
1482         detach_location(j);
1483
1484         j->current_location = (Location) {
1485                 .type = LOCATION_HEAD,
1486         };
1487
1488         return 0;
1489 }
1490
1491 _public_ int sd_journal_seek_tail(sd_journal *j) {
1492         assert_return(j, -EINVAL);
1493         assert_return(!journal_origin_changed(j), -ECHILD);
1494
1495         detach_location(j);
1496
1497         j->current_location = (Location) {
1498                 .type = LOCATION_TAIL,
1499         };
1500
1501         return 0;
1502 }
1503
1504 static void check_network(sd_journal *j, int fd) {
1505         assert(j);
1506
1507         if (j->on_network)
1508                 return;
1509
1510         j->on_network = fd_is_network_fs(fd);
1511 }
1512
/* Returns true if filename is "<prefix>.journal" or "<prefix>.journal~", or if it starts with
 * "<prefix>@". */
static bool file_has_type_prefix(const char *prefix, const char *filename) {
        const char *plain, *with_tilde, *at_prefix;

        /* strjoina() results are kept in plain locals and only then passed on. */
        plain = strjoina(prefix, ".journal");
        with_tilde = strjoina(plain, "~");
        at_prefix = strjoina(prefix, "@");

        if (STR_IN_SET(filename, plain, with_tilde))
                return true;

        return startswith(filename, at_prefix);
}
1523
1524 static bool file_type_wanted(int flags, const char *filename) {
1525         assert(filename);
1526
1527         if (!ENDSWITH_SET(filename, ".journal", ".journal~"))
1528                 return false;
1529
1530         /* no flags set → every type is OK */
1531         if (!(flags & (SD_JOURNAL_SYSTEM | SD_JOURNAL_CURRENT_USER)))
1532                 return true;
1533
1534         if (FLAGS_SET(flags, SD_JOURNAL_CURRENT_USER)) {
1535                 char prefix[5 + DECIMAL_STR_MAX(uid_t) + 1];
1536
1537                 xsprintf(prefix, "user-" UID_FMT, getuid());
1538
1539                 if (file_has_type_prefix(prefix, filename))
1540                         return true;
1541
1542                 /* If SD_JOURNAL_CURRENT_USER is specified and we are invoked under a system UID, then
1543                  * automatically enable SD_JOURNAL_SYSTEM too, because journald will actually put system user
1544                  * data into the system journal. */
1545
1546                 if (uid_for_system_journal(getuid()))
1547                         flags |= SD_JOURNAL_SYSTEM;
1548         }
1549
1550         if (FLAGS_SET(flags, SD_JOURNAL_SYSTEM) && file_has_type_prefix("system", filename))
1551                 return true;
1552
1553         return false;
1554 }
1555
1556 static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) {
1557         assert(j);
1558         assert(path);
1559         assert(prefix);
1560
1561         if (j->toplevel_fd >= 0)
1562                 return false;
1563
1564         return path_startswith(path, prefix);
1565 }
1566
1567 static void track_file_disposition(sd_journal *j, JournalFile *f) {
1568         assert(j);
1569         assert(f);
1570
1571         if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run"))
1572                 j->has_runtime_files = true;
1573         else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var"))
1574                 j->has_persistent_files = true;
1575 }
1576
1577 static int add_any_file(
1578                 sd_journal *j,
1579                 int fd,
1580                 const char *path) {
1581
1582         _cleanup_close_ int our_fd = -EBADF;
1583         JournalFile *f;
1584         struct stat st;
1585         int r;
1586
1587         assert(j);
1588         assert(fd >= 0 || path);
1589
1590         if (fd < 0) {
1591                 assert(path);  /* For gcc. */
1592                 if (j->toplevel_fd >= 0)
1593                         /* If there's a top-level fd defined make the path relative, explicitly, since otherwise
1594                          * openat() ignores the first argument. */
1595
1596                         fd = our_fd = openat(j->toplevel_fd, skip_leading_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1597                 else
1598                         fd = our_fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
1599                 if (fd < 0) {
1600                         r = log_debug_errno(errno, "Failed to open journal file %s: %m", path);
1601                         goto error;
1602                 }
1603
1604                 r = fd_nonblock(fd, false);
1605                 if (r < 0) {
1606                         r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path);
1607                         goto error;
1608                 }
1609         }
1610
1611         if (fstat(fd, &st) < 0) {
1612                 r = log_debug_errno(errno, "Failed to fstat %s: %m", path ?: "fd");
1613                 goto error;
1614         }
1615
1616         r = stat_verify_regular(&st);
1617         if (r < 0) {
1618                 log_debug_errno(r, "Refusing to open %s: %m", path ?: "fd");
1619                 goto error;
1620         }
1621
1622         if (path) {
1623                 f = ordered_hashmap_get(j->files, path);
1624                 if (f) {
1625                         if (stat_inode_same(&f->last_stat, &st)) {
1626                                 /* We already track this file, under the same path and with the same
1627                                  * device/inode numbers, it's hence really the same. Mark this file as seen
1628                                  * in this generation. This is used to GC old files in process_q_overflow()
1629                                  * to detect journal files that are still there and discern them from those
1630                                  * which are gone. */
1631
1632                                 f->last_seen_generation = j->generation;
1633                                 (void) journal_file_read_tail_timestamp(j, f);
1634                                 return 0;
1635                         }
1636
1637                         /* So we tracked a file under this name, but it has a different inode/device. In that
1638                          * case, it got replaced (probably due to rotation?), let's drop it hence from our
1639                          * list. */
1640                         remove_file_real(j, f);
1641                         f = NULL;
1642                 }
1643         }
1644
1645         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
1646                 r = log_debug_errno(SYNTHETIC_ERRNO(ETOOMANYREFS),
1647                                     "Too many open journal files, not adding %s.", path ?: "fd");
1648                 goto error;
1649         }
1650
1651         r = journal_file_open(fd, path, O_RDONLY, 0, 0, 0, NULL, j->mmap, NULL, &f);
1652         if (r < 0) {
1653                 log_debug_errno(r, "Failed to open journal file %s: %m", path ?: "from fd");
1654                 goto error;
1655         }
1656
1657         /* journal_file_dump(f); */
1658
1659         /* journal_file_open() generates an replacement fname if necessary, so we can use f->path. */
1660         r = ordered_hashmap_put(j->files, f->path, f);
1661         if (r < 0) {
1662                 f->close_fd = false; /* Make sure journal_file_close() doesn't close the caller's fd
1663                                       * (or our own). The caller or we will do that ourselves. */
1664                 (void) journal_file_close(f);
1665                 goto error;
1666         }
1667
1668         TAKE_FD(our_fd); /* the fd is now owned by the JournalFile object */
1669
1670         f->last_seen_generation = j->generation;
1671
1672         track_file_disposition(j, f);
1673         check_network(j, f->fd);
1674         (void) journal_file_read_tail_timestamp(j, f);
1675
1676         j->current_invalidate_counter++;
1677
1678         log_debug("File %s added.", f->path);
1679
1680         return 0;
1681
1682 error:
1683         (void) journal_put_error(j, r, path);   /* path==NULL is OK. */
1684         return r;
1685 }
1686
1687 int journal_get_directories(sd_journal *j, char ***ret) {
1688         _cleanup_strv_free_ char **paths = NULL;
1689         JournalFile *f;
1690         const char *p;
1691         size_t n = SIZE_MAX;
1692         int r;
1693
1694         assert(j);
1695         assert(ret);
1696
1697         /* This returns parent directories of opened journal files. */
1698
1699         ORDERED_HASHMAP_FOREACH_KEY(f, p, j->files) {
1700                 _cleanup_free_ char *d = NULL;
1701
1702                 /* Ignore paths generated from fd. */
1703                 if (path_startswith(p, "/proc/"))
1704                         continue;
1705
1706                 r = path_extract_directory(p, &d);
1707                 if (r < 0)
1708                         return r;
1709
1710                 if (path_strv_contains(paths, d))
1711                         continue;
1712
1713                 r = strv_extend_with_size(&paths, &n, d);
1714                 if (r < 0)
1715                         return r;
1716         }
1717
1718         *ret = TAKE_PTR(paths);
1719         return 0;
1720 }
1721
1722 static int add_file_by_name(
1723                 sd_journal *j,
1724                 const char *prefix,
1725                 const char *filename) {
1726
1727         _cleanup_free_ char *path = NULL;
1728
1729         assert(j);
1730         assert(prefix);
1731         assert(filename);
1732
1733         if (j->no_new_files)
1734                 return 0;
1735
1736         if (!file_type_wanted(j->flags, filename))
1737                 return 0;
1738
1739         path = path_join(prefix, filename);
1740         if (!path)
1741                 return -ENOMEM;
1742
1743         return add_any_file(j, -1, path);
1744 }
1745
1746 static int remove_file_by_name(
1747                 sd_journal *j,
1748                 const char *prefix,
1749                 const char *filename) {
1750
1751         _cleanup_free_ char *path = NULL;
1752         JournalFile *f;
1753
1754         assert(j);
1755         assert(prefix);
1756         assert(filename);
1757
1758         path = path_join(prefix, filename);
1759         if (!path)
1760                 return -ENOMEM;
1761
1762         f = ordered_hashmap_get(j->files, path);
1763         if (!f)
1764                 return 0;
1765
1766         remove_file_real(j, f);
1767         return 1;
1768 }
1769
1770 static void remove_file_real(sd_journal *j, JournalFile *f) {
1771         assert(j);
1772         assert(f);
1773
1774         (void) ordered_hashmap_remove(j->files, f->path);
1775
1776         log_debug("File %s removed.", f->path);
1777
1778         if (j->current_file == f) {
1779                 j->current_file = NULL;
1780                 j->current_field = 0;
1781         }
1782
1783         if (j->unique_file == f) {
1784                 /* Jump to the next unique_file or NULL if that one was last */
1785                 j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
1786                 j->unique_offset = 0;
1787                 if (!j->unique_file)
1788                         j->unique_file_lost = true;
1789         }
1790
1791         if (j->fields_file == f) {
1792                 j->fields_file = ordered_hashmap_next(j->files, j->fields_file->path);
1793                 j->fields_offset = 0;
1794                 if (!j->fields_file)
1795                         j->fields_file_lost = true;
1796         }
1797
1798         journal_file_unlink_newest_by_boot_id(j, f);
1799         (void) journal_file_close(f);
1800
1801         j->current_invalidate_counter++;
1802 }
1803
1804 static int dirname_is_machine_id(const char *fn) {
1805         sd_id128_t id, machine;
1806         const char *e;
1807         int r;
1808
1809         /* Returns true if the specified directory name matches the local machine ID */
1810
1811         r = sd_id128_get_machine(&machine);
1812         if (r < 0)
1813                 return r;
1814
1815         e = strchr(fn, '.');
1816         if (e) {
1817                 const char *k;
1818
1819                 /* Looks like it has a namespace suffix. Verify that. */
1820                 if (!log_namespace_name_valid(e + 1))
1821                         return false;
1822
1823                 k = strndupa_safe(fn, e - fn);
1824                 r = sd_id128_from_string(k, &id);
1825         } else
1826                 r = sd_id128_from_string(fn, &id);
1827         if (r < 0)
1828                 return r;
1829
1830         return sd_id128_equal(id, machine);
1831 }
1832
/* Checks whether the directory name fn is a valid 128-bit ID belonging to the given namespace.
 *
 * With namespace == NULL only names without a "." suffix can match; otherwise the part after the
 * first dot must equal namespace exactly. Returns true on a match, false otherwise. */
static int dirname_has_namespace(const char *fn, const char *namespace) {
        const char *dot, *id_part;

        dot = strchr(fn, '.');
        if (!dot)
                /* No suffix at all: only acceptable if no namespace was asked for. */
                return namespace ? false : id128_is_valid(fn);

        if (!namespace || !streq(dot + 1, namespace))
                return false;

        id_part = strndupa_safe(fn, dot - fn);
        return id128_is_valid(id_part);
}
1857
1858 static bool dirent_is_journal_file(const struct dirent *de) {
1859         assert(de);
1860
1861         /* Returns true if the specified directory entry looks like a journal file we might be interested in */
1862
1863         if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
1864                 return false;
1865
1866         return endswith(de->d_name, ".journal") ||
1867                 endswith(de->d_name, ".journal~");
1868 }
1869
1870 static bool dirent_is_journal_subdir(const struct dirent *de) {
1871         const char *e, *n;
1872         assert(de);
1873
1874         /* returns true if the specified directory entry looks like a directory that might contain journal
1875          * files we might be interested in, i.e. is either a 128-bit ID or a 128-bit ID suffixed by a
1876          * namespace. */
1877
1878         if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
1879                 return false;
1880
1881         e = strchr(de->d_name, '.');
1882         if (!e)
1883                 return id128_is_valid(de->d_name); /* No namespace */
1884
1885         n = strndupa_safe(de->d_name, e - de->d_name);
1886         if (!id128_is_valid(n))
1887                 return false;
1888
1889         return log_namespace_name_valid(e + 1);
1890 }
1891
1892 static int directory_open(sd_journal *j, const char *path, DIR **ret) {
1893         DIR *d;
1894
1895         assert(j);
1896         assert(path);
1897         assert(ret);
1898
1899         if (j->toplevel_fd < 0)
1900                 d = opendir(path);
1901         else
1902                 /* Open the specified directory relative to the toplevel fd. Enforce that the path specified is
1903                  * relative, by dropping the initial slash */
1904                 d = xopendirat(j->toplevel_fd, skip_leading_slash(path), 0);
1905         if (!d)
1906                 return -errno;
1907
1908         *ret = d;
1909         return 0;
1910 }
1911
1912 static Directory* directory_free(Directory *d) {
1913         if (!d)
1914                 return NULL;
1915
1916         if (d->journal) {
1917                 if (d->wd > 0 &&
1918                     hashmap_remove_value(d->journal->directories_by_wd, INT_TO_PTR(d->wd), d) &&
1919                     d->journal->inotify_fd >= 0)
1920                         (void) inotify_rm_watch(d->journal->inotify_fd, d->wd);
1921
1922                 if (d->path)
1923                         hashmap_remove_value(d->journal->directories_by_path, d->path, d);
1924         }
1925
1926         if (d->path) {
1927                 if (d->is_root)
1928                         log_debug("Root directory %s removed.", d->path);
1929                 else
1930                         log_debug("Directory %s removed.", d->path);
1931
1932                 free(d->path);
1933         }
1934
1935         return mfree(d);
1936 }
1937
1938 DEFINE_TRIVIAL_CLEANUP_FUNC(Directory*, directory_free);
1939
1940 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1941         directories_by_path_hash_ops,
1942         char,
1943         path_hash_func,
1944         path_compare,
1945         Directory,
1946         directory_free);
1947
1948 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
1949         directories_by_wd_hash_ops,
1950         void,
1951         trivial_hash_func,
1952         trivial_compare_func,
1953         Directory,
1954         directory_free);
1955
1956 static int add_directory_impl(sd_journal *j, const char *path, bool is_root, Directory **ret) {
1957         _cleanup_(directory_freep) Directory *m = NULL;
1958         Directory *existing;
1959         int r;
1960
1961         assert(j);
1962         assert(path);
1963         assert(ret);
1964
1965         existing = hashmap_get(j->directories_by_path, path);
1966         if (existing) {
1967                 if (existing->is_root != is_root) {
1968                         /* Don't 'downgrade' from root directory */
1969                         *ret = NULL;
1970                         return 0;
1971                 }
1972
1973                 *ret = existing;
1974                 return 1;
1975         }
1976
1977         m = new(Directory, 1);
1978         if (!m)
1979                 return -ENOMEM;
1980
1981         *m = (Directory) {
1982                 .journal = j,
1983                 .is_root = is_root,
1984                 .path = strdup(path),
1985                 .wd = -1,
1986         };
1987
1988         if (!m->path)
1989                 return -ENOMEM;
1990
1991         r = hashmap_ensure_put(&j->directories_by_path, &directories_by_path_hash_ops, m->path, m);
1992         if (r < 0)
1993                 return r;
1994
1995         j->current_invalidate_counter++;
1996
1997         if (is_root)
1998                 log_debug("Root directory %s added.", m->path);
1999         else
2000                 log_debug("Directory %s added.", m->path);
2001
2002         *ret = TAKE_PTR(m);
2003         return 1;
2004 }
2005
2006 static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
2007
2008 static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
2009         assert(j);
2010         assert(m);
2011         assert(d);
2012
2013         FOREACH_DIRENT_ALL(de, d, goto fail) {
2014                 if (dirent_is_journal_file(de))
2015                         (void) add_file_by_name(j, m->path, de->d_name);
2016
2017                 if (m->is_root && dirent_is_journal_subdir(de))
2018                         (void) add_directory(j, m->path, de->d_name);
2019         }
2020
2021         return;
2022 fail:
2023         log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
2024 }
2025
2026 static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
2027         int r;
2028
2029         assert(j);
2030         assert(m);
2031         assert(fd >= 0);
2032
2033         /* Watch this directory if that's enabled and if it not being watched yet. */
2034
2035         if (m->wd > 0) /* Already have a watch? */
2036                 return;
2037         if (j->inotify_fd < 0) /* Not watching at all? */
2038                 return;
2039
2040         m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
2041         if (m->wd < 0) {
2042                 log_debug_errno(m->wd, "Failed to watch journal directory '%s', ignoring: %m", m->path);
2043                 return;
2044         }
2045
2046         r = hashmap_ensure_put(&j->directories_by_wd, &directories_by_wd_hash_ops, INT_TO_PTR(m->wd), m);
2047         if (r < 0) {
2048                 if (r == -EEXIST)
2049                         log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
2050                 else {
2051                         log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
2052                         (void) inotify_rm_watch(j->inotify_fd, m->wd);
2053                 }
2054                 m->wd = -1;
2055         }
2056 }
2057
2058 static int add_directory(
2059                 sd_journal *j,
2060                 const char *prefix,
2061                 const char *dirname) {
2062
2063         _cleanup_free_ char *path = NULL;
2064         _cleanup_closedir_ DIR *d = NULL;
2065         Directory *m;
2066         int r, k;
2067
2068         assert(j);
2069         assert(prefix);
2070
2071         /* Adds a journal file directory to watch. If the directory is already tracked this updates the inotify watch
2072          * and reenumerates directory contents */
2073
2074         path = path_join(prefix, dirname);
2075         if (!path) {
2076                 r = -ENOMEM;
2077                 goto fail;
2078         }
2079
2080         log_debug("Considering directory '%s'.", path);
2081
2082         /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */
2083         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
2084             !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run")))
2085                 return 0;
2086
2087         if (dirname &&
2088             (!(FLAGS_SET(j->flags, SD_JOURNAL_ALL_NAMESPACES) ||
2089                dirname_has_namespace(dirname, j->namespace) > 0 ||
2090                (FLAGS_SET(j->flags, SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE) && dirname_has_namespace(dirname, NULL) > 0))))
2091                 return 0;
2092
2093         r = directory_open(j, path, &d);
2094         if (r < 0) {
2095                 log_debug_errno(r, "Failed to open directory '%s': %m", path);
2096                 goto fail;
2097         }
2098
2099         r = add_directory_impl(j, path, /* is_root = */ false, &m);
2100         if (r < 0)
2101                 goto fail;
2102         if (r == 0)
2103                 return 0;
2104
2105         m->last_seen_generation = j->generation;
2106
2107         directory_watch(j, m, dirfd(d),
2108                         IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
2109                         IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
2110                         IN_ONLYDIR);
2111
2112         if (!j->no_new_files)
2113                 directory_enumerate(j, m, d);
2114
2115         check_network(j, dirfd(d));
2116
2117         return 0;
2118
2119 fail:
2120         k = journal_put_error(j, r, path ?: prefix);
2121         if (k < 0)
2122                 return k;
2123
2124         return r;
2125 }
2126
2127 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
2128
2129         _cleanup_closedir_ DIR *d = NULL;
2130         Directory *m;
2131         int r, k;
2132
2133         assert(j);
2134
2135         /* Adds a root directory to our set of directories to use. If the root directory is already in the set, we
2136          * update the inotify logic, and renumerate the directory entries. This call may hence be called to initially
2137          * populate the set, as well as to update it later. */
2138
2139         if (p) {
2140                 /* If there's a path specified, use it. */
2141
2142                 log_debug("Considering root directory '%s'.", p);
2143
2144                 if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
2145                     !path_has_prefix(j, p, "/run"))
2146                         return -EINVAL;
2147
2148                 if (j->prefix)
2149                         p = strjoina(j->prefix, p);
2150
2151                 r = directory_open(j, p, &d);
2152                 if (r == -ENOENT && missing_ok)
2153                         return 0;
2154                 if (r < 0) {
2155                         log_debug_errno(r, "Failed to open root directory %s: %m", p);
2156                         goto fail;
2157                 }
2158         } else {
2159                 _cleanup_close_ int dfd = -EBADF;
2160
2161                 /* If there's no path specified, then we use the top-level fd itself. We duplicate the fd here, since
2162                  * opendir() will take possession of the fd, and close it, which we don't want. */
2163
2164                 p = "."; /* store this as "." in the directories hashmap */
2165
2166                 dfd = fcntl(j->toplevel_fd, F_DUPFD_CLOEXEC, 3);
2167                 if (dfd < 0) {
2168                         r = -errno;
2169                         goto fail;
2170                 }
2171
2172                 d = take_fdopendir(&dfd);
2173                 if (!d) {
2174                         r = -errno;
2175                         goto fail;
2176                 }
2177
2178                 rewinddir(d);
2179         }
2180
2181         r = add_directory_impl(j, p, /* is_root = */ true, &m);
2182         if (r < 0)
2183                 goto fail;
2184         if (r == 0)
2185                 return 0;
2186
2187         directory_watch(j, m, dirfd(d),
2188                         IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
2189                         IN_ONLYDIR);
2190
2191         if (!j->no_new_files)
2192                 directory_enumerate(j, m, d);
2193
2194         check_network(j, dirfd(d));
2195
2196         return 0;
2197
2198 fail:
2199         k = journal_put_error(j, r, p);
2200         if (k < 0)
2201                 return k;
2202
2203         return r;
2204 }
2205
2206 static int add_search_paths(sd_journal *j) {
2207
2208         static const char search_paths[] =
2209                 "/run/log/journal\0"
2210                 "/var/log/journal\0";
2211
2212         assert(j);
2213
2214         /* We ignore most errors here, since the idea is to only open
2215          * what's actually accessible, and ignore the rest. */
2216
2217         NULSTR_FOREACH(p, search_paths)
2218                 (void) add_root_directory(j, p, true);
2219
2220         if (!(j->flags & SD_JOURNAL_LOCAL_ONLY))
2221                 (void) add_root_directory(j, "/var/log/journal/remote", true);
2222
2223         return 0;
2224 }
2225
2226 static int add_current_paths(sd_journal *j) {
2227         JournalFile *f;
2228
2229         assert(j);
2230         assert(j->no_new_files);
2231
2232         /* Simply adds all directories for files we have open as directories. We don't expect errors here, so we
2233          * treat them as fatal. */
2234
2235         ORDERED_HASHMAP_FOREACH(f, j->files) {
2236                 _cleanup_free_ char *dir = NULL;
2237                 int r;
2238
2239                 r = path_extract_directory(f->path, &dir);
2240                 if (r < 0)
2241                         return r;
2242
2243                 r = add_directory(j, dir, NULL);
2244                 if (r < 0)
2245                         return r;
2246         }
2247
2248         return 0;
2249 }
2250
2251 static int allocate_inotify(sd_journal *j) {
2252         assert(j);
2253
2254         if (j->inotify_fd < 0) {
2255                 j->inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
2256                 if (j->inotify_fd < 0)
2257                         return -errno;
2258         }
2259
2260         return 0;
2261 }
2262
2263 static sd_journal *journal_new(int flags, const char *path, const char *namespace) {
2264         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2265
2266         j = new(sd_journal, 1);
2267         if (!j)
2268                 return NULL;
2269
2270         *j = (sd_journal) {
2271                 .origin_id = origin_id_query(),
2272                 .toplevel_fd = -EBADF,
2273                 .inotify_fd = -EBADF,
2274                 .flags = flags,
2275                 .data_threshold = DEFAULT_DATA_THRESHOLD,
2276         };
2277
2278         if (path) {
2279                 char *t;
2280
2281                 t = strdup(path);
2282                 if (!t)
2283                         return NULL;
2284
2285                 if (flags & SD_JOURNAL_OS_ROOT)
2286                         j->prefix = t;
2287                 else
2288                         j->path = t;
2289         }
2290
2291         if (namespace) {
2292                 j->namespace = strdup(namespace);
2293                 if (!j->namespace)
2294                         return NULL;
2295         }
2296
2297         j->files = ordered_hashmap_new(&journal_file_hash_ops_by_path);
2298         if (!j->files)
2299                 return NULL;
2300
2301         j->files_cache = ordered_hashmap_iterated_cache_new(j->files);
2302         j->mmap = mmap_cache_new();
2303         if (!j->files_cache || !j->mmap)
2304                 return NULL;
2305
2306         return TAKE_PTR(j);
2307 }
2308
2309 #define OPEN_ALLOWED_FLAGS                              \
2310         (SD_JOURNAL_LOCAL_ONLY |                        \
2311          SD_JOURNAL_RUNTIME_ONLY |                      \
2312          SD_JOURNAL_SYSTEM |                            \
2313          SD_JOURNAL_CURRENT_USER |                      \
2314          SD_JOURNAL_ALL_NAMESPACES |                    \
2315          SD_JOURNAL_INCLUDE_DEFAULT_NAMESPACE |         \
2316          SD_JOURNAL_ASSUME_IMMUTABLE)
2317
2318 _public_ int sd_journal_open_namespace(sd_journal **ret, const char *namespace, int flags) {
2319         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2320         int r;
2321
2322         assert_return(ret, -EINVAL);
2323         assert_return((flags & ~OPEN_ALLOWED_FLAGS) == 0, -EINVAL);
2324
2325         j = journal_new(flags, NULL, namespace);
2326         if (!j)
2327                 return -ENOMEM;
2328
2329         r = add_search_paths(j);
2330         if (r < 0)
2331                 return r;
2332
2333         *ret = TAKE_PTR(j);
2334         return 0;
2335 }
2336
2337 _public_ int sd_journal_open(sd_journal **ret, int flags) {
2338         return sd_journal_open_namespace(ret, NULL, flags);
2339 }
2340
2341 #define OPEN_CONTAINER_ALLOWED_FLAGS                    \
2342         (SD_JOURNAL_LOCAL_ONLY |                        \
2343          SD_JOURNAL_SYSTEM |                            \
2344          SD_JOURNAL_ASSUME_IMMUTABLE)
2345
2346 _public_ int sd_journal_open_container(sd_journal **ret, const char *machine, int flags) {
2347         _cleanup_free_ char *root = NULL, *class = NULL;
2348         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2349         char *p;
2350         int r;
2351
2352         /* This is deprecated, people should use machined's OpenMachineRootDirectory() call instead in
2353          * combination with sd_journal_open_directory_fd(). */
2354
2355         assert_return(machine, -EINVAL);
2356         assert_return(ret, -EINVAL);
2357         assert_return((flags & ~OPEN_CONTAINER_ALLOWED_FLAGS) == 0, -EINVAL);
2358         assert_return(hostname_is_valid(machine, 0), -EINVAL);
2359
2360         p = strjoina("/run/systemd/machines/", machine);
2361         r = parse_env_file(NULL, p,
2362                            "ROOT", &root,
2363                            "CLASS", &class);
2364         if (r == -ENOENT)
2365                 return -EHOSTDOWN;
2366         if (r < 0)
2367                 return r;
2368         if (!root)
2369                 return -ENODATA;
2370
2371         if (!streq_ptr(class, "container"))
2372                 return -EIO;
2373
2374         j = journal_new(flags, root, NULL);
2375         if (!j)
2376                 return -ENOMEM;
2377
2378         r = add_search_paths(j);
2379         if (r < 0)
2380                 return r;
2381
2382         *ret = TAKE_PTR(j);
2383         return 0;
2384 }
2385
2386 #define OPEN_DIRECTORY_ALLOWED_FLAGS                    \
2387         (SD_JOURNAL_OS_ROOT |                           \
2388          SD_JOURNAL_SYSTEM |                            \
2389          SD_JOURNAL_CURRENT_USER |                      \
2390          SD_JOURNAL_ASSUME_IMMUTABLE)
2391
2392 _public_ int sd_journal_open_directory(sd_journal **ret, const char *path, int flags) {
2393         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2394         int r;
2395
2396         assert_return(ret, -EINVAL);
2397         assert_return(path, -EINVAL);
2398         assert_return((flags & ~OPEN_DIRECTORY_ALLOWED_FLAGS) == 0, -EINVAL);
2399
2400         j = journal_new(flags, path, NULL);
2401         if (!j)
2402                 return -ENOMEM;
2403
2404         if (flags & SD_JOURNAL_OS_ROOT)
2405                 r = add_search_paths(j);
2406         else
2407                 r = add_root_directory(j, path, false);
2408         if (r < 0)
2409                 return r;
2410
2411         *ret = TAKE_PTR(j);
2412         return 0;
2413 }
2414
2415 #define OPEN_FILES_ALLOWED_FLAGS                        \
2416         (SD_JOURNAL_ASSUME_IMMUTABLE)
2417
2418 _public_ int sd_journal_open_files(sd_journal **ret, const char **paths, int flags) {
2419         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2420         int r;
2421
2422         assert_return(ret, -EINVAL);
2423         assert_return((flags & ~OPEN_FILES_ALLOWED_FLAGS) == 0, -EINVAL);
2424
2425         j = journal_new(flags, NULL, NULL);
2426         if (!j)
2427                 return -ENOMEM;
2428
2429         STRV_FOREACH(path, paths) {
2430                 r = add_any_file(j, -1, *path);
2431                 if (r < 0)
2432                         return r;
2433         }
2434
2435         j->no_new_files = true;
2436
2437         *ret = TAKE_PTR(j);
2438         return 0;
2439 }
2440
2441 #define OPEN_DIRECTORY_FD_ALLOWED_FLAGS                 \
2442         (SD_JOURNAL_OS_ROOT |                           \
2443          SD_JOURNAL_SYSTEM |                            \
2444          SD_JOURNAL_CURRENT_USER |                      \
2445          SD_JOURNAL_TAKE_DIRECTORY_FD |                 \
2446          SD_JOURNAL_ASSUME_IMMUTABLE)
2447
2448 _public_ int sd_journal_open_directory_fd(sd_journal **ret, int fd, int flags) {
2449         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2450         struct stat st;
2451         bool take_fd;
2452         int r;
2453
2454         assert_return(ret, -EINVAL);
2455         assert_return(fd >= 0, -EBADF);
2456         assert_return((flags & ~OPEN_DIRECTORY_FD_ALLOWED_FLAGS) == 0, -EINVAL);
2457
2458         if (fstat(fd, &st) < 0)
2459                 return -errno;
2460
2461         if (!S_ISDIR(st.st_mode))
2462                 return -EBADFD;
2463
2464         take_fd = FLAGS_SET(flags, SD_JOURNAL_TAKE_DIRECTORY_FD);
2465         j = journal_new(flags & ~SD_JOURNAL_TAKE_DIRECTORY_FD, NULL, NULL);
2466         if (!j)
2467                 return -ENOMEM;
2468
2469         j->toplevel_fd = fd;
2470
2471         if (flags & SD_JOURNAL_OS_ROOT)
2472                 r = add_search_paths(j);
2473         else
2474                 r = add_root_directory(j, NULL, false);
2475         if (r < 0)
2476                 return r;
2477
2478         SET_FLAG(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD, take_fd);
2479
2480         *ret = TAKE_PTR(j);
2481         return 0;
2482 }
2483
2484 #define OPEN_FILES_FD_ALLOWED_FLAGS                        \
2485         (SD_JOURNAL_ASSUME_IMMUTABLE)
2486
2487 _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fds, int flags) {
2488         JournalFile *f;
2489         _cleanup_(sd_journal_closep) sd_journal *j = NULL;
2490         int r;
2491
2492         assert_return(ret, -EINVAL);
2493         assert_return(n_fds > 0, -EBADF);
2494         assert_return((flags & ~OPEN_FILES_FD_ALLOWED_FLAGS) == 0, -EINVAL);
2495
2496         j = journal_new(flags, NULL, NULL);
2497         if (!j)
2498                 return -ENOMEM;
2499
2500         for (unsigned i = 0; i < n_fds; i++) {
2501                 struct stat st;
2502
2503                 if (fds[i] < 0) {
2504                         r = -EBADF;
2505                         goto fail;
2506                 }
2507
2508                 if (fstat(fds[i], &st) < 0) {
2509                         r = -errno;
2510                         goto fail;
2511                 }
2512
2513                 r = stat_verify_regular(&st);
2514                 if (r < 0)
2515                         goto fail;
2516
2517                 r = add_any_file(j, fds[i], NULL);
2518                 if (r < 0)
2519                         goto fail;
2520         }
2521
2522         j->no_new_files = true;
2523         j->no_inotify = true;
2524
2525         *ret = TAKE_PTR(j);
2526         return 0;
2527
2528 fail:
2529         /* If we fail, make sure we don't take possession of the files we managed to make use of successfully, and they
2530          * remain open */
2531         ORDERED_HASHMAP_FOREACH(f, j->files)
2532                 f->close_fd = false;
2533
2534         return r;
2535 }
2536
2537 _public_ void sd_journal_close(sd_journal *j) {
2538         if (!j || journal_origin_changed(j))
2539                 return;
2540
2541         journal_clear_newest_by_boot_id(j);
2542
2543         sd_journal_flush_matches(j);
2544
2545         /* log stats before closing files so we can see the windows state */
2546         if (j->mmap)
2547                 mmap_cache_stats_log_debug(j->mmap);
2548
2549         ordered_hashmap_free(j->files);
2550         iterated_cache_free(j->files_cache);
2551
2552         hashmap_free(j->directories_by_path);
2553         hashmap_free(j->directories_by_wd);
2554
2555         if (FLAGS_SET(j->flags, SD_JOURNAL_TAKE_DIRECTORY_FD))
2556                 safe_close(j->toplevel_fd);
2557
2558         safe_close(j->inotify_fd);
2559
2560         if (j->mmap)
2561                 mmap_cache_unref(j->mmap);
2562
2563         hashmap_free(j->errors);
2564
2565         set_free(j->exclude_syslog_identifiers);
2566
2567         free(j->path);
2568         free(j->prefix);
2569         free(j->namespace);
2570         free(j->unique_field);
2571         free(j->fields_buffer);
2572         free(j);
2573 }
2574
2575 static int journal_file_read_tail_timestamp(sd_journal *j, JournalFile *f) {
2576         uint64_t offset, mo, rt;
2577         sd_id128_t id;
2578         ObjectType type;
2579         Object *o;
2580         int r;
2581
2582         assert(j);
2583         assert(f);
2584         assert(f->header);
2585
2586         /* Tries to read the timestamp of the most recently written entry. */
2587
2588         if (FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE) && f->newest_entry_offset != 0)
2589                 return 0; /* We have already read the file, and we assume that the file is immutable. */
2590
2591         if (f->header->state == f->newest_state &&
2592             f->header->state == STATE_ARCHIVED &&
2593             f->newest_entry_offset != 0)
2594                 return 0; /* We have already read archived file. */
2595
2596         if (JOURNAL_HEADER_CONTAINS(f->header, tail_entry_offset)) {
2597                 offset = le64toh(READ_NOW(f->header->tail_entry_offset));
2598                 type = OBJECT_ENTRY;
2599         } else {
2600                 offset = le64toh(READ_NOW(f->header->tail_object_offset));
2601                 type = OBJECT_UNUSED;
2602         }
2603         if (offset == 0)
2604                 return -ENODATA; /* not a single object/entry, hence no tail timestamp */
2605         if (offset == f->newest_entry_offset)
2606                 return 0; /* No new entry is added after we read last time. */
2607
2608         /* Move to the last object in the journal file, in the hope it is an entry (which it usually will
2609          * be). If we lack the "tail_entry_offset" field in the header, we specify the type as OBJECT_UNUSED
2610          * here, since we cannot be sure what the last object will be, and want no noisy logging if it isn't
2611          * an entry. We instead check after figuring out the pointer. */
2612         r = journal_file_move_to_object(f, type, offset, &o);
2613         if (r < 0) {
2614                 log_debug_errno(r, "Failed to move to last object in journal file, ignoring: %m");
2615                 o = NULL;
2616                 offset = 0;
2617         }
2618         if (o && o->object.type == OBJECT_ENTRY) {
2619                 /* Yay, last object is an entry, let's use the data. */
2620                 id = o->entry.boot_id;
2621                 mo = le64toh(o->entry.monotonic);
2622                 rt = le64toh(o->entry.realtime);
2623         } else {
2624                 /* So the object is not an entry or we couldn't access it? In that case, let's read the most
2625                  * recent entry timestamps from the header. It's equally good. Unfortunately though, in old
2626                  * versions of the journal the boot ID in the header doesn't have to match the monotonic
2627                  * timestamp of the header. Let's check the header flag that indicates whether this strictly
2628                  * matches first hence, before using the data. */
2629
2630                 if (JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && f->header->state == STATE_ARCHIVED) {
2631                         mo = le64toh(f->header->tail_entry_monotonic);
2632                         rt = le64toh(f->header->tail_entry_realtime);
2633                         id = f->header->tail_entry_boot_id;
2634                         offset = UINT64_MAX;
2635                 } else {
2636                         /* Otherwise let's find the last entry manually (this possibly means traversing the
2637                          * chain of entry arrays, till the end */
2638                         r = journal_file_next_entry(f, 0, DIRECTION_UP, &o, offset == 0 ? &offset : NULL);
2639                         if (r < 0)
2640                                 return r;
2641                         if (r == 0)
2642                                 return -ENODATA;
2643
2644                         id = o->entry.boot_id;
2645                         mo = le64toh(o->entry.monotonic);
2646                         rt = le64toh(o->entry.realtime);
2647                 }
2648         }
2649
2650         if (mo > rt) /* monotonic clock is further ahead than realtime? that's weird, refuse to use the data */
2651                 return -ENODATA;
2652
2653         if (offset == f->newest_entry_offset) {
2654                 /* Cached data and the current one should be equivalent. */
2655                 if (!sd_id128_equal(f->newest_machine_id, f->header->machine_id) ||
2656                     !sd_id128_equal(f->newest_boot_id, id) ||
2657                     f->newest_monotonic_usec != mo ||
2658                     f->newest_realtime_usec != rt)
2659                         return -EBADMSG;
2660
2661                 return 0; /* No new entry is added after we read last time. */
2662         }
2663
2664         if (!sd_id128_equal(f->newest_boot_id, id))
2665                 journal_file_unlink_newest_by_boot_id(j, f);
2666
2667         f->newest_boot_id = id;
2668         f->newest_monotonic_usec = mo;
2669         f->newest_realtime_usec = rt;
2670         f->newest_machine_id = f->header->machine_id;
2671         f->newest_entry_offset = offset;
2672         f->newest_state = f->header->state;
2673
2674         r = journal_file_reshuffle_newest_by_boot_id(j, f);
2675         if (r < 0)
2676                 return r;
2677
2678         return 1; /* Updated. */
2679 }
2680
2681 _public_ int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret) {
2682         JournalFile *f;
2683         Object *o;
2684         int r;
2685
2686         assert_return(j, -EINVAL);
2687         assert_return(!journal_origin_changed(j), -ECHILD);
2688
2689         f = j->current_file;
2690         if (!f)
2691                 return -EADDRNOTAVAIL;
2692         if (f->current_offset <= 0)
2693                 return -EADDRNOTAVAIL;
2694
2695         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2696         if (r < 0)
2697                 return r;
2698
2699         uint64_t t = le64toh(o->entry.realtime);
2700         if (!VALID_REALTIME(t))
2701                 return -EBADMSG;
2702
2703         if (ret)
2704                 *ret = t;
2705
2706         return 0;
2707 }
2708
2709 _public_ int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret_monotonic, sd_id128_t *ret_boot_id) {
2710         JournalFile *f;
2711         Object *o;
2712         int r;
2713
2714         assert_return(j, -EINVAL);
2715         assert_return(!journal_origin_changed(j), -ECHILD);
2716
2717         f = j->current_file;
2718         if (!f)
2719                 return -EADDRNOTAVAIL;
2720         if (f->current_offset <= 0)
2721                 return -EADDRNOTAVAIL;
2722
2723         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2724         if (r < 0)
2725                 return r;
2726
2727         if (!ret_boot_id) {
2728                 sd_id128_t id;
2729
2730                 r = sd_id128_get_boot(&id);
2731                 if (r < 0)
2732                         return r;
2733
2734                 if (!sd_id128_equal(id, o->entry.boot_id))
2735                         return -ESTALE;
2736         }
2737
2738         uint64_t t = le64toh(o->entry.monotonic);
2739         if (!VALID_MONOTONIC(t))
2740                 return -EBADMSG;
2741
2742         if (ret_monotonic)
2743                 *ret_monotonic = t;
2744         if (ret_boot_id)
2745                 *ret_boot_id = o->entry.boot_id;
2746
2747         return 0;
2748 }
2749
2750 _public_ int sd_journal_get_seqnum(
2751                 sd_journal *j,
2752                 uint64_t *ret_seqnum,
2753                 sd_id128_t *ret_seqnum_id) {
2754
2755         JournalFile *f;
2756         Object *o;
2757         int r;
2758
2759         assert_return(j, -EINVAL);
2760         assert_return(!journal_origin_changed(j), -ECHILD);
2761
2762         f = j->current_file;
2763         if (!f)
2764                 return -EADDRNOTAVAIL;
2765
2766         if (f->current_offset <= 0)
2767                 return -EADDRNOTAVAIL;
2768
2769         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2770         if (r < 0)
2771                 return r;
2772
2773         if (ret_seqnum_id)
2774                 *ret_seqnum_id = f->header->seqnum_id;
2775         if (ret_seqnum)
2776                 *ret_seqnum = le64toh(o->entry.seqnum);
2777
2778         return 0;
2779 }
2780
/* Checks whether 'field' is acceptable as a field name here: it must be non-empty, must not begin with
 * two underscores, and may consist solely of 'A'..'Z', '0'..'9' and '_'. 'field' must not be NULL. */
static bool field_is_valid(const char *field) {
        assert(field);

        /* Empty name? */
        if (field[0] == '\0')
                return false;

        /* A double underscore prefix is refused. */
        if (field[0] == '_' && field[1] == '_')
                return false;

        /* The name is fine iff the run of permitted characters extends right up to the terminating NUL. */
        return field[strspn(field, "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")] == '\0';
}
2806
2807 _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *size) {
2808         JournalFile *f;
2809         size_t field_length;
2810         Object *o;
2811         int r;
2812
2813         assert_return(j, -EINVAL);
2814         assert_return(!journal_origin_changed(j), -ECHILD);
2815         assert_return(field, -EINVAL);
2816         assert_return(data, -EINVAL);
2817         assert_return(size, -EINVAL);
2818         assert_return(field_is_valid(field), -EINVAL);
2819
2820         f = j->current_file;
2821         if (!f)
2822                 return -EADDRNOTAVAIL;
2823
2824         if (f->current_offset <= 0)
2825                 return -EADDRNOTAVAIL;
2826
2827         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2828         if (r < 0)
2829                 return r;
2830
2831         field_length = strlen(field);
2832
2833         uint64_t n = journal_file_entry_n_items(f, o);
2834         for (uint64_t i = 0; i < n; i++) {
2835                 uint64_t p;
2836                 void *d;
2837                 size_t l;
2838
2839                 p = journal_file_entry_item_object_offset(f, o, i);
2840                 r = journal_file_data_payload(f, NULL, p, field, field_length, j->data_threshold, &d, &l);
2841                 if (r == 0)
2842                         continue;
2843                 if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
2844                         log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
2845                         continue;
2846                 }
2847                 if (r < 0)
2848                         return r;
2849
2850                 *data = d;
2851                 *size = l;
2852
2853                 return 0;
2854         }
2855
2856         return -ENOENT;
2857 }
2858
2859 _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *size) {
2860         JournalFile *f;
2861         Object *o;
2862         int r;
2863
2864         assert_return(j, -EINVAL);
2865         assert_return(!journal_origin_changed(j), -ECHILD);
2866         assert_return(data, -EINVAL);
2867         assert_return(size, -EINVAL);
2868
2869         f = j->current_file;
2870         if (!f)
2871                 return -EADDRNOTAVAIL;
2872
2873         if (f->current_offset <= 0)
2874                 return -EADDRNOTAVAIL;
2875
2876         r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
2877         if (r < 0)
2878                 return r;
2879
2880         for (uint64_t n = journal_file_entry_n_items(f, o); j->current_field < n; j->current_field++) {
2881                 uint64_t p;
2882                 void *d;
2883                 size_t l;
2884
2885                 p = journal_file_entry_item_object_offset(f, o, j->current_field);
2886                 r = journal_file_data_payload(f, NULL, p, NULL, 0, j->data_threshold, &d, &l);
2887                 if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
2888                         log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field);
2889                         continue;
2890                 }
2891                 if (r < 0)
2892                         return r;
2893                 assert(r > 0);
2894
2895                 *data = d;
2896                 *size = l;
2897
2898                 j->current_field++;
2899
2900                 return 1;
2901         }
2902
2903         return 0;
2904 }
2905
2906 _public_ int sd_journal_enumerate_available_data(sd_journal *j, const void **data, size_t *size) {
2907         for (;;) {
2908                 int r;
2909
2910                 r = sd_journal_enumerate_data(j, data, size);
2911                 if (r >= 0)
2912                         return r;
2913                 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
2914                         return r;
2915                 j->current_field++; /* Try with the next field */
2916         }
2917 }
2918
2919 _public_ void sd_journal_restart_data(sd_journal *j) {
2920         if (!j || journal_origin_changed(j))
2921                 return;
2922
2923         j->current_field = 0;
2924 }
2925
2926 static int reiterate_all_paths(sd_journal *j) {
2927         assert(j);
2928
2929         if (j->no_new_files)
2930                 return add_current_paths(j);
2931
2932         if (j->flags & SD_JOURNAL_OS_ROOT)
2933                 return add_search_paths(j);
2934
2935         if (j->toplevel_fd >= 0)
2936                 return add_root_directory(j, NULL, false);
2937
2938         if (j->path)
2939                 return add_root_directory(j, j->path, true);
2940
2941         return add_search_paths(j);
2942 }
2943
2944 _public_ int sd_journal_get_fd(sd_journal *j) {
2945         int r;
2946
2947         assert_return(j, -EINVAL);
2948         assert_return(!journal_origin_changed(j), -ECHILD);
2949         assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
2950
2951         if (j->no_inotify)
2952                 return -EMEDIUMTYPE;
2953
2954         if (j->inotify_fd >= 0)
2955                 return j->inotify_fd;
2956
2957         r = allocate_inotify(j);
2958         if (r < 0)
2959                 return r;
2960
2961         log_debug("Reiterating files to get inotify watches established.");
2962
2963         /* Iterate through all dirs again, to add them to the inotify */
2964         r = reiterate_all_paths(j);
2965         if (r < 0)
2966                 return r;
2967
2968         return j->inotify_fd;
2969 }
2970
2971 _public_ int sd_journal_get_events(sd_journal *j) {
2972         int fd;
2973
2974         assert_return(j, -EINVAL);
2975         assert_return(!journal_origin_changed(j), -ECHILD);
2976         assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
2977
2978         fd = sd_journal_get_fd(j);
2979         if (fd < 0)
2980                 return fd;
2981
2982         return POLLIN;
2983 }
2984
2985 _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
2986         int fd;
2987
2988         assert_return(j, -EINVAL);
2989         assert_return(!journal_origin_changed(j), -ECHILD);
2990         assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
2991         assert_return(timeout_usec, -EINVAL);
2992
2993         fd = sd_journal_get_fd(j);
2994         if (fd < 0)
2995                 return fd;
2996
2997         if (!j->on_network) {
2998                 *timeout_usec = UINT64_MAX;
2999                 return 0;
3000         }
3001
3002         /* If we are on the network we need to regularly check for
3003          * changes manually */
3004
3005         *timeout_usec = j->last_process_usec + JOURNAL_FILES_RECHECK_USEC;
3006         return 1;
3007 }
3008
3009 static void process_q_overflow(sd_journal *j) {
3010         JournalFile *f;
3011         Directory *m;
3012
3013         assert(j);
3014
3015         /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
3016          * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
3017          * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
3018          * are subject for unloading. */
3019
3020         log_debug("Inotify queue overrun, reiterating everything.");
3021
3022         j->generation++;
3023         (void) reiterate_all_paths(j);
3024
3025         ORDERED_HASHMAP_FOREACH(f, j->files) {
3026
3027                 if (f->last_seen_generation == j->generation)
3028                         continue;
3029
3030                 log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
3031                 remove_file_real(j, f);
3032         }
3033
3034         HASHMAP_FOREACH(m, j->directories_by_path) {
3035
3036                 if (m->last_seen_generation == j->generation)
3037                         continue;
3038
3039                 if (m->is_root) /* Never GC root directories */
3040                         continue;
3041
3042                 log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", f->path);
3043                 directory_free(m);
3044         }
3045
3046         log_debug("Reiteration complete.");
3047 }
3048
3049 static void process_inotify_event(sd_journal *j, const struct inotify_event *e) {
3050         Directory *d;
3051
3052         assert(j);
3053         assert(e);
3054
3055         if (e->mask & IN_Q_OVERFLOW) {
3056                 process_q_overflow(j);
3057                 return;
3058         }
3059
3060         /* Is this a subdirectory we watch? */
3061         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
3062         if (d) {
3063                 if (!(e->mask & IN_ISDIR) && e->len > 0 &&
3064                     (endswith(e->name, ".journal") ||
3065                      endswith(e->name, ".journal~"))) {
3066
3067                         /* Event for a journal file */
3068
3069                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
3070                                 (void) add_file_by_name(j, d->path, e->name);
3071                         else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT))
3072                                 (void) remove_file_by_name(j, d->path, e->name);
3073
3074                 } else if (!d->is_root && e->len == 0) {
3075
3076                         /* Event for a subdirectory */
3077
3078                         if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT))
3079                                 directory_free(d);
3080
3081                 } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) {
3082
3083                         /* Event for root directory */
3084
3085                         if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB))
3086                                 (void) add_directory(j, d->path, e->name);
3087                 }
3088
3089                 return;
3090         }
3091
3092         if (e->mask & IN_IGNORED)
3093                 return;
3094
3095         log_debug("Unexpected inotify event.");
3096 }
3097
3098 static int determine_change(sd_journal *j) {
3099         bool b;
3100
3101         assert(j);
3102
3103         b = j->current_invalidate_counter != j->last_invalidate_counter;
3104         j->last_invalidate_counter = j->current_invalidate_counter;
3105
3106         return b ? SD_JOURNAL_INVALIDATE : SD_JOURNAL_APPEND;
3107 }
3108
3109 _public_ int sd_journal_process(sd_journal *j) {
3110         bool got_something = false;
3111
3112         assert_return(j, -EINVAL);
3113         assert_return(!journal_origin_changed(j), -ECHILD);
3114
3115         if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's noting to process. */
3116                 return 0;
3117
3118         assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
3119
3120         j->last_process_usec = now(CLOCK_MONOTONIC);
3121         j->last_invalidate_counter = j->current_invalidate_counter;
3122
3123         for (;;) {
3124                 union inotify_event_buffer buffer;
3125                 ssize_t l;
3126
3127                 l = read(j->inotify_fd, &buffer, sizeof(buffer));
3128                 if (l < 0) {
3129                         if (ERRNO_IS_TRANSIENT(errno))
3130                                 return got_something ? determine_change(j) : SD_JOURNAL_NOP;
3131
3132                         return -errno;
3133                 }
3134
3135                 got_something = true;
3136
3137                 FOREACH_INOTIFY_EVENT(e, buffer, l)
3138                         process_inotify_event(j, e);
3139         }
3140 }
3141
3142 _public_ int sd_journal_wait(sd_journal *j, uint64_t timeout_usec) {
3143         int r;
3144         uint64_t t;
3145
3146         assert_return(j, -EINVAL);
3147         assert_return(!journal_origin_changed(j), -ECHILD);
3148         assert_return(!FLAGS_SET(j->flags, SD_JOURNAL_ASSUME_IMMUTABLE), -EUNATCH);
3149
3150         if (j->inotify_fd < 0) {
3151                 JournalFile *f;
3152
3153                 /* This is the first invocation, hence create the inotify watch */
3154                 r = sd_journal_get_fd(j);
3155                 if (r < 0)
3156                         return r;
3157
3158                 /* Server might have done some vacuuming while we weren't watching. Get rid of the deleted
3159                  * files now so they don't stay around indefinitely. */
3160                 ORDERED_HASHMAP_FOREACH(f, j->files) {
3161                         r = journal_file_fstat(f);
3162                         if (r == -EIDRM)
3163                                 remove_file_real(j, f);
3164                         else if (r < 0)
3165                                 log_debug_errno(r, "Failed to fstat() journal file '%s', ignoring: %m", f->path);
3166                 }
3167
3168                 /* The journal might have changed since the context object was created and we weren't
3169                  * watching before, hence don't wait for anything, and return immediately. */
3170                 return determine_change(j);
3171         }
3172
3173         r = sd_journal_get_timeout(j, &t);
3174         if (r < 0)
3175                 return r;
3176
3177         if (t != UINT64_MAX) {
3178                 t = usec_sub_unsigned(t, now(CLOCK_MONOTONIC));
3179
3180                 if (timeout_usec == UINT64_MAX || timeout_usec > t)
3181                         timeout_usec = t;
3182         }
3183
3184         do {
3185                 r = fd_wait_for_event(j->inotify_fd, POLLIN, timeout_usec);
3186         } while (r == -EINTR);
3187
3188         if (r < 0)
3189                 return r;
3190
3191         return sd_journal_process(j);
3192 }
3193
3194 _public_ int sd_journal_get_cutoff_realtime_usec(sd_journal *j, uint64_t *from, uint64_t *to) {
3195         JournalFile *f;
3196         bool first = true;
3197         uint64_t fmin = 0, tmax = 0;
3198         int r;
3199
3200         assert_return(j, -EINVAL);
3201         assert_return(!journal_origin_changed(j), -ECHILD);
3202         assert_return(from || to, -EINVAL);
3203         assert_return(from != to, -EINVAL);
3204
3205         ORDERED_HASHMAP_FOREACH(f, j->files) {
3206                 usec_t fr, t;
3207
3208                 r = journal_file_get_cutoff_realtime_usec(f, &fr, &t);
3209                 if (r == -ENOENT)
3210                         continue;
3211                 if (r < 0)
3212                         return r;
3213                 if (r == 0)
3214                         continue;
3215
3216                 if (first) {
3217                         fmin = fr;
3218                         tmax = t;
3219                         first = false;
3220                 } else {
3221                         fmin = MIN(fr, fmin);
3222                         tmax = MAX(t, tmax);
3223                 }
3224         }
3225
3226         if (from)
3227                 *from = fmin;
3228         if (to)
3229                 *to = tmax;
3230
3231         return first ? 0 : 1;
3232 }
3233
3234 _public_ int sd_journal_get_cutoff_monotonic_usec(
3235                 sd_journal *j,
3236                 sd_id128_t boot_id,
3237                 uint64_t *ret_from,
3238                 uint64_t *ret_to) {
3239
3240         uint64_t from = UINT64_MAX, to = UINT64_MAX;
3241         bool found = false;
3242         JournalFile *f;
3243         int r;
3244
3245         assert_return(j, -EINVAL);
3246         assert_return(!journal_origin_changed(j), -ECHILD);
3247         assert_return(ret_from != ret_to, -EINVAL);
3248
3249         ORDERED_HASHMAP_FOREACH(f, j->files) {
3250                 usec_t ff, tt;
3251
3252                 r = journal_file_get_cutoff_monotonic_usec(f, boot_id, &ff, &tt);
3253                 if (r == -ENOENT)
3254                         continue;
3255                 if (r < 0)
3256                         return r;
3257                 if (r == 0)
3258                         continue;
3259
3260                 if (found) {
3261                         from = MIN(ff, from);
3262                         to = MAX(tt, to);
3263                 } else {
3264                         from = ff;
3265                         to = tt;
3266                         found = true;
3267                 }
3268         }
3269
3270         if (ret_from)
3271                 *ret_from = from;
3272         if (ret_to)
3273                 *ret_to = to;
3274
3275         return found;
3276 }
3277
3278 void journal_print_header(sd_journal *j) {
3279         JournalFile *f;
3280         bool newline = false;
3281
3282         assert(j);
3283
3284         ORDERED_HASHMAP_FOREACH(f, j->files) {
3285                 if (newline)
3286                         putchar('\n');
3287                 else
3288                         newline = true;
3289
3290                 journal_file_print_header(f);
3291         }
3292 }
3293
3294 _public_ int sd_journal_get_usage(sd_journal *j, uint64_t *ret) {
3295         JournalFile *f;
3296         uint64_t sum = 0;
3297
3298         assert_return(j, -EINVAL);
3299         assert_return(!journal_origin_changed(j), -ECHILD);
3300         assert_return(ret, -EINVAL);
3301
3302         ORDERED_HASHMAP_FOREACH(f, j->files) {
3303                 struct stat st;
3304                 uint64_t b;
3305
3306                 if (fstat(f->fd, &st) < 0)
3307                         return -errno;
3308
3309                 b = (uint64_t) st.st_blocks;
3310                 if (b > UINT64_MAX / 512)
3311                         return -EOVERFLOW;
3312                 b *= 512;
3313
3314                 if (sum > UINT64_MAX - b)
3315                         return -EOVERFLOW;
3316                 sum += b;
3317         }
3318
3319         *ret = sum;
3320         return 0;
3321 }
3322
3323 _public_ int sd_journal_query_unique(sd_journal *j, const char *field) {
3324         int r;
3325
3326         assert_return(j, -EINVAL);
3327         assert_return(!journal_origin_changed(j), -ECHILD);
3328
3329         if (!field_is_valid(field))
3330                 return -EINVAL;
3331
3332         r = free_and_strdup(&j->unique_field, field);
3333         if (r < 0)
3334                 return r;
3335
3336         j->unique_file = NULL;
3337         j->unique_offset = 0;
3338         j->unique_file_lost = false;
3339
3340         return 0;
3341 }
3342
3343 _public_ int sd_journal_enumerate_unique(
3344                 sd_journal *j,
3345                 const void **ret_data,
3346                 size_t *ret_size) {
3347
3348         size_t k;
3349
3350         assert_return(j, -EINVAL);
3351         assert_return(!journal_origin_changed(j), -ECHILD);
3352         assert_return(j->unique_field, -EINVAL);
3353
3354         k = strlen(j->unique_field);
3355
3356         if (!j->unique_file) {
3357                 if (j->unique_file_lost)
3358                         return 0;
3359
3360                 j->unique_file = ordered_hashmap_first(j->files);
3361                 if (!j->unique_file)
3362                         return 0;
3363
3364                 j->unique_offset = 0;
3365         }
3366
3367         for (;;) {
3368                 JournalFile *of;
3369                 Object *o;
3370                 void *odata;
3371                 size_t ol;
3372                 bool found;
3373                 int r;
3374
3375                 /* Proceed to next data object in the field's linked list */
3376                 if (j->unique_offset == 0) {
3377                         r = journal_file_find_field_object(j->unique_file, j->unique_field, k, &o, NULL);
3378                         if (r < 0)
3379                                 return r;
3380
3381                         j->unique_offset = r > 0 ? le64toh(o->field.head_data_offset) : 0;
3382                 } else {
3383                         r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
3384                         if (r < 0)
3385                                 return r;
3386
3387                         j->unique_offset = le64toh(o->data.next_field_offset);
3388                 }
3389
3390                 /* We reached the end of the list? Then start again, with the next file */
3391                 if (j->unique_offset == 0) {
3392                         j->unique_file = ordered_hashmap_next(j->files, j->unique_file->path);
3393                         if (!j->unique_file)
3394                                 return 0;
3395
3396                         continue;
3397                 }
3398
3399                 r = journal_file_move_to_object(j->unique_file, OBJECT_DATA, j->unique_offset, &o);
3400                 if (r < 0)
3401                         return r;
3402
3403                 /* Let's pin the data object, so we can look at it at the same time as one on another file. */
3404                 r = journal_file_pin_object(j->unique_file, o);
3405                 if (r < 0)
3406                         return r;
3407
3408                 r = journal_file_data_payload(j->unique_file, o, j->unique_offset, NULL, 0,
3409                                               j->data_threshold, &odata, &ol);
3410                 if (r < 0)
3411                         return r;
3412
3413                 /* Check if we have at least the field name and "=". */
3414                 if (ol <= k)
3415                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
3416                                                "%s:offset " OFSfmt ": object has size %zu, expected at least %zu",
3417                                                j->unique_file->path,
3418                                                j->unique_offset, ol, k + 1);
3419
3420                 if (memcmp(odata, j->unique_field, k) != 0 || ((const char*) odata)[k] != '=')
3421                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
3422                                                "%s:offset " OFSfmt ": object does not start with \"%s=\"",
3423                                                j->unique_file->path,
3424                                                j->unique_offset,
3425                                                j->unique_field);
3426
3427                 /* OK, now let's see if we already returned this data object by checking if it exists in the
3428                  * earlier traversed files. */
3429                 found = false;
3430                 ORDERED_HASHMAP_FOREACH(of, j->files) {
3431                         if (of == j->unique_file)
3432                                 break;
3433
3434                         /* Skip this file it didn't have any fields indexed */
3435                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3436                                 continue;
3437
3438                         /* We can reuse the hash from our current file only on old-style journal files
3439                          * without keyed hashes. On new-style files we have to calculate the hash anew, to
3440                          * take the per-file hash seed into consideration. */
3441                         if (!JOURNAL_HEADER_KEYED_HASH(j->unique_file->header) && !JOURNAL_HEADER_KEYED_HASH(of->header))
3442                                 r = journal_file_find_data_object_with_hash(of, odata, ol, le64toh(o->data.hash), NULL, NULL);
3443                         else
3444                                 r = journal_file_find_data_object(of, odata, ol, NULL, NULL);
3445                         if (r < 0)
3446                                 return r;
3447                         if (r > 0) {
3448                                 found = true;
3449                                 break;
3450                         }
3451                 }
3452
3453                 if (found)
3454                         continue;
3455
3456                 *ret_data = odata;
3457                 *ret_size = ol;
3458
3459                 return 1;
3460         }
3461 }
3462
3463 _public_ int sd_journal_enumerate_available_unique(sd_journal *j, const void **data, size_t *size) {
3464         for (;;) {
3465                 int r;
3466
3467                 r = sd_journal_enumerate_unique(j, data, size);
3468                 if (r >= 0)
3469                         return r;
3470                 if (!JOURNAL_ERRNO_IS_UNAVAILABLE_FIELD(r))
3471                         return r;
3472                 /* Try with the next field. sd_journal_enumerate_unique() modifies state, so on the next try
3473                  * we will access the next field. */
3474         }
3475 }
3476
3477 _public_ void sd_journal_restart_unique(sd_journal *j) {
3478         if (!j || journal_origin_changed(j))
3479                 return;
3480
3481         j->unique_file = NULL;
3482         j->unique_offset = 0;
3483         j->unique_file_lost = false;
3484 }
3485
3486 _public_ int sd_journal_enumerate_fields(sd_journal *j, const char **field) {
3487         int r;
3488
3489         assert_return(j, -EINVAL);
3490         assert_return(!journal_origin_changed(j), -ECHILD);
3491         assert_return(field, -EINVAL);
3492
3493         if (!j->fields_file) {
3494                 if (j->fields_file_lost)
3495                         return 0;
3496
3497                 j->fields_file = ordered_hashmap_first(j->files);
3498                 if (!j->fields_file)
3499                         return 0;
3500
3501                 j->fields_hash_table_index = 0;
3502                 j->fields_offset = 0;
3503         }
3504
3505         for (;;) {
3506                 JournalFile *f, *of;
3507                 uint64_t m;
3508                 Object *o;
3509                 size_t sz;
3510                 bool found;
3511
3512                 f = j->fields_file;
3513
3514                 if (j->fields_offset == 0) {
3515                         bool eof = false;
3516
3517                         /* We are not yet positioned at any field. Let's pick the first one */
3518                         r = journal_file_map_field_hash_table(f);
3519                         if (r < 0)
3520                                 return r;
3521
3522                         m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
3523                         for (;;) {
3524                                 if (j->fields_hash_table_index >= m) {
3525                                         /* Reached the end of the hash table, go to the next file. */
3526                                         eof = true;
3527                                         break;
3528                                 }
3529
3530                                 j->fields_offset = le64toh(f->field_hash_table[j->fields_hash_table_index].head_hash_offset);
3531
3532                                 if (j->fields_offset != 0)
3533                                         break;
3534
3535                                 /* Empty hash table bucket, go to next one */
3536                                 j->fields_hash_table_index++;
3537                         }
3538
3539                         if (eof) {
3540                                 /* Proceed with next file */
3541                                 j->fields_file = ordered_hashmap_next(j->files, f->path);
3542                                 if (!j->fields_file) {
3543                                         *field = NULL;
3544                                         return 0;
3545                                 }
3546
3547                                 j->fields_offset = 0;
3548                                 j->fields_hash_table_index = 0;
3549                                 continue;
3550                         }
3551
3552                 } else {
3553                         /* We are already positioned at a field. If so, let's figure out the next field from it */
3554
3555                         r = journal_file_move_to_object(f, OBJECT_FIELD, j->fields_offset, &o);
3556                         if (r < 0)
3557                                 return r;
3558
3559                         j->fields_offset = le64toh(o->field.next_hash_offset);
3560                         if (j->fields_offset == 0) {
3561                                 /* Reached the end of the hash table chain */
3562                                 j->fields_hash_table_index++;
3563                                 continue;
3564                         }
3565                 }
3566
3567                 /* We use OBJECT_UNUSED here, so that the iterator below doesn't remove our mmap window */
3568                 r = journal_file_move_to_object(f, OBJECT_UNUSED, j->fields_offset, &o);
3569                 if (r < 0)
3570                         return r;
3571
3572                 /* Because we used OBJECT_UNUSED above, we need to do our type check manually */
3573                 if (o->object.type != OBJECT_FIELD)
3574                         return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
3575                                                "%s:offset " OFSfmt ": object has type %i, expected %i",
3576                                                f->path, j->fields_offset,
3577                                                o->object.type, OBJECT_FIELD);
3578
3579                 sz = le64toh(o->object.size) - offsetof(Object, field.payload);
3580
3581                 /* Let's see if we already returned this field name before. */
3582                 found = false;
3583                 ORDERED_HASHMAP_FOREACH(of, j->files) {
3584                         if (of == f)
3585                                 break;
3586
3587                         /* Skip this file it didn't have any fields indexed */
3588                         if (JOURNAL_HEADER_CONTAINS(of->header, n_fields) && le64toh(of->header->n_fields) <= 0)
3589                                 continue;
3590
3591                         if (!JOURNAL_HEADER_KEYED_HASH(f->header) && !JOURNAL_HEADER_KEYED_HASH(of->header))
3592                                 r = journal_file_find_field_object_with_hash(of, o->field.payload, sz,
3593                                                                              le64toh(o->field.hash), NULL, NULL);
3594                         else
3595                                 r = journal_file_find_field_object(of, o->field.payload, sz, NULL, NULL);
3596                         if (r < 0)
3597                                 return r;
3598                         if (r > 0) {
3599                                 found = true;
3600                                 break;
3601                         }
3602                 }
3603
3604                 if (found)
3605                         continue;
3606
3607                 /* Check if this is really a valid string containing no NUL byte */
3608                 if (memchr(o->field.payload, 0, sz))
3609                         return -EBADMSG;
3610
3611                 if (j->data_threshold > 0 && sz > j->data_threshold)
3612                         sz = j->data_threshold;
3613
3614                 if (!GREEDY_REALLOC(j->fields_buffer, sz + 1))
3615                         return -ENOMEM;
3616
3617                 memcpy(j->fields_buffer, o->field.payload, sz);
3618                 j->fields_buffer[sz] = 0;
3619
3620                 if (!field_is_valid(j->fields_buffer))
3621                         return -EBADMSG;
3622
3623                 *field = j->fields_buffer;
3624                 return 1;
3625         }
3626 }
3627
3628 _public_ void sd_journal_restart_fields(sd_journal *j) {
3629         if (!j || journal_origin_changed(j))
3630                 return;
3631
3632         j->fields_file = NULL;
3633         j->fields_hash_table_index = 0;
3634         j->fields_offset = 0;
3635         j->fields_file_lost = false;
3636 }
3637
3638 _public_ int sd_journal_reliable_fd(sd_journal *j) {
3639         assert_return(j, -EINVAL);
3640         assert_return(!journal_origin_changed(j), -ECHILD);
3641
3642         return !j->on_network;
3643 }
3644
3645 static char *lookup_field(const char *field, void *userdata) {
3646         sd_journal *j = ASSERT_PTR(userdata);
3647         const void *data;
3648         size_t size, d;
3649         int r;
3650
3651         assert(field);
3652
3653         r = sd_journal_get_data(j, field, &data, &size);
3654         if (r < 0 ||
3655             size > REPLACE_VAR_MAX)
3656                 return strdup(field);
3657
3658         d = strlen(field) + 1;
3659
3660         return strndup((const char*) data + d, size - d);
3661 }
3662
3663 _public_ int sd_journal_get_catalog(sd_journal *j, char **ret) {
3664         const void *data;
3665         size_t size;
3666         sd_id128_t id;
3667         _cleanup_free_ char *text = NULL, *cid = NULL;
3668         char *t;
3669         int r;
3670
3671         assert_return(j, -EINVAL);
3672         assert_return(!journal_origin_changed(j), -ECHILD);
3673         assert_return(ret, -EINVAL);
3674
3675         r = sd_journal_get_data(j, "MESSAGE_ID", &data, &size);
3676         if (r < 0)
3677                 return r;
3678
3679         cid = strndup((const char*) data + 11, size - 11);
3680         if (!cid)
3681                 return -ENOMEM;
3682
3683         r = sd_id128_from_string(cid, &id);
3684         if (r < 0)
3685                 return r;
3686
3687         r = catalog_get(secure_getenv("SYSTEMD_CATALOG") ?: CATALOG_DATABASE, id, &text);
3688         if (r < 0)
3689                 return r;
3690
3691         t = replace_var(text, lookup_field, j);
3692         if (!t)
3693                 return -ENOMEM;
3694
3695         *ret = t;
3696         return 0;
3697 }
3698
3699 _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
3700         assert_return(ret, -EINVAL);
3701
3702         return catalog_get(CATALOG_DATABASE, id, ret);
3703 }
3704
3705 _public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
3706         assert_return(j, -EINVAL);
3707         assert_return(!journal_origin_changed(j), -ECHILD);
3708
3709         j->data_threshold = sz;
3710         return 0;
3711 }
3712
3713 _public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
3714         assert_return(j, -EINVAL);
3715         assert_return(!journal_origin_changed(j), -ECHILD);
3716         assert_return(sz, -EINVAL);
3717
3718         *sz = j->data_threshold;
3719         return 0;
3720 }
3721
3722 _public_ int sd_journal_has_runtime_files(sd_journal *j) {
3723         assert_return(j, -EINVAL);
3724
3725         return j->has_runtime_files;
3726 }
3727
3728 _public_ int sd_journal_has_persistent_files(sd_journal *j) {
3729         assert_return(j, -EINVAL);
3730
3731         return j->has_persistent_files;
3732 }