src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   Copyright 2011 Lennart Poettering
   4 ***/
   5
   6 #if HAVE_SELINUX
   7 #include <selinux/selinux.h>
   8 #endif
   9 #include <sys/ioctl.h>
  10 #include <sys/mman.h>
  11 #include <sys/signalfd.h>
  12 #include <sys/statvfs.h>
  13 #include <linux/sockios.h>
  14
  15 #include "libudev.h"
  16 #include "sd-daemon.h"
  17 #include "sd-journal.h"
  18 #include "sd-messages.h"
  19
  20 #include "acl-util.h"
  21 #include "alloc-util.h"
  22 #include "audit-util.h"
  23 #include "cgroup-util.h"
  24 #include "conf-parser.h"
  25 #include "dirent-util.h"
  26 #include "extract-word.h"
  27 #include "fd-util.h"
  28 #include "fileio.h"
  29 #include "format-util.h"
  30 #include "fs-util.h"
  31 #include "hashmap.h"
  32 #include "hostname-util.h"
  33 #include "id128-util.h"
  34 #include "io-util.h"
  35 #include "journal-authenticate.h"
  36 #include "journal-file.h"
  37 #include "journal-internal.h"
  38 #include "journal-vacuum.h"
  39 #include "journald-audit.h"
  40 #include "journald-context.h"
  41 #include "journald-kmsg.h"
  42 #include "journald-native.h"
  43 #include "journald-rate-limit.h"
  44 #include "journald-server.h"
  45 #include "journald-stream.h"
  46 #include "journald-syslog.h"
  47 #include "log.h"
  48 #include "missing.h"
  49 #include "mkdir.h"
  50 #include "parse-util.h"
  51 #include "proc-cmdline.h"
  52 #include "process-util.h"
  53 #include "rm-rf.h"
  54 #include "selinux-util.h"
  55 #include "signal-util.h"
  56 #include "socket-util.h"
  57 #include "stdio-util.h"
  58 #include "string-table.h"
  59 #include "string-util.h"
  60 #include "syslog-util.h"
  61 #include "user-util.h"
  62
/* Upper bound on the number of per-user journal files kept open at once: find_journal() closes the first
 * entry of the ordered user journal hashmap before opening another file once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): presumably applied when the configuration does not override them — confirm
 * against the server initialization code, which is not part of this section. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures stay valid before cache_space_refresh() measures the directory again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): by its name the send buffer size for the notification socket — confirm at the point of use */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
  80
  81 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  82         _cleanup_closedir_ DIR *d = NULL;
  83         struct dirent *de;
  84         struct statvfs ss;
  85
  86         assert(ret_used);
  87         assert(ret_free);
  88
  89         d = opendir(path);
  90         if (!d)
  91                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
  92                                       errno, "Failed to open %s: %m", path);
  93
  94         if (fstatvfs(dirfd(d), &ss) < 0)
  95                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
  96
  97         *ret_free = ss.f_bsize * ss.f_bavail;
  98         *ret_used = 0;
  99         FOREACH_DIRENT_ALL(de, d, break) {
 100                 struct stat st;
 101
 102                 if (!endswith(de->d_name, ".journal") &&
 103                     !endswith(de->d_name, ".journal~"))
 104                         continue;
 105
 106                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 107                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 108                         continue;
 109                 }
 110
 111                 if (!S_ISREG(st.st_mode))
 112                         continue;
 113
 114                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 115         }
 116
 117         return 0;
 118 }
 119
 120 static void cache_space_invalidate(JournalStorageSpace *space) {
 121         zero(*space);
 122 }
 123
 124 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 125         JournalStorageSpace *space;
 126         JournalMetrics *metrics;
 127         uint64_t vfs_used, vfs_avail, avail;
 128         usec_t ts;
 129         int r;
 130
 131         assert(s);
 132
 133         metrics = &storage->metrics;
 134         space = &storage->space;
 135
 136         ts = now(CLOCK_MONOTONIC);
 137
 138         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 139                 return 0;
 140
 141         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 142         if (r < 0)
 143                 return r;
 144
 145         space->vfs_used = vfs_used;
 146         space->vfs_available = vfs_avail;
 147
 148         avail = LESS_BY(vfs_avail, metrics->keep_free);
 149
 150         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 151         space->available = LESS_BY(space->limit, vfs_used);
 152         space->timestamp = ts;
 153         return 1;
 154 }
 155
 156 static void patch_min_use(JournalStorage *storage) {
 157         assert(storage);
 158
 159         /* Let's bump the min_use limit to the current usage on disk. We do
 160          * this when starting up and first opening the journal files. This way
 161          * sudden spikes in disk usage will not cause journald to vacuum files
 162          * without bounds. Note that this means that only a restart of journald
 163          * will make it reset this value. */
 164
 165         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 166 }
 167
 168 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 169         JournalStorage *js;
 170         int r;
 171
 172         assert(s);
 173
 174         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 175
 176         r = cache_space_refresh(s, js);
 177         if (r >= 0) {
 178                 if (available)
 179                         *available = js->space.available;
 180                 if (limit)
 181                         *limit = js->space.limit;
 182         }
 183         return r;
 184 }
 185
 186 void server_space_usage_message(Server *s, JournalStorage *storage) {
 187         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 188              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 189         JournalMetrics *metrics;
 190
 191         assert(s);
 192
 193         if (!storage)
 194                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 195
 196         if (cache_space_refresh(s, storage) < 0)
 197                 return;
 198
 199         metrics = &storage->metrics;
 200         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 201         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 202         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 203         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 204         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 205         format_bytes(fb6, sizeof(fb6), storage->space.available);
 206
 207         server_driver_message(s, 0,
 208                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 209                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 210                                           storage->name, storage->path, fb1, fb5, fb6),
 211                               "JOURNAL_NAME=%s", storage->name,
 212                               "JOURNAL_PATH=%s", storage->path,
 213                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 214                               "CURRENT_USE_PRETTY=%s", fb1,
 215                               "MAX_USE=%"PRIu64, metrics->max_use,
 216                               "MAX_USE_PRETTY=%s", fb2,
 217                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 218                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 219                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 220                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 221                               "LIMIT=%"PRIu64, storage->space.limit,
 222                               "LIMIT_PRETTY=%s", fb5,
 223                               "AVAILABLE=%"PRIu64, storage->space.available,
 224                               "AVAILABLE_PRETTY=%s", fb6,
 225                               NULL);
 226 }
 227
 228 static bool uid_for_system_journal(uid_t uid) {
 229
 230         /* Returns true if the specified UID shall get its data stored in the system journal*/
 231
 232         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 233 }
 234
 235 static void server_add_acls(JournalFile *f, uid_t uid) {
 236 #if HAVE_ACL
 237         int r;
 238 #endif
 239         assert(f);
 240
 241 #if HAVE_ACL
 242         if (uid_for_system_journal(uid))
 243                 return;
 244
 245         r = add_acls_for_user(f->fd, uid);
 246         if (r < 0)
 247                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 248 #endif
 249 }
 250
 251 static int open_journal(
 252                 Server *s,
 253                 bool reliably,
 254                 const char *fname,
 255                 int flags,
 256                 bool seal,
 257                 JournalMetrics *metrics,
 258                 JournalFile **ret) {
 259         int r;
 260         JournalFile *f;
 261
 262         assert(s);
 263         assert(fname);
 264         assert(ret);
 265
 266         if (reliably)
 267                 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
 268                                                seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 269         else
 270                 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
 271                                       metrics, s->mmap, s->deferred_closes, NULL, &f);
 272
 273         if (r < 0)
 274                 return r;
 275
 276         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 277         if (r < 0) {
 278                 (void) journal_file_close(f);
 279                 return r;
 280         }
 281
 282         *ret = f;
 283         return r;
 284 }
 285
 286 static bool flushed_flag_is_set(void) {
 287         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 288 }
 289
 290 static int system_journal_open(Server *s, bool flush_requested) {
 291         const char *fn;
 292         int r = 0;
 293
 294         if (!s->system_journal &&
 295             IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
 296             (flush_requested || flushed_flag_is_set())) {
 297
 298                 /* If in auto mode: first try to create the machine
 299                  * path, but not the prefix.
 300                  *
 301                  * If in persistent mode: create /var/log/journal and
 302                  * the machine path */
 303
 304                 if (s->storage == STORAGE_PERSISTENT)
 305                         (void) mkdir_p("/var/log/journal/", 0755);
 306
 307                 (void) mkdir(s->system_storage.path, 0755);
 308
 309                 fn = strjoina(s->system_storage.path, "/system.journal");
 310                 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
 311                 if (r >= 0) {
 312                         server_add_acls(s->system_journal, 0);
 313                         (void) cache_space_refresh(s, &s->system_storage);
 314                         patch_min_use(&s->system_storage);
 315                 } else if (r < 0) {
 316                         if (!IN_SET(r, -ENOENT, -EROFS))
 317                                 log_warning_errno(r, "Failed to open system journal: %m");
 318
 319                         r = 0;
 320                 }
 321
 322                 /* If the runtime journal is open, and we're post-flush, we're
 323                  * recovering from a failed system journal rotate (ENOSPC)
 324                  * for which the runtime journal was reopened.
 325                  *
 326                  * Perform an implicit flush to var, leaving the runtime
 327                  * journal closed, now that the system journal is back.
 328                  */
 329                 if (!flush_requested)
 330                         (void) server_flush_to_var(s, true);
 331         }
 332
 333         if (!s->runtime_journal &&
 334             (s->storage != STORAGE_NONE)) {
 335
 336                 fn = strjoina(s->runtime_storage.path, "/system.journal");
 337
 338                 if (s->system_journal) {
 339
 340                         /* Try to open the runtime journal, but only
 341                          * if it already exists, so that we can flush
 342                          * it into the system journal */
 343
 344                         r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
 345                         if (r < 0) {
 346                                 if (r != -ENOENT)
 347                                         log_warning_errno(r, "Failed to open runtime journal: %m");
 348
 349                                 r = 0;
 350                         }
 351
 352                 } else {
 353
 354                         /* OK, we really need the runtime journal, so create
 355                          * it if necessary. */
 356
 357                         (void) mkdir("/run/log", 0755);
 358                         (void) mkdir("/run/log/journal", 0755);
 359                         (void) mkdir_parents(fn, 0750);
 360
 361                         r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
 362                         if (r < 0)
 363                                 return log_error_errno(r, "Failed to open runtime journal: %m");
 364                 }
 365
 366                 if (s->runtime_journal) {
 367                         server_add_acls(s->runtime_journal, 0);
 368                         (void) cache_space_refresh(s, &s->runtime_storage);
 369                         patch_min_use(&s->runtime_storage);
 370                 }
 371         }
 372
 373         return r;
 374 }
 375
 376 static JournalFile* find_journal(Server *s, uid_t uid) {
 377         _cleanup_free_ char *p = NULL;
 378         int r;
 379         JournalFile *f;
 380         sd_id128_t machine;
 381
 382         assert(s);
 383
 384         /* A rotate that fails to create the new journal (ENOSPC) leaves the
 385          * rotated journal as NULL.  Unless we revisit opening, even after
 386          * space is made available we'll continue to return NULL indefinitely.
 387          *
 388          * system_journal_open() is a noop if the journals are already open, so
 389          * we can just call it here to recover from failed rotates (or anything
 390          * else that's left the journals as NULL).
 391          *
 392          * Fixes https://github.com/systemd/systemd/issues/3968 */
 393         (void) system_journal_open(s, false);
 394
 395         /* We split up user logs only on /var, not on /run. If the
 396          * runtime file is open, we write to it exclusively, in order
 397          * to guarantee proper order as soon as we flush /run to
 398          * /var and close the runtime file. */
 399
 400         if (s->runtime_journal)
 401                 return s->runtime_journal;
 402
 403         if (uid_for_system_journal(uid))
 404                 return s->system_journal;
 405
 406         r = sd_id128_get_machine(&machine);
 407         if (r < 0)
 408                 return s->system_journal;
 409
 410         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 411         if (f)
 412                 return f;
 413
 414         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
 415                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
 416                 return s->system_journal;
 417
 418         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 419                 /* Too many open? Then let's close one */
 420                 f = ordered_hashmap_steal_first(s->user_journals);
 421                 assert(f);
 422                 (void) journal_file_close(f);
 423         }
 424
 425         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 426         if (r < 0)
 427                 return s->system_journal;
 428
 429         server_add_acls(f, uid);
 430
 431         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 432         if (r < 0) {
 433                 (void) journal_file_close(f);
 434                 return s->system_journal;
 435         }
 436
 437         return f;
 438 }
 439
 440 static int do_rotate(
 441                 Server *s,
 442                 JournalFile **f,
 443                 const char* name,
 444                 bool seal,
 445                 uint32_t uid) {
 446
 447         int r;
 448         assert(s);
 449
 450         if (!*f)
 451                 return -EINVAL;
 452
 453         r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
 454         if (r < 0) {
 455                 if (*f)
 456                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 457                 else
 458                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 459         }
 460
 461         server_add_acls(*f, uid);
 462
 463         return r;
 464 }
 465
 466 void server_rotate(Server *s) {
 467         JournalFile *f;
 468         void *k;
 469         Iterator i;
 470         int r;
 471
 472         log_debug("Rotating...");
 473
 474         (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
 475         (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
 476
 477         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
 478                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
 479                 if (r >= 0)
 480                         ordered_hashmap_replace(s->user_journals, k, f);
 481                 else if (!f)
 482                         /* Old file has been closed and deallocated */
 483                         ordered_hashmap_remove(s->user_journals, k);
 484         }
 485
 486         /* Perform any deferred closes which aren't still offlining. */
 487         SET_FOREACH(f, s->deferred_closes, i)
 488                 if (!journal_file_is_offlining(f)) {
 489                         (void) set_remove(s->deferred_closes, f);
 490                         (void) journal_file_close(f);
 491                 }
 492 }
 493
 494 void server_sync(Server *s) {
 495         JournalFile *f;
 496         Iterator i;
 497         int r;
 498
 499         if (s->system_journal) {
 500                 r = journal_file_set_offline(s->system_journal, false);
 501                 if (r < 0)
 502                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 503         }
 504
 505         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 506                 r = journal_file_set_offline(f, false);
 507                 if (r < 0)
 508                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 509         }
 510
 511         if (s->sync_event_source) {
 512                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 513                 if (r < 0)
 514                         log_error_errno(r, "Failed to disable sync timer source: %m");
 515         }
 516
 517         s->sync_scheduled = false;
 518 }
 519
 520 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 521
 522         int r;
 523
 524         assert(s);
 525         assert(storage);
 526
 527         (void) cache_space_refresh(s, storage);
 528
 529         if (verbose)
 530                 server_space_usage_message(s, storage);
 531
 532         r = journal_directory_vacuum(storage->path, storage->space.limit,
 533                                      storage->metrics.n_max_files, s->max_retention_usec,
 534                                      &s->oldest_file_usec, verbose);
 535         if (r < 0 && r != -ENOENT)
 536                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 537
 538         cache_space_invalidate(&storage->space);
 539 }
 540
 541 int server_vacuum(Server *s, bool verbose) {
 542         assert(s);
 543
 544         log_debug("Vacuuming...");
 545
 546         s->oldest_file_usec = 0;
 547
 548         if (s->system_journal)
 549                 do_vacuum(s, &s->system_storage, verbose);
 550         if (s->runtime_journal)
 551                 do_vacuum(s, &s->runtime_storage, verbose);
 552
 553         return 0;
 554 }
 555
 556 static void server_cache_machine_id(Server *s) {
 557         sd_id128_t id;
 558         int r;
 559
 560         assert(s);
 561
 562         r = sd_id128_get_machine(&id);
 563         if (r < 0)
 564                 return;
 565
 566         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 567 }
 568
 569 static void server_cache_boot_id(Server *s) {
 570         sd_id128_t id;
 571         int r;
 572
 573         assert(s);
 574
 575         r = sd_id128_get_boot(&id);
 576         if (r < 0)
 577                 return;
 578
 579         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 580 }
 581
 582 static void server_cache_hostname(Server *s) {
 583         _cleanup_free_ char *t = NULL;
 584         char *x;
 585
 586         assert(s);
 587
 588         t = gethostname_malloc();
 589         if (!t)
 590                 return;
 591
 592         x = strappend("_HOSTNAME=", t);
 593         if (!x)
 594                 return;
 595
 596         free(s->hostname_field);
 597         s->hostname_field = x;
 598 }
 599
 600 static bool shall_try_append_again(JournalFile *f, int r) {
 601         switch(r) {
 602
 603         case -E2BIG:           /* Hit configured limit          */
 604         case -EFBIG:           /* Hit fs limit                  */
 605         case -EDQUOT:          /* Quota limit hit               */
 606         case -ENOSPC:          /* Disk full                     */
 607                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 608                 return true;
 609
 610         case -EIO:             /* I/O error of some kind (mmap) */
 611                 log_warning("%s: IO error, rotating.", f->path);
 612                 return true;
 613
 614         case -EHOSTDOWN:       /* Other machine                 */
 615                 log_info("%s: Journal file from other machine, rotating.", f->path);
 616                 return true;
 617
 618         case -EBUSY:           /* Unclean shutdown              */
 619                 log_info("%s: Unclean shutdown, rotating.", f->path);
 620                 return true;
 621
 622         case -EPROTONOSUPPORT: /* Unsupported feature           */
 623                 log_info("%s: Unsupported feature, rotating.", f->path);
 624                 return true;
 625
 626         case -EBADMSG:         /* Corrupted                     */
 627         case -ENODATA:         /* Truncated                     */
 628         case -ESHUTDOWN:       /* Already archived              */
 629                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 630                 return true;
 631
 632         case -EIDRM:           /* Journal file has been deleted */
 633                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 634                 return true;
 635
 636         case -ETXTBSY:         /* Journal file is from the future */
 637                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 638                 return true;
 639
 640         default:
 641                 return false;
 642         }
 643 }
 644
 645 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
 646         bool vacuumed = false, rotate = false;
 647         struct dual_timestamp ts;
 648         JournalFile *f;
 649         int r;
 650
 651         assert(s);
 652         assert(iovec);
 653         assert(n > 0);
 654
 655         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 656          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 657          * processing it, as we want strictly linear ordering in what we write out.) */
 658         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 659         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 660
 661         if (ts.realtime < s->last_realtime_clock) {
 662                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 663                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 664                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 665                  * bisection works correctly. */
 666
 667                 log_debug("Time jumped backwards, rotating.");
 668                 rotate = true;
 669         } else {
 670
 671                 f = find_journal(s, uid);
 672                 if (!f)
 673                         return;
 674
 675                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 676                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 677                         rotate = true;
 678                 }
 679         }
 680
 681         if (rotate) {
 682                 server_rotate(s);
 683                 server_vacuum(s, false);
 684                 vacuumed = true;
 685
 686                 f = find_journal(s, uid);
 687                 if (!f)
 688                         return;
 689         }
 690
 691         s->last_realtime_clock = ts.realtime;
 692
 693         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 694         if (r >= 0) {
 695                 server_schedule_sync(s, priority);
 696                 return;
 697         }
 698
 699         if (vacuumed || !shall_try_append_again(f, r)) {
 700                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 701                 return;
 702         }
 703
 704         server_rotate(s);
 705         server_vacuum(s, false);
 706
 707         f = find_journal(s, uid);
 708         if (!f)
 709                 return;
 710
 711         log_debug("Retrying write.");
 712         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 713         if (r < 0)
 714                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 715         else
 716                 server_schedule_sync(s, priority);
 717 }
 718
 719 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
 720         if (isset(value)) {                                             \
 721                 char *k;                                                \
 722                 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
 723                 sprintf(k, field "=" format, value);                    \
 724                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 725         }
 726
 727 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
 728         if (!isempty(value)) {                                          \
 729                 char *k;                                                \
 730                 k = strjoina(field "=", value);                         \
 731                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 732         }
 733
 734 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
 735         if (!sd_id128_is_null(value)) {                                 \
 736                 char *k;                                                \
 737                 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
 738                 sd_id128_to_string(value, stpcpy(k, field "="));        \
 739                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 740         }
 741
 742 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
 743         if (value_size > 0) {                                           \
 744                 char *k;                                                \
 745                 k = newa(char, STRLEN(field "=") + value_size + 1);     \
 746                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
 747                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 748         }                                                               \
 749
 750 static void dispatch_message_real(
 751                 Server *s,
 752                 struct iovec *iovec, size_t n, size_t m,
 753                 const ClientContext *c,
 754                 const struct timeval *tv,
 755                 int priority,
 756                 pid_t object_pid) {
 757
 758         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 759         uid_t journal_uid;
 760         ClientContext *o;
 761
 762         assert(s);
 763         assert(iovec);
 764         assert(n > 0);
 765         assert(n +
 766                N_IOVEC_META_FIELDS +
 767                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 768                client_context_extra_fields_n_iovec(c) <= m);
 769
 770         if (c) {
 771                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 772                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 773                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 774
 775                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 776                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 777                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 778                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 779
 780                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 781
 782                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 783                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 784
 785                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 786                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 787                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 788                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 789                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 790                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 791                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 792
 793                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 794
 795                 if (c->extra_fields_n_iovec > 0) {
 796                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 797                         n += c->extra_fields_n_iovec;
 798                 }
 799         }
 800
 801         assert(n <= m);
 802
 803         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 804
 805                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 806                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 807                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 808
 809                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 810                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 811                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 812                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 813
 814                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 815
 816                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 817                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 818
 819                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 820                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 821                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 822                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 823                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 824                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 825                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 826
 827                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 828         }
 829
 830         assert(n <= m);
 831
 832         if (tv) {
 833                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 834                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 835         }
 836
 837         /* Note that strictly speaking storing the boot id here is
 838          * redundant since the entry includes this in-line
 839          * anyway. However, we need this indexed, too. */
 840         if (!isempty(s->boot_id_field))
 841                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 842
 843         if (!isempty(s->machine_id_field))
 844                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 845
 846         if (!isempty(s->hostname_field))
 847                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 848
 849         assert(n <= m);
 850
 851         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
 852                 /* Split up strictly by (non-root) UID */
 853                 journal_uid = c->uid;
 854         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
 855                 /* Split up by login UIDs.  We do this only if the
 856                  * realuid is not root, in order not to accidentally
 857                  * leak privileged information to the user that is
 858                  * logged by a privileged process that is part of an
 859                  * unprivileged session. */
 860                 journal_uid = c->owner_uid;
 861         else
 862                 journal_uid = 0;
 863
 864         write_to_journal(s, journal_uid, iovec, n, priority);
 865 }
 866
 867 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
 868
 869         struct iovec *iovec;
 870         size_t n = 0, k, m;
 871         va_list ap;
 872         int r;
 873
 874         assert(s);
 875         assert(format);
 876
 877         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
 878         iovec = newa(struct iovec, m);
 879
 880         assert_cc(3 == LOG_FAC(LOG_DAEMON));
 881         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
 882         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
 883
 884         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
 885         assert_cc(6 == LOG_INFO);
 886         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
 887
 888         if (message_id)
 889                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
 890         k = n;
 891
 892         va_start(ap, format);
 893         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
 894         /* Error handling below */
 895         va_end(ap);
 896
 897         if (r >= 0)
 898                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 899
 900         while (k < n)
 901                 free(iovec[k++].iov_base);
 902
 903         if (r < 0) {
 904                 /* We failed to format the message. Emit a warning instead. */
 905                 char buf[LINE_MAX];
 906
 907                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
 908
 909                 n = 3;
 910                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
 911                 iovec[n++] = IOVEC_MAKE_STRING(buf);
 912                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 913         }
 914 }
 915
 916 void server_dispatch_message(
 917                 Server *s,
 918                 struct iovec *iovec, size_t n, size_t m,
 919                 ClientContext *c,
 920                 const struct timeval *tv,
 921                 int priority,
 922                 pid_t object_pid) {
 923
 924         uint64_t available = 0;
 925         int rl;
 926
 927         assert(s);
 928         assert(iovec || n == 0);
 929
 930         if (n == 0)
 931                 return;
 932
 933         if (LOG_PRI(priority) > s->max_level_store)
 934                 return;
 935
 936         /* Stop early in case the information will not be stored
 937          * in a journal. */
 938         if (s->storage == STORAGE_NONE)
 939                 return;
 940
 941         if (c && c->unit) {
 942                 (void) determine_space(s, &available, NULL);
 943
 944                 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
 945                 if (rl == 0)
 946                         return;
 947
 948                 /* Write a suppression message if we suppressed something */
 949                 if (rl > 1)
 950                         server_driver_message(s, c->pid,
 951                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
 952                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
 953                                               "N_DROPPED=%i", rl - 1,
 954                                               NULL);
 955         }
 956
 957         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
 958 }
 959
 960 int server_flush_to_var(Server *s, bool require_flag_file) {
 961         sd_id128_t machine;
 962         sd_journal *j = NULL;
 963         char ts[FORMAT_TIMESPAN_MAX];
 964         usec_t start;
 965         unsigned n = 0;
 966         int r;
 967
 968         assert(s);
 969
 970         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
 971                 return 0;
 972
 973         if (!s->runtime_journal)
 974                 return 0;
 975
 976         if (require_flag_file && !flushed_flag_is_set())
 977                 return 0;
 978
 979         (void) system_journal_open(s, true);
 980
 981         if (!s->system_journal)
 982                 return 0;
 983
 984         log_debug("Flushing to /var...");
 985
 986         start = now(CLOCK_MONOTONIC);
 987
 988         r = sd_id128_get_machine(&machine);
 989         if (r < 0)
 990                 return r;
 991
 992         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
 993         if (r < 0)
 994                 return log_error_errno(r, "Failed to read runtime journal: %m");
 995
 996         sd_journal_set_data_threshold(j, 0);
 997
 998         SD_JOURNAL_FOREACH(j) {
 999                 Object *o = NULL;
1000                 JournalFile *f;
1001
1002                 f = j->current_file;
1003                 assert(f && f->current_offset > 0);
1004
1005                 n++;
1006
1007                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1008                 if (r < 0) {
1009                         log_error_errno(r, "Can't read entry: %m");
1010                         goto finish;
1011                 }
1012
1013                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1014                 if (r >= 0)
1015                         continue;
1016
1017                 if (!shall_try_append_again(s->system_journal, r)) {
1018                         log_error_errno(r, "Can't write entry: %m");
1019                         goto finish;
1020                 }
1021
1022                 server_rotate(s);
1023                 server_vacuum(s, false);
1024
1025                 if (!s->system_journal) {
1026                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1027                         r = -EIO;
1028                         goto finish;
1029                 }
1030
1031                 log_debug("Retrying write.");
1032                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1033                 if (r < 0) {
1034                         log_error_errno(r, "Can't write entry: %m");
1035                         goto finish;
1036                 }
1037         }
1038
1039         r = 0;
1040
1041 finish:
1042         journal_file_post_change(s->system_journal);
1043
1044         s->runtime_journal = journal_file_close(s->runtime_journal);
1045
1046         if (r >= 0)
1047                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1048
1049         sd_journal_close(j);
1050
1051         server_driver_message(s, 0, NULL,
1052                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1053                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1054                                           n),
1055                               NULL);
1056
1057         return r;
1058 }
1059
1060 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1061         Server *s = userdata;
1062         struct ucred *ucred = NULL;
1063         struct timeval *tv = NULL;
1064         struct cmsghdr *cmsg;
1065         char *label = NULL;
1066         size_t label_len = 0, m;
1067         struct iovec iovec;
1068         ssize_t n;
1069         int *fds = NULL, v = 0;
1070         size_t n_fds = 0;
1071
1072         union {
1073                 struct cmsghdr cmsghdr;
1074
1075                 /* We use NAME_MAX space for the SELinux label
1076                  * here. The kernel currently enforces no
1077                  * limit, but according to suggestions from
1078                  * the SELinux people this will change and it
1079                  * will probably be identical to NAME_MAX. For
1080                  * now we use that, but this should be updated
1081                  * one day when the final limit is known. */
1082                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1083                             CMSG_SPACE(sizeof(struct timeval)) +
1084                             CMSG_SPACE(sizeof(int)) + /* fd */
1085                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1086         } control = {};
1087
1088         union sockaddr_union sa = {};
1089
1090         struct msghdr msghdr = {
1091                 .msg_iov = &iovec,
1092                 .msg_iovlen = 1,
1093                 .msg_control = &control,
1094                 .msg_controllen = sizeof(control),
1095                 .msg_name = &sa,
1096                 .msg_namelen = sizeof(sa),
1097         };
1098
1099         assert(s);
1100         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1101
1102         if (revents != EPOLLIN) {
1103                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1104                 return -EIO;
1105         }
1106
1107         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1108          * it.) */
1109         (void) ioctl(fd, SIOCINQ, &v);
1110
1111         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1112         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1113                             (size_t) LINE_MAX,
1114                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1115
1116         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1117                 return log_oom();
1118
1119         iovec.iov_base = s->buffer;
1120         iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1121
1122         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1123         if (n < 0) {
1124                 if (IN_SET(errno, EINTR, EAGAIN))
1125                         return 0;
1126
1127                 return log_error_errno(errno, "recvmsg() failed: %m");
1128         }
1129
1130         CMSG_FOREACH(cmsg, &msghdr) {
1131
1132                 if (cmsg->cmsg_level == SOL_SOCKET &&
1133                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1134                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1135                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1136                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1137                          cmsg->cmsg_type == SCM_SECURITY) {
1138                         label = (char*) CMSG_DATA(cmsg);
1139                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1140                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1141                            cmsg->cmsg_type == SO_TIMESTAMP &&
1142                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1143                         tv = (struct timeval*) CMSG_DATA(cmsg);
1144                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1145                          cmsg->cmsg_type == SCM_RIGHTS) {
1146                         fds = (int*) CMSG_DATA(cmsg);
1147                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1148                 }
1149         }
1150
1151         /* And a trailing NUL, just in case */
1152         s->buffer[n] = 0;
1153
1154         if (fd == s->syslog_fd) {
1155                 if (n > 0 && n_fds == 0)
1156                         server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1157                 else if (n_fds > 0)
1158                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1159
1160         } else if (fd == s->native_fd) {
1161                 if (n > 0 && n_fds == 0)
1162                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1163                 else if (n == 0 && n_fds == 1)
1164                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1165                 else if (n_fds > 0)
1166                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1167
1168         } else {
1169                 assert(fd == s->audit_fd);
1170
1171                 if (n > 0 && n_fds == 0)
1172                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1173                 else if (n_fds > 0)
1174                         log_warning("Got file descriptors via audit socket. Ignoring.");
1175         }
1176
1177         close_many(fds, n_fds);
1178         return 0;
1179 }
1180
1181 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1182         Server *s = userdata;
1183         int r;
1184
1185         assert(s);
1186
1187         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1188
1189         (void) server_flush_to_var(s, false);
1190         server_sync(s);
1191         server_vacuum(s, false);
1192
1193         r = touch("/run/systemd/journal/flushed");
1194         if (r < 0)
1195                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1196
1197         server_space_usage_message(s, NULL);
1198         return 0;
1199 }
1200
1201 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1202         Server *s = userdata;
1203         int r;
1204
1205         assert(s);
1206
1207         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1208         server_rotate(s);
1209         server_vacuum(s, true);
1210
1211         if (s->system_journal)
1212                 patch_min_use(&s->system_storage);
1213         if (s->runtime_journal)
1214                 patch_min_use(&s->runtime_storage);
1215
1216         /* Let clients know when the most recent rotation happened. */
1217         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1218         if (r < 0)
1219                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1220
1221         return 0;
1222 }
1223
1224 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1225         Server *s = userdata;
1226
1227         assert(s);
1228
1229         log_received_signal(LOG_INFO, si);
1230
1231         sd_event_exit(s->event, 0);
1232         return 0;
1233 }
1234
1235 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1236         Server *s = userdata;
1237         int r;
1238
1239         assert(s);
1240
1241         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1242
1243         server_sync(s);
1244
1245         /* Let clients know when the most recent sync happened. */
1246         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1247         if (r < 0)
1248                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1249
1250         return 0;
1251 }
1252
1253 static int setup_signals(Server *s) {
1254         int r;
1255
1256         assert(s);
1257
1258         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1259
1260         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1261         if (r < 0)
1262                 return r;
1263
1264         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1265         if (r < 0)
1266                 return r;
1267
1268         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1269         if (r < 0)
1270                 return r;
1271
1272         /* Let's process SIGTERM late, so that we flush all queued
1273          * messages to disk before we exit */
1274         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1275         if (r < 0)
1276                 return r;
1277
1278         /* When journald is invoked on the terminal (when debugging),
1279          * it's useful if C-c is handled equivalent to SIGTERM. */
1280         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1281         if (r < 0)
1282                 return r;
1283
1284         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1285         if (r < 0)
1286                 return r;
1287
1288         /* SIGRTMIN+1 causes an immediate sync. We process this very
1289          * late, so that everything else queued at this point is
1290          * really written to disk. Clients can watch
1291          * /run/systemd/journal/synced with inotify until its mtime
1292          * changes to see when a sync happened. */
1293         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1294         if (r < 0)
1295                 return r;
1296
1297         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1298         if (r < 0)
1299                 return r;
1300
1301         return 0;
1302 }
1303
1304 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1305         Server *s = data;
1306         int r;
1307
1308         assert(s);
1309
1310         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1311
1312                 r = value ? parse_boolean(value) : true;
1313                 if (r < 0)
1314                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1315                 else
1316                         s->forward_to_syslog = r;
1317
1318         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1319
1320                 r = value ? parse_boolean(value) : true;
1321                 if (r < 0)
1322                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1323                 else
1324                         s->forward_to_kmsg = r;
1325
1326         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1327
1328                 r = value ? parse_boolean(value) : true;
1329                 if (r < 0)
1330                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1331                 else
1332                         s->forward_to_console = r;
1333
1334         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1335
1336                 r = value ? parse_boolean(value) : true;
1337                 if (r < 0)
1338                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1339                 else
1340                         s->forward_to_wall = r;
1341
1342         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1343
1344                 if (proc_cmdline_value_missing(key, value))
1345                         return 0;
1346
1347                 r = log_level_from_string(value);
1348                 if (r < 0)
1349                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1350                 else
1351                         s->max_level_console = r;
1352
1353         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1354
1355                 if (proc_cmdline_value_missing(key, value))
1356                         return 0;
1357
1358                 r = log_level_from_string(value);
1359                 if (r < 0)
1360                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1361                 else
1362                         s->max_level_store = r;
1363
1364         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1365
1366                 if (proc_cmdline_value_missing(key, value))
1367                         return 0;
1368
1369                 r = log_level_from_string(value);
1370                 if (r < 0)
1371                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1372                 else
1373                         s->max_level_syslog = r;
1374
1375         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1376
1377                 if (proc_cmdline_value_missing(key, value))
1378                         return 0;
1379
1380                 r = log_level_from_string(value);
1381                 if (r < 0)
1382                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1383                 else
1384                         s->max_level_kmsg = r;
1385
1386         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1387
1388                 if (proc_cmdline_value_missing(key, value))
1389                         return 0;
1390
1391                 r = log_level_from_string(value);
1392                 if (r < 0)
1393                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1394                 else
1395                         s->max_level_wall = r;
1396
1397         } else if (startswith(key, "systemd.journald"))
1398                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1399
1400         /* do not warn about state here, since probably systemd already did */
1401         return 0;
1402 }
1403
1404 static int server_parse_config_file(Server *s) {
1405         assert(s);
1406
1407         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1408                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1409                                         "Journal\0",
1410                                         config_item_perf_lookup, journald_gperf_lookup,
1411                                         CONFIG_PARSE_WARN, s);
1412 }
1413
1414 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1415         Server *s = userdata;
1416
1417         assert(s);
1418
1419         server_sync(s);
1420         return 0;
1421 }
1422
1423 int server_schedule_sync(Server *s, int priority) {
1424         int r;
1425
1426         assert(s);
1427
1428         if (priority <= LOG_CRIT) {
1429                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1430                 server_sync(s);
1431                 return 0;
1432         }
1433
1434         if (s->sync_scheduled)
1435                 return 0;
1436
1437         if (s->sync_interval_usec > 0) {
1438                 usec_t when;
1439
1440                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1441                 if (r < 0)
1442                         return r;
1443
1444                 when += s->sync_interval_usec;
1445
1446                 if (!s->sync_event_source) {
1447                         r = sd_event_add_time(
1448                                         s->event,
1449                                         &s->sync_event_source,
1450                                         CLOCK_MONOTONIC,
1451                                         when, 0,
1452                                         server_dispatch_sync, s);
1453                         if (r < 0)
1454                                 return r;
1455
1456                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1457                 } else {
1458                         r = sd_event_source_set_time(s->sync_event_source, when);
1459                         if (r < 0)
1460                                 return r;
1461
1462                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1463                 }
1464                 if (r < 0)
1465                         return r;
1466
1467                 s->sync_scheduled = true;
1468         }
1469
1470         return 0;
1471 }
1472
1473 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1474         Server *s = userdata;
1475
1476         assert(s);
1477
1478         server_cache_hostname(s);
1479         return 0;
1480 }
1481
1482 static int server_open_hostname(Server *s) {
1483         int r;
1484
1485         assert(s);
1486
1487         s->hostname_fd = open("/proc/sys/kernel/hostname",
1488                               O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1489         if (s->hostname_fd < 0)
1490                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1491
1492         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1493         if (r < 0) {
1494                 /* kernels prior to 3.2 don't support polling this file. Ignore
1495                  * the failure. */
1496                 if (r == -EPERM) {
1497                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1498                         s->hostname_fd = safe_close(s->hostname_fd);
1499                         return 0;
1500                 }
1501
1502                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1503         }
1504
1505         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1506         if (r < 0)
1507                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1508
1509         return 0;
1510 }
1511
1512 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1513         Server *s = userdata;
1514         int r;
1515
1516         assert(s);
1517         assert(s->notify_event_source == es);
1518         assert(s->notify_fd == fd);
1519
1520         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1521          * message on it. Either it's the watchdog event, the initial
1522          * READY=1 event or an stdout stream event. If there's nothing
1523          * to write anymore, turn our event source off. The next time
1524          * there's something to send it will be turned on again. */
1525
1526         if (!s->sent_notify_ready) {
1527                 static const char p[] =
1528                         "READY=1\n"
1529                         "STATUS=Processing requests...";
1530                 ssize_t l;
1531
1532                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1533                 if (l < 0) {
1534                         if (errno == EAGAIN)
1535                                 return 0;
1536
1537                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1538                 }
1539
1540                 s->sent_notify_ready = true;
1541                 log_debug("Sent READY=1 notification.");
1542
1543         } else if (s->send_watchdog) {
1544
1545                 static const char p[] =
1546                         "WATCHDOG=1";
1547
1548                 ssize_t l;
1549
1550                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1551                 if (l < 0) {
1552                         if (errno == EAGAIN)
1553                                 return 0;
1554
1555                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1556                 }
1557
1558                 s->send_watchdog = false;
1559                 log_debug("Sent WATCHDOG=1 notification.");
1560
1561         } else if (s->stdout_streams_notify_queue)
1562                 /* Dispatch one stream notification event */
1563                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1564
1565         /* Leave us enabled if there's still more to do. */
1566         if (s->send_watchdog || s->stdout_streams_notify_queue)
1567                 return 0;
1568
1569         /* There was nothing to do anymore, let's turn ourselves off. */
1570         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1571         if (r < 0)
1572                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1573
1574         return 0;
1575 }
1576
1577 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1578         Server *s = userdata;
1579         int r;
1580
1581         assert(s);
1582
1583         s->send_watchdog = true;
1584
1585         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1586         if (r < 0)
1587                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1588
1589         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1590         if (r < 0)
1591                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1592
1593         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1594         if (r < 0)
1595                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1596
1597         return 0;
1598 }
1599
1600 static int server_connect_notify(Server *s) {
1601         union sockaddr_union sa = {
1602                 .un.sun_family = AF_UNIX,
1603         };
1604         const char *e;
1605         int r;
1606
1607         assert(s);
1608         assert(s->notify_fd < 0);
1609         assert(!s->notify_event_source);
1610
1611         /*
1612           So here's the problem: we'd like to send notification
1613           messages to PID 1, but we cannot do that via sd_notify(),
1614           since that's synchronous, and we might end up blocking on
1615           it. Specifically: given that PID 1 might block on
1616           dbus-daemon during IPC, and dbus-daemon is logging to us,
1617           and might hence block on us, we might end up in a deadlock
1618           if we block on sending PID 1 notification messages — by
1619           generating a full blocking circle. To avoid this, let's
1620           create a non-blocking socket, and connect it to the
1621           notification socket, and then wait for POLLOUT before we
1622           send anything. This should efficiently avoid any deadlocks,
1623           as we'll never block on PID 1, hence PID 1 can safely block
1624           on dbus-daemon which can safely block on us again.
1625
1626           Don't think that this issue is real? It is, see:
1627           https://github.com/systemd/systemd/issues/1505
1628         */
1629
1630         e = getenv("NOTIFY_SOCKET");
1631         if (!e)
1632                 return 0;
1633
1634         if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
1635                 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1636                 return -EINVAL;
1637         }
1638
1639         if (strlen(e) > sizeof(sa.un.sun_path)) {
1640                 log_error("NOTIFY_SOCKET path too long: %s", e);
1641                 return -EINVAL;
1642         }
1643
1644         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1645         if (s->notify_fd < 0)
1646                 return log_error_errno(errno, "Failed to create notify socket: %m");
1647
1648         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1649
1650         strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1651         if (sa.un.sun_path[0] == '@')
1652                 sa.un.sun_path[0] = 0;
1653
1654         r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1655         if (r < 0)
1656                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1657
1658         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1659         if (r < 0)
1660                 return log_error_errno(r, "Failed to watch notification socket: %m");
1661
1662         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1663                 s->send_watchdog = true;
1664
1665                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1666                 if (r < 0)
1667                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1668         }
1669
1670         /* This should fire pretty soon, which we'll use to send the
1671          * READY=1 event. */
1672
1673         return 0;
1674 }
1675
1676 int server_init(Server *s) {
1677         _cleanup_fdset_free_ FDSet *fds = NULL;
1678         int n, r, fd;
1679         bool no_sockets;
1680
1681         assert(s);
1682
1683         zero(*s);
1684         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1685         s->compress.enabled = true;
1686         s->compress.threshold_bytes = (uint64_t) -1;
1687         s->seal = true;
1688         s->read_kmsg = true;
1689
1690         s->watchdog_usec = USEC_INFINITY;
1691
1692         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1693         s->sync_scheduled = false;
1694
1695         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1696         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1697
1698         s->forward_to_wall = true;
1699
1700         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1701
1702         s->max_level_store = LOG_DEBUG;
1703         s->max_level_syslog = LOG_DEBUG;
1704         s->max_level_kmsg = LOG_NOTICE;
1705         s->max_level_console = LOG_INFO;
1706         s->max_level_wall = LOG_EMERG;
1707
1708         s->line_max = DEFAULT_LINE_MAX;
1709
1710         journal_reset_metrics(&s->system_storage.metrics);
1711         journal_reset_metrics(&s->runtime_storage.metrics);
1712
1713         server_parse_config_file(s);
1714
1715         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1716         if (r < 0)
1717                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1718
1719         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1720                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1721                           s->rate_limit_interval, s->rate_limit_burst);
1722                 s->rate_limit_interval = s->rate_limit_burst = 0;
1723         }
1724
1725         (void) mkdir_p("/run/systemd/journal", 0755);
1726
1727         s->user_journals = ordered_hashmap_new(NULL);
1728         if (!s->user_journals)
1729                 return log_oom();
1730
1731         s->mmap = mmap_cache_new();
1732         if (!s->mmap)
1733                 return log_oom();
1734
1735         s->deferred_closes = set_new(NULL);
1736         if (!s->deferred_closes)
1737                 return log_oom();
1738
1739         r = sd_event_default(&s->event);
1740         if (r < 0)
1741                 return log_error_errno(r, "Failed to create event loop: %m");
1742
1743         n = sd_listen_fds(true);
1744         if (n < 0)
1745                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1746
1747         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1748
1749                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1750
1751                         if (s->native_fd >= 0) {
1752                                 log_error("Too many native sockets passed.");
1753                                 return -EINVAL;
1754                         }
1755
1756                         s->native_fd = fd;
1757
1758                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1759
1760                         if (s->stdout_fd >= 0) {
1761                                 log_error("Too many stdout sockets passed.");
1762                                 return -EINVAL;
1763                         }
1764
1765                         s->stdout_fd = fd;
1766
1767                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1768                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1769
1770                         if (s->syslog_fd >= 0) {
1771                                 log_error("Too many /dev/log sockets passed.");
1772                                 return -EINVAL;
1773                         }
1774
1775                         s->syslog_fd = fd;
1776
1777                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1778
1779                         if (s->audit_fd >= 0) {
1780                                 log_error("Too many audit sockets passed.");
1781                                 return -EINVAL;
1782                         }
1783
1784                         s->audit_fd = fd;
1785
1786                 } else {
1787
1788                         if (!fds) {
1789                                 fds = fdset_new();
1790                                 if (!fds)
1791                                         return log_oom();
1792                         }
1793
1794                         r = fdset_put(fds, fd);
1795                         if (r < 0)
1796                                 return log_oom();
1797                 }
1798         }
1799
1800         /* Try to restore streams, but don't bother if this fails */
1801         (void) server_restore_streams(s, fds);
1802
1803         if (fdset_size(fds) > 0) {
1804                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1805                 fds = fdset_free(fds);
1806         }
1807
1808         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1809
1810         /* always open stdout, syslog, native, and kmsg sockets */
1811
1812         /* systemd-journald.socket: /run/systemd/journal/stdout */
1813         r = server_open_stdout_socket(s);
1814         if (r < 0)
1815                 return r;
1816
1817         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1818         r = server_open_syslog_socket(s);
1819         if (r < 0)
1820                 return r;
1821
1822         /* systemd-journald.socket: /run/systemd/journal/socket */
1823         r = server_open_native_socket(s);
1824         if (r < 0)
1825                 return r;
1826
1827         /* /dev/kmsg */
1828         r = server_open_dev_kmsg(s);
1829         if (r < 0)
1830                 return r;
1831
1832         /* Unless we got *some* sockets and not audit, open audit socket */
1833         if (s->audit_fd >= 0 || no_sockets) {
1834                 r = server_open_audit(s);
1835                 if (r < 0)
1836                         return r;
1837         }
1838
1839         r = server_open_kernel_seqnum(s);
1840         if (r < 0)
1841                 return r;
1842
1843         r = server_open_hostname(s);
1844         if (r < 0)
1845                 return r;
1846
1847         r = setup_signals(s);
1848         if (r < 0)
1849                 return r;
1850
1851         s->udev = udev_new();
1852         if (!s->udev)
1853                 return -ENOMEM;
1854
1855         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1856         if (!s->rate_limit)
1857                 return -ENOMEM;
1858
1859         r = cg_get_root_path(&s->cgroup_root);
1860         if (r < 0)
1861                 return r;
1862
1863         server_cache_hostname(s);
1864         server_cache_boot_id(s);
1865         server_cache_machine_id(s);
1866
1867         s->runtime_storage.name = "Runtime journal";
1868         s->system_storage.name = "System journal";
1869
1870         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1871         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1872         if (!s->runtime_storage.path || !s->system_storage.path)
1873                 return -ENOMEM;
1874
1875         (void) server_connect_notify(s);
1876
1877         (void) client_context_acquire_default(s);
1878
1879         return system_journal_open(s, false);
1880 }
1881
1882 void server_maybe_append_tags(Server *s) {
1883 #if HAVE_GCRYPT
1884         JournalFile *f;
1885         Iterator i;
1886         usec_t n;
1887
1888         n = now(CLOCK_REALTIME);
1889
1890         if (s->system_journal)
1891                 journal_file_maybe_append_tag(s->system_journal, n);
1892
1893         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1894                 journal_file_maybe_append_tag(f, n);
1895 #endif
1896 }
1897
1898 void server_done(Server *s) {
1899         assert(s);
1900
1901         set_free_with_destructor(s->deferred_closes, journal_file_close);
1902
1903         while (s->stdout_streams)
1904                 stdout_stream_free(s->stdout_streams);
1905
1906         client_context_flush_all(s);
1907
1908         if (s->system_journal)
1909                 (void) journal_file_close(s->system_journal);
1910
1911         if (s->runtime_journal)
1912                 (void) journal_file_close(s->runtime_journal);
1913
1914         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
1915
1916         sd_event_source_unref(s->syslog_event_source);
1917         sd_event_source_unref(s->native_event_source);
1918         sd_event_source_unref(s->stdout_event_source);
1919         sd_event_source_unref(s->dev_kmsg_event_source);
1920         sd_event_source_unref(s->audit_event_source);
1921         sd_event_source_unref(s->sync_event_source);
1922         sd_event_source_unref(s->sigusr1_event_source);
1923         sd_event_source_unref(s->sigusr2_event_source);
1924         sd_event_source_unref(s->sigterm_event_source);
1925         sd_event_source_unref(s->sigint_event_source);
1926         sd_event_source_unref(s->sigrtmin1_event_source);
1927         sd_event_source_unref(s->hostname_event_source);
1928         sd_event_source_unref(s->notify_event_source);
1929         sd_event_source_unref(s->watchdog_event_source);
1930         sd_event_unref(s->event);
1931
1932         safe_close(s->syslog_fd);
1933         safe_close(s->native_fd);
1934         safe_close(s->stdout_fd);
1935         safe_close(s->dev_kmsg_fd);
1936         safe_close(s->audit_fd);
1937         safe_close(s->hostname_fd);
1938         safe_close(s->notify_fd);
1939
1940         if (s->rate_limit)
1941                 journal_rate_limit_free(s->rate_limit);
1942
1943         if (s->kernel_seqnum)
1944                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1945
1946         free(s->buffer);
1947         free(s->tty_path);
1948         free(s->cgroup_root);
1949         free(s->hostname_field);
1950         free(s->runtime_storage.path);
1951         free(s->system_storage.path);
1952
1953         if (s->mmap)
1954                 mmap_cache_unref(s->mmap);
1955
1956         udev_unref(s->udev);
1957 }
1958
1959 static const char* const storage_table[_STORAGE_MAX] = {
1960         [STORAGE_AUTO] = "auto",
1961         [STORAGE_VOLATILE] = "volatile",
1962         [STORAGE_PERSISTENT] = "persistent",
1963         [STORAGE_NONE] = "none"
1964 };
1965
1966 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1967 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1968
1969 static const char* const split_mode_table[_SPLIT_MAX] = {
1970         [SPLIT_LOGIN] = "login",
1971         [SPLIT_UID] = "uid",
1972         [SPLIT_NONE] = "none",
1973 };
1974
1975 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1976 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
1977
1978 int config_parse_line_max(
1979                 const char* unit,
1980                 const char *filename,
1981                 unsigned line,
1982                 const char *section,
1983                 unsigned section_line,
1984                 const char *lvalue,
1985                 int ltype,
1986                 const char *rvalue,
1987                 void *data,
1988                 void *userdata) {
1989
1990         size_t *sz = data;
1991         int r;
1992
1993         assert(filename);
1994         assert(lvalue);
1995         assert(rvalue);
1996         assert(data);
1997
1998         if (isempty(rvalue))
1999                 /* Empty assignment means default */
2000                 *sz = DEFAULT_LINE_MAX;
2001         else {
2002                 uint64_t v;
2003
2004                 r = parse_size(rvalue, 1024, &v);
2005                 if (r < 0) {
2006                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2007                         return 0;
2008                 }
2009
2010                 if (v < 79) {
2011                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2012                          * terminal size is 80ch, and it might make sense to break one character before the natural
2013                          * line break would occur on that. */
2014                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2015                         *sz = 79;
2016                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2017                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2018                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2019                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2020                          * fail much earlier anyway. */
2021                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2022                         *sz = SSIZE_MAX-1;
2023                 } else
2024                         *sz = (size_t) v;
2025         }
2026
2027         return 0;
2028 }
2029
2030 int config_parse_compress(const char* unit,
2031                           const char *filename,
2032                           unsigned line,
2033                           const char *section,
2034                           unsigned section_line,
2035                           const char *lvalue,
2036                           int ltype,
2037                           const char *rvalue,
2038                           void *data,
2039                           void *userdata) {
2040         JournalCompressOptions* compress = data;
2041         int r;
2042
2043         if (streq(rvalue, "1")) {
2044                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2045                            "Compress= ambiguously specified as 1, enabling compression with default threshold");
2046                 compress->enabled = true;
2047         } else if (streq(rvalue, "0")) {
2048                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2049                            "Compress= ambiguously specified as 0, disabling compression");
2050                 compress->enabled = false;
2051         } else if ((r = parse_boolean(rvalue)) >= 0)
2052                 compress->enabled = r;
2053         else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2054                 compress->enabled = true;
2055         else if (isempty(rvalue)) {
2056                 compress->enabled = true;
2057                 compress->threshold_bytes = (uint64_t) -1;
2058         } else
2059                 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2060
2061         return 0;
2062 }