src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2011 Lennart Poettering
   6 ***/
   7
   8 #if HAVE_SELINUX
   9 #include <selinux/selinux.h>
  10 #endif
  11 #include <sys/ioctl.h>
  12 #include <sys/mman.h>
  13 #include <sys/signalfd.h>
  14 #include <sys/statvfs.h>
  15 #include <linux/sockios.h>
  16
  17 #include "libudev.h"
  18 #include "sd-daemon.h"
  19 #include "sd-journal.h"
  20 #include "sd-messages.h"
  21
  22 #include "acl-util.h"
  23 #include "alloc-util.h"
  24 #include "audit-util.h"
  25 #include "cgroup-util.h"
  26 #include "conf-parser.h"
  27 #include "dirent-util.h"
  28 #include "extract-word.h"
  29 #include "fd-util.h"
  30 #include "fileio.h"
  31 #include "format-util.h"
  32 #include "fs-util.h"
  33 #include "hashmap.h"
  34 #include "hostname-util.h"
  35 #include "id128-util.h"
  36 #include "io-util.h"
  37 #include "journal-authenticate.h"
  38 #include "journal-file.h"
  39 #include "journal-internal.h"
  40 #include "journal-vacuum.h"
  41 #include "journald-audit.h"
  42 #include "journald-context.h"
  43 #include "journald-kmsg.h"
  44 #include "journald-native.h"
  45 #include "journald-rate-limit.h"
  46 #include "journald-server.h"
  47 #include "journald-stream.h"
  48 #include "journald-syslog.h"
  49 #include "log.h"
  50 #include "missing.h"
  51 #include "mkdir.h"
  52 #include "parse-util.h"
  53 #include "proc-cmdline.h"
  54 #include "process-util.h"
  55 #include "rm-rf.h"
  56 #include "selinux-util.h"
  57 #include "signal-util.h"
  58 #include "socket-util.h"
  59 #include "stdio-util.h"
  60 #include "string-table.h"
  61 #include "string-util.h"
  62 #include "syslog-util.h"
  63 #include "user-util.h"
  64
/* Upper bound on the number of per-user journal files kept open at the same time. find_journal() closes
 * entries taken from the front of the ordered hashmap until the count drops below this value. */
#define USER_JOURNALS_MAX 1024

/* Default values for tunables. NOTE(review): the code consuming these defaults is outside this section;
 * presumably they apply when the configuration does not override them — confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures stay valid: cache_space_refresh() only rescans the journal directory
 * once a cached timestamp is at least this old (or was reset to zero). */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
  82
  83 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  84         _cleanup_closedir_ DIR *d = NULL;
  85         struct dirent *de;
  86         struct statvfs ss;
  87
  88         assert(ret_used);
  89         assert(ret_free);
  90
  91         d = opendir(path);
  92         if (!d)
  93                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
  94                                       errno, "Failed to open %s: %m", path);
  95
  96         if (fstatvfs(dirfd(d), &ss) < 0)
  97                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
  98
  99         *ret_free = ss.f_bsize * ss.f_bavail;
 100         *ret_used = 0;
 101         FOREACH_DIRENT_ALL(de, d, break) {
 102                 struct stat st;
 103
 104                 if (!endswith(de->d_name, ".journal") &&
 105                     !endswith(de->d_name, ".journal~"))
 106                         continue;
 107
 108                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 109                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 110                         continue;
 111                 }
 112
 113                 if (!S_ISREG(st.st_mode))
 114                         continue;
 115
 116                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 117         }
 118
 119         return 0;
 120 }
 121
 122 static void cache_space_invalidate(JournalStorageSpace *space) {
 123         zero(*space);
 124 }
 125
 126 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 127         JournalStorageSpace *space;
 128         JournalMetrics *metrics;
 129         uint64_t vfs_used, vfs_avail, avail;
 130         usec_t ts;
 131         int r;
 132
 133         assert(s);
 134
 135         metrics = &storage->metrics;
 136         space = &storage->space;
 137
 138         ts = now(CLOCK_MONOTONIC);
 139
 140         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 141                 return 0;
 142
 143         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 144         if (r < 0)
 145                 return r;
 146
 147         space->vfs_used = vfs_used;
 148         space->vfs_available = vfs_avail;
 149
 150         avail = LESS_BY(vfs_avail, metrics->keep_free);
 151
 152         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 153         space->available = LESS_BY(space->limit, vfs_used);
 154         space->timestamp = ts;
 155         return 1;
 156 }
 157
 158 static void patch_min_use(JournalStorage *storage) {
 159         assert(storage);
 160
 161         /* Let's bump the min_use limit to the current usage on disk. We do
 162          * this when starting up and first opening the journal files. This way
 163          * sudden spikes in disk usage will not cause journald to vacuum files
 164          * without bounds. Note that this means that only a restart of journald
 165          * will make it reset this value. */
 166
 167         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 168 }
 169
 170 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 171         JournalStorage *js;
 172         int r;
 173
 174         assert(s);
 175
 176         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 177
 178         r = cache_space_refresh(s, js);
 179         if (r >= 0) {
 180                 if (available)
 181                         *available = js->space.available;
 182                 if (limit)
 183                         *limit = js->space.limit;
 184         }
 185         return r;
 186 }
 187
 188 void server_space_usage_message(Server *s, JournalStorage *storage) {
 189         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 190              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 191         JournalMetrics *metrics;
 192
 193         assert(s);
 194
 195         if (!storage)
 196                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 197
 198         if (cache_space_refresh(s, storage) < 0)
 199                 return;
 200
 201         metrics = &storage->metrics;
 202         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 203         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 204         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 205         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 206         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 207         format_bytes(fb6, sizeof(fb6), storage->space.available);
 208
 209         server_driver_message(s, 0,
 210                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 211                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 212                                           storage->name, storage->path, fb1, fb5, fb6),
 213                               "JOURNAL_NAME=%s", storage->name,
 214                               "JOURNAL_PATH=%s", storage->path,
 215                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 216                               "CURRENT_USE_PRETTY=%s", fb1,
 217                               "MAX_USE=%"PRIu64, metrics->max_use,
 218                               "MAX_USE_PRETTY=%s", fb2,
 219                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 220                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 221                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 222                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 223                               "LIMIT=%"PRIu64, storage->space.limit,
 224                               "LIMIT_PRETTY=%s", fb5,
 225                               "AVAILABLE=%"PRIu64, storage->space.available,
 226                               "AVAILABLE_PRETTY=%s", fb6,
 227                               NULL);
 228 }
 229
 230 static bool uid_for_system_journal(uid_t uid) {
 231
 232         /* Returns true if the specified UID shall get its data stored in the system journal*/
 233
 234         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 235 }
 236
 237 static void server_add_acls(JournalFile *f, uid_t uid) {
 238 #if HAVE_ACL
 239         int r;
 240 #endif
 241         assert(f);
 242
 243 #if HAVE_ACL
 244         if (uid_for_system_journal(uid))
 245                 return;
 246
 247         r = add_acls_for_user(f->fd, uid);
 248         if (r < 0)
 249                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 250 #endif
 251 }
 252
 253 static int open_journal(
 254                 Server *s,
 255                 bool reliably,
 256                 const char *fname,
 257                 int flags,
 258                 bool seal,
 259                 JournalMetrics *metrics,
 260                 JournalFile **ret) {
 261         int r;
 262         JournalFile *f;
 263
 264         assert(s);
 265         assert(fname);
 266         assert(ret);
 267
 268         if (reliably)
 269                 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
 270                                                seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 271         else
 272                 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
 273                                       metrics, s->mmap, s->deferred_closes, NULL, &f);
 274
 275         if (r < 0)
 276                 return r;
 277
 278         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 279         if (r < 0) {
 280                 (void) journal_file_close(f);
 281                 return r;
 282         }
 283
 284         *ret = f;
 285         return r;
 286 }
 287
 288 static bool flushed_flag_is_set(void) {
 289         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 290 }
 291
 292 static int system_journal_open(Server *s, bool flush_requested) {
 293         const char *fn;
 294         int r = 0;
 295
 296         if (!s->system_journal &&
 297             IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
 298             (flush_requested || flushed_flag_is_set())) {
 299
 300                 /* If in auto mode: first try to create the machine
 301                  * path, but not the prefix.
 302                  *
 303                  * If in persistent mode: create /var/log/journal and
 304                  * the machine path */
 305
 306                 if (s->storage == STORAGE_PERSISTENT)
 307                         (void) mkdir_p("/var/log/journal/", 0755);
 308
 309                 (void) mkdir(s->system_storage.path, 0755);
 310
 311                 fn = strjoina(s->system_storage.path, "/system.journal");
 312                 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
 313                 if (r >= 0) {
 314                         server_add_acls(s->system_journal, 0);
 315                         (void) cache_space_refresh(s, &s->system_storage);
 316                         patch_min_use(&s->system_storage);
 317                 } else if (r < 0) {
 318                         if (!IN_SET(r, -ENOENT, -EROFS))
 319                                 log_warning_errno(r, "Failed to open system journal: %m");
 320
 321                         r = 0;
 322                 }
 323
 324                 /* If the runtime journal is open, and we're post-flush, we're
 325                  * recovering from a failed system journal rotate (ENOSPC)
 326                  * for which the runtime journal was reopened.
 327                  *
 328                  * Perform an implicit flush to var, leaving the runtime
 329                  * journal closed, now that the system journal is back.
 330                  */
 331                 if (!flush_requested)
 332                         (void) server_flush_to_var(s, true);
 333         }
 334
 335         if (!s->runtime_journal &&
 336             (s->storage != STORAGE_NONE)) {
 337
 338                 fn = strjoina(s->runtime_storage.path, "/system.journal");
 339
 340                 if (s->system_journal) {
 341
 342                         /* Try to open the runtime journal, but only
 343                          * if it already exists, so that we can flush
 344                          * it into the system journal */
 345
 346                         r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
 347                         if (r < 0) {
 348                                 if (r != -ENOENT)
 349                                         log_warning_errno(r, "Failed to open runtime journal: %m");
 350
 351                                 r = 0;
 352                         }
 353
 354                 } else {
 355
 356                         /* OK, we really need the runtime journal, so create
 357                          * it if necessary. */
 358
 359                         (void) mkdir("/run/log", 0755);
 360                         (void) mkdir("/run/log/journal", 0755);
 361                         (void) mkdir_parents(fn, 0750);
 362
 363                         r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
 364                         if (r < 0)
 365                                 return log_error_errno(r, "Failed to open runtime journal: %m");
 366                 }
 367
 368                 if (s->runtime_journal) {
 369                         server_add_acls(s->runtime_journal, 0);
 370                         (void) cache_space_refresh(s, &s->runtime_storage);
 371                         patch_min_use(&s->runtime_storage);
 372                 }
 373         }
 374
 375         return r;
 376 }
 377
 378 static JournalFile* find_journal(Server *s, uid_t uid) {
 379         _cleanup_free_ char *p = NULL;
 380         int r;
 381         JournalFile *f;
 382         sd_id128_t machine;
 383
 384         assert(s);
 385
 386         /* A rotate that fails to create the new journal (ENOSPC) leaves the
 387          * rotated journal as NULL.  Unless we revisit opening, even after
 388          * space is made available we'll continue to return NULL indefinitely.
 389          *
 390          * system_journal_open() is a noop if the journals are already open, so
 391          * we can just call it here to recover from failed rotates (or anything
 392          * else that's left the journals as NULL).
 393          *
 394          * Fixes https://github.com/systemd/systemd/issues/3968 */
 395         (void) system_journal_open(s, false);
 396
 397         /* We split up user logs only on /var, not on /run. If the
 398          * runtime file is open, we write to it exclusively, in order
 399          * to guarantee proper order as soon as we flush /run to
 400          * /var and close the runtime file. */
 401
 402         if (s->runtime_journal)
 403                 return s->runtime_journal;
 404
 405         if (uid_for_system_journal(uid))
 406                 return s->system_journal;
 407
 408         r = sd_id128_get_machine(&machine);
 409         if (r < 0)
 410                 return s->system_journal;
 411
 412         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 413         if (f)
 414                 return f;
 415
 416         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
 417                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
 418                 return s->system_journal;
 419
 420         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 421                 /* Too many open? Then let's close one */
 422                 f = ordered_hashmap_steal_first(s->user_journals);
 423                 assert(f);
 424                 (void) journal_file_close(f);
 425         }
 426
 427         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 428         if (r < 0)
 429                 return s->system_journal;
 430
 431         server_add_acls(f, uid);
 432
 433         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 434         if (r < 0) {
 435                 (void) journal_file_close(f);
 436                 return s->system_journal;
 437         }
 438
 439         return f;
 440 }
 441
 442 static int do_rotate(
 443                 Server *s,
 444                 JournalFile **f,
 445                 const char* name,
 446                 bool seal,
 447                 uint32_t uid) {
 448
 449         int r;
 450         assert(s);
 451
 452         if (!*f)
 453                 return -EINVAL;
 454
 455         r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
 456         if (r < 0) {
 457                 if (*f)
 458                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 459                 else
 460                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 461         }
 462
 463         server_add_acls(*f, uid);
 464
 465         return r;
 466 }
 467
 468 void server_rotate(Server *s) {
 469         JournalFile *f;
 470         void *k;
 471         Iterator i;
 472         int r;
 473
 474         log_debug("Rotating...");
 475
 476         (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
 477         (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
 478
 479         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
 480                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
 481                 if (r >= 0)
 482                         ordered_hashmap_replace(s->user_journals, k, f);
 483                 else if (!f)
 484                         /* Old file has been closed and deallocated */
 485                         ordered_hashmap_remove(s->user_journals, k);
 486         }
 487
 488         /* Perform any deferred closes which aren't still offlining. */
 489         SET_FOREACH(f, s->deferred_closes, i)
 490                 if (!journal_file_is_offlining(f)) {
 491                         (void) set_remove(s->deferred_closes, f);
 492                         (void) journal_file_close(f);
 493                 }
 494 }
 495
 496 void server_sync(Server *s) {
 497         JournalFile *f;
 498         Iterator i;
 499         int r;
 500
 501         if (s->system_journal) {
 502                 r = journal_file_set_offline(s->system_journal, false);
 503                 if (r < 0)
 504                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 505         }
 506
 507         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 508                 r = journal_file_set_offline(f, false);
 509                 if (r < 0)
 510                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 511         }
 512
 513         if (s->sync_event_source) {
 514                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 515                 if (r < 0)
 516                         log_error_errno(r, "Failed to disable sync timer source: %m");
 517         }
 518
 519         s->sync_scheduled = false;
 520 }
 521
 522 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 523
 524         int r;
 525
 526         assert(s);
 527         assert(storage);
 528
 529         (void) cache_space_refresh(s, storage);
 530
 531         if (verbose)
 532                 server_space_usage_message(s, storage);
 533
 534         r = journal_directory_vacuum(storage->path, storage->space.limit,
 535                                      storage->metrics.n_max_files, s->max_retention_usec,
 536                                      &s->oldest_file_usec, verbose);
 537         if (r < 0 && r != -ENOENT)
 538                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 539
 540         cache_space_invalidate(&storage->space);
 541 }
 542
 543 int server_vacuum(Server *s, bool verbose) {
 544         assert(s);
 545
 546         log_debug("Vacuuming...");
 547
 548         s->oldest_file_usec = 0;
 549
 550         if (s->system_journal)
 551                 do_vacuum(s, &s->system_storage, verbose);
 552         if (s->runtime_journal)
 553                 do_vacuum(s, &s->runtime_storage, verbose);
 554
 555         return 0;
 556 }
 557
 558 static void server_cache_machine_id(Server *s) {
 559         sd_id128_t id;
 560         int r;
 561
 562         assert(s);
 563
 564         r = sd_id128_get_machine(&id);
 565         if (r < 0)
 566                 return;
 567
 568         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 569 }
 570
 571 static void server_cache_boot_id(Server *s) {
 572         sd_id128_t id;
 573         int r;
 574
 575         assert(s);
 576
 577         r = sd_id128_get_boot(&id);
 578         if (r < 0)
 579                 return;
 580
 581         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 582 }
 583
 584 static void server_cache_hostname(Server *s) {
 585         _cleanup_free_ char *t = NULL;
 586         char *x;
 587
 588         assert(s);
 589
 590         t = gethostname_malloc();
 591         if (!t)
 592                 return;
 593
 594         x = strappend("_HOSTNAME=", t);
 595         if (!x)
 596                 return;
 597
 598         free(s->hostname_field);
 599         s->hostname_field = x;
 600 }
 601
 602 static bool shall_try_append_again(JournalFile *f, int r) {
 603         switch(r) {
 604
 605         case -E2BIG:           /* Hit configured limit          */
 606         case -EFBIG:           /* Hit fs limit                  */
 607         case -EDQUOT:          /* Quota limit hit               */
 608         case -ENOSPC:          /* Disk full                     */
 609                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 610                 return true;
 611
 612         case -EIO:             /* I/O error of some kind (mmap) */
 613                 log_warning("%s: IO error, rotating.", f->path);
 614                 return true;
 615
 616         case -EHOSTDOWN:       /* Other machine                 */
 617                 log_info("%s: Journal file from other machine, rotating.", f->path);
 618                 return true;
 619
 620         case -EBUSY:           /* Unclean shutdown              */
 621                 log_info("%s: Unclean shutdown, rotating.", f->path);
 622                 return true;
 623
 624         case -EPROTONOSUPPORT: /* Unsupported feature           */
 625                 log_info("%s: Unsupported feature, rotating.", f->path);
 626                 return true;
 627
 628         case -EBADMSG:         /* Corrupted                     */
 629         case -ENODATA:         /* Truncated                     */
 630         case -ESHUTDOWN:       /* Already archived              */
 631                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 632                 return true;
 633
 634         case -EIDRM:           /* Journal file has been deleted */
 635                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 636                 return true;
 637
 638         case -ETXTBSY:         /* Journal file is from the future */
 639                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 640                 return true;
 641
 642         default:
 643                 return false;
 644         }
 645 }
 646
 647 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
 648         bool vacuumed = false, rotate = false;
 649         struct dual_timestamp ts;
 650         JournalFile *f;
 651         int r;
 652
 653         assert(s);
 654         assert(iovec);
 655         assert(n > 0);
 656
 657         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 658          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 659          * processing it, as we want strictly linear ordering in what we write out.) */
 660         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 661         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 662
 663         if (ts.realtime < s->last_realtime_clock) {
 664                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 665                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 666                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 667                  * bisection works correctly. */
 668
 669                 log_debug("Time jumped backwards, rotating.");
 670                 rotate = true;
 671         } else {
 672
 673                 f = find_journal(s, uid);
 674                 if (!f)
 675                         return;
 676
 677                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 678                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 679                         rotate = true;
 680                 }
 681         }
 682
 683         if (rotate) {
 684                 server_rotate(s);
 685                 server_vacuum(s, false);
 686                 vacuumed = true;
 687
 688                 f = find_journal(s, uid);
 689                 if (!f)
 690                         return;
 691         }
 692
 693         s->last_realtime_clock = ts.realtime;
 694
 695         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 696         if (r >= 0) {
 697                 server_schedule_sync(s, priority);
 698                 return;
 699         }
 700
 701         if (vacuumed || !shall_try_append_again(f, r)) {
 702                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 703                 return;
 704         }
 705
 706         server_rotate(s);
 707         server_vacuum(s, false);
 708
 709         f = find_journal(s, uid);
 710         if (!f)
 711                 return;
 712
 713         log_debug("Retrying write.");
 714         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 715         if (r < 0)
 716                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 717         else
 718                 server_schedule_sync(s, priority);
 719 }
 720
/* Helpers that append one "FIELD=value" string to the iovec array 'iovec' and advance the index 'n'.
 *
 * Things to keep in mind when using them:
 *  - Each macro expands to a bare "if (...) { ... }" statement, not a "do { } while (0)" block, so it
 *    must not be used as the unbraced body of an if that has an else branch.
 *  - 'field' must be a string literal, as it is concatenated with "=" at compile time.
 *  - No bounds checking is done on 'n'; the caller must make sure the array is large enough.
 *  - NOTE(review): newa() and strjoina() are assumed to allocate on the stack of the calling function
 *    (alloca()-style) — confirm. If so, the string stays valid until the calling function returns, even
 *    though 'k' itself goes out of scope at the end of the block. */

/* Appends "field=<value>" with 'value' rendered through the printf-style 'format', but only if the
 * predicate 'isset' returns true for 'value'. 'type' is the integer type of 'value' and determines the
 * buffer size via DECIMAL_STR_MAX(). */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        if (isset(value)) {                                             \
                char *k;                                                \
                k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                sprintf(k, field "=" format, value);                    \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

/* Appends "field=<value>" for the string 'value', unless isempty() reports it as empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        if (!isempty(value)) {                                          \
                char *k;                                                \
                k = strjoina(field "=", value);                         \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

/* Appends "field=<id>" with the 128-bit ID 'value' formatted by sd_id128_to_string(), unless the ID is
 * null. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        if (!sd_id128_is_null(value)) {                                 \
                char *k;                                                \
                k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                sd_id128_to_string(value, stpcpy(k, field "="));        \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

/* Appends "field=<value>" for a value given as pointer plus length ('value' need not be
 * NUL-terminated; a terminating NUL is added to the copy). Nothing is appended if 'value_size' is 0.
 * Note that the definition ends in a line continuation, so the line following it is part of the macro. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        if (value_size > 0) {                                           \
                char *k;                                                \
                k = newa(char, STRLEN(field "=") + value_size + 1);     \
                *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }                                                               \
 751
 752 static void dispatch_message_real(
 753                 Server *s,
 754                 struct iovec *iovec, size_t n, size_t m,
 755                 const ClientContext *c,
 756                 const struct timeval *tv,
 757                 int priority,
 758                 pid_t object_pid) {
 759
 760         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 761         uid_t journal_uid;
 762         ClientContext *o;
 763
 764         assert(s);
 765         assert(iovec);
 766         assert(n > 0);
 767         assert(n +
 768                N_IOVEC_META_FIELDS +
 769                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 770                client_context_extra_fields_n_iovec(c) <= m);
 771
 772         if (c) {
 773                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 774                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 775                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 776
 777                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 778                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 779                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 780                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 781
 782                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 783
 784                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 785                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 786
 787                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 788                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 789                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 790                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 791                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 792                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 793                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 794
 795                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 796
 797                 if (c->extra_fields_n_iovec > 0) {
 798                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 799                         n += c->extra_fields_n_iovec;
 800                 }
 801         }
 802
 803         assert(n <= m);
 804
 805         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 806
 807                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 808                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 809                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 810
 811                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 812                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 813                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 814                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 815
 816                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 817
 818                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 819                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 820
 821                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 822                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 823                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 824                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 825                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 826                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 827                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 828
 829                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 830         }
 831
 832         assert(n <= m);
 833
 834         if (tv) {
 835                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 836                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 837         }
 838
 839         /* Note that strictly speaking storing the boot id here is
 840          * redundant since the entry includes this in-line
 841          * anyway. However, we need this indexed, too. */
 842         if (!isempty(s->boot_id_field))
 843                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 844
 845         if (!isempty(s->machine_id_field))
 846                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 847
 848         if (!isempty(s->hostname_field))
 849                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 850
 851         assert(n <= m);
 852
 853         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
 854                 /* Split up strictly by (non-root) UID */
 855                 journal_uid = c->uid;
 856         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
 857                 /* Split up by login UIDs.  We do this only if the
 858                  * realuid is not root, in order not to accidentally
 859                  * leak privileged information to the user that is
 860                  * logged by a privileged process that is part of an
 861                  * unprivileged session. */
 862                 journal_uid = c->owner_uid;
 863         else
 864                 journal_uid = 0;
 865
 866         write_to_journal(s, journal_uid, iovec, n, priority);
 867 }
 868
 869 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
 870
 871         struct iovec *iovec;
 872         size_t n = 0, k, m;
 873         va_list ap;
 874         int r;
 875
 876         assert(s);
 877         assert(format);
 878
 879         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
 880         iovec = newa(struct iovec, m);
 881
 882         assert_cc(3 == LOG_FAC(LOG_DAEMON));
 883         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
 884         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
 885
 886         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
 887         assert_cc(6 == LOG_INFO);
 888         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
 889
 890         if (message_id)
 891                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
 892         k = n;
 893
 894         va_start(ap, format);
 895         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
 896         /* Error handling below */
 897         va_end(ap);
 898
 899         if (r >= 0)
 900                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 901
 902         while (k < n)
 903                 free(iovec[k++].iov_base);
 904
 905         if (r < 0) {
 906                 /* We failed to format the message. Emit a warning instead. */
 907                 char buf[LINE_MAX];
 908
 909                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
 910
 911                 n = 3;
 912                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
 913                 iovec[n++] = IOVEC_MAKE_STRING(buf);
 914                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 915         }
 916 }
 917
 918 void server_dispatch_message(
 919                 Server *s,
 920                 struct iovec *iovec, size_t n, size_t m,
 921                 ClientContext *c,
 922                 const struct timeval *tv,
 923                 int priority,
 924                 pid_t object_pid) {
 925
 926         uint64_t available = 0;
 927         int rl;
 928
 929         assert(s);
 930         assert(iovec || n == 0);
 931
 932         if (n == 0)
 933                 return;
 934
 935         if (LOG_PRI(priority) > s->max_level_store)
 936                 return;
 937
 938         /* Stop early in case the information will not be stored
 939          * in a journal. */
 940         if (s->storage == STORAGE_NONE)
 941                 return;
 942
 943         if (c && c->unit) {
 944                 (void) determine_space(s, &available, NULL);
 945
 946                 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
 947                 if (rl == 0)
 948                         return;
 949
 950                 /* Write a suppression message if we suppressed something */
 951                 if (rl > 1)
 952                         server_driver_message(s, c->pid,
 953                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
 954                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
 955                                               "N_DROPPED=%i", rl - 1,
 956                                               NULL);
 957         }
 958
 959         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
 960 }
 961
 962 int server_flush_to_var(Server *s, bool require_flag_file) {
 963         sd_id128_t machine;
 964         sd_journal *j = NULL;
 965         char ts[FORMAT_TIMESPAN_MAX];
 966         usec_t start;
 967         unsigned n = 0;
 968         int r;
 969
 970         assert(s);
 971
 972         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
 973                 return 0;
 974
 975         if (!s->runtime_journal)
 976                 return 0;
 977
 978         if (require_flag_file && !flushed_flag_is_set())
 979                 return 0;
 980
 981         (void) system_journal_open(s, true);
 982
 983         if (!s->system_journal)
 984                 return 0;
 985
 986         log_debug("Flushing to /var...");
 987
 988         start = now(CLOCK_MONOTONIC);
 989
 990         r = sd_id128_get_machine(&machine);
 991         if (r < 0)
 992                 return r;
 993
 994         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
 995         if (r < 0)
 996                 return log_error_errno(r, "Failed to read runtime journal: %m");
 997
 998         sd_journal_set_data_threshold(j, 0);
 999
1000         SD_JOURNAL_FOREACH(j) {
1001                 Object *o = NULL;
1002                 JournalFile *f;
1003
1004                 f = j->current_file;
1005                 assert(f && f->current_offset > 0);
1006
1007                 n++;
1008
1009                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1010                 if (r < 0) {
1011                         log_error_errno(r, "Can't read entry: %m");
1012                         goto finish;
1013                 }
1014
1015                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1016                 if (r >= 0)
1017                         continue;
1018
1019                 if (!shall_try_append_again(s->system_journal, r)) {
1020                         log_error_errno(r, "Can't write entry: %m");
1021                         goto finish;
1022                 }
1023
1024                 server_rotate(s);
1025                 server_vacuum(s, false);
1026
1027                 if (!s->system_journal) {
1028                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1029                         r = -EIO;
1030                         goto finish;
1031                 }
1032
1033                 log_debug("Retrying write.");
1034                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1035                 if (r < 0) {
1036                         log_error_errno(r, "Can't write entry: %m");
1037                         goto finish;
1038                 }
1039         }
1040
1041         r = 0;
1042
1043 finish:
1044         journal_file_post_change(s->system_journal);
1045
1046         s->runtime_journal = journal_file_close(s->runtime_journal);
1047
1048         if (r >= 0)
1049                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1050
1051         sd_journal_close(j);
1052
1053         server_driver_message(s, 0, NULL,
1054                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1055                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1056                                           n),
1057                               NULL);
1058
1059         return r;
1060 }
1061
1062 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1063         Server *s = userdata;
1064         struct ucred *ucred = NULL;
1065         struct timeval *tv = NULL;
1066         struct cmsghdr *cmsg;
1067         char *label = NULL;
1068         size_t label_len = 0, m;
1069         struct iovec iovec;
1070         ssize_t n;
1071         int *fds = NULL, v = 0;
1072         size_t n_fds = 0;
1073
1074         union {
1075                 struct cmsghdr cmsghdr;
1076
1077                 /* We use NAME_MAX space for the SELinux label
1078                  * here. The kernel currently enforces no
1079                  * limit, but according to suggestions from
1080                  * the SELinux people this will change and it
1081                  * will probably be identical to NAME_MAX. For
1082                  * now we use that, but this should be updated
1083                  * one day when the final limit is known. */
1084                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1085                             CMSG_SPACE(sizeof(struct timeval)) +
1086                             CMSG_SPACE(sizeof(int)) + /* fd */
1087                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1088         } control = {};
1089
1090         union sockaddr_union sa = {};
1091
1092         struct msghdr msghdr = {
1093                 .msg_iov = &iovec,
1094                 .msg_iovlen = 1,
1095                 .msg_control = &control,
1096                 .msg_controllen = sizeof(control),
1097                 .msg_name = &sa,
1098                 .msg_namelen = sizeof(sa),
1099         };
1100
1101         assert(s);
1102         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1103
1104         if (revents != EPOLLIN) {
1105                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1106                 return -EIO;
1107         }
1108
1109         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1110          * it.) */
1111         (void) ioctl(fd, SIOCINQ, &v);
1112
1113         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1114         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1115                             (size_t) LINE_MAX,
1116                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1117
1118         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1119                 return log_oom();
1120
1121         iovec.iov_base = s->buffer;
1122         iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1123
1124         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1125         if (n < 0) {
1126                 if (IN_SET(errno, EINTR, EAGAIN))
1127                         return 0;
1128
1129                 return log_error_errno(errno, "recvmsg() failed: %m");
1130         }
1131
1132         CMSG_FOREACH(cmsg, &msghdr) {
1133
1134                 if (cmsg->cmsg_level == SOL_SOCKET &&
1135                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1136                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1137                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1138                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1139                          cmsg->cmsg_type == SCM_SECURITY) {
1140                         label = (char*) CMSG_DATA(cmsg);
1141                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1142                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1143                            cmsg->cmsg_type == SO_TIMESTAMP &&
1144                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1145                         tv = (struct timeval*) CMSG_DATA(cmsg);
1146                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1147                          cmsg->cmsg_type == SCM_RIGHTS) {
1148                         fds = (int*) CMSG_DATA(cmsg);
1149                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1150                 }
1151         }
1152
1153         /* And a trailing NUL, just in case */
1154         s->buffer[n] = 0;
1155
1156         if (fd == s->syslog_fd) {
1157                 if (n > 0 && n_fds == 0)
1158                         server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1159                 else if (n_fds > 0)
1160                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1161
1162         } else if (fd == s->native_fd) {
1163                 if (n > 0 && n_fds == 0)
1164                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1165                 else if (n == 0 && n_fds == 1)
1166                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1167                 else if (n_fds > 0)
1168                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1169
1170         } else {
1171                 assert(fd == s->audit_fd);
1172
1173                 if (n > 0 && n_fds == 0)
1174                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1175                 else if (n_fds > 0)
1176                         log_warning("Got file descriptors via audit socket. Ignoring.");
1177         }
1178
1179         close_many(fds, n_fds);
1180         return 0;
1181 }
1182
1183 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1184         Server *s = userdata;
1185         int r;
1186
1187         assert(s);
1188
1189         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1190
1191         (void) server_flush_to_var(s, false);
1192         server_sync(s);
1193         server_vacuum(s, false);
1194
1195         r = touch("/run/systemd/journal/flushed");
1196         if (r < 0)
1197                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1198
1199         server_space_usage_message(s, NULL);
1200         return 0;
1201 }
1202
1203 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1204         Server *s = userdata;
1205         int r;
1206
1207         assert(s);
1208
1209         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1210         server_rotate(s);
1211         server_vacuum(s, true);
1212
1213         if (s->system_journal)
1214                 patch_min_use(&s->system_storage);
1215         if (s->runtime_journal)
1216                 patch_min_use(&s->runtime_storage);
1217
1218         /* Let clients know when the most recent rotation happened. */
1219         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1220         if (r < 0)
1221                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1222
1223         return 0;
1224 }
1225
1226 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1227         Server *s = userdata;
1228
1229         assert(s);
1230
1231         log_received_signal(LOG_INFO, si);
1232
1233         sd_event_exit(s->event, 0);
1234         return 0;
1235 }
1236
1237 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1238         Server *s = userdata;
1239         int r;
1240
1241         assert(s);
1242
1243         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1244
1245         server_sync(s);
1246
1247         /* Let clients know when the most recent sync happened. */
1248         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1249         if (r < 0)
1250                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1251
1252         return 0;
1253 }
1254
1255 static int setup_signals(Server *s) {
1256         int r;
1257
1258         assert(s);
1259
1260         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1261
1262         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1263         if (r < 0)
1264                 return r;
1265
1266         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1267         if (r < 0)
1268                 return r;
1269
1270         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1271         if (r < 0)
1272                 return r;
1273
1274         /* Let's process SIGTERM late, so that we flush all queued
1275          * messages to disk before we exit */
1276         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1277         if (r < 0)
1278                 return r;
1279
1280         /* When journald is invoked on the terminal (when debugging),
1281          * it's useful if C-c is handled equivalent to SIGTERM. */
1282         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1283         if (r < 0)
1284                 return r;
1285
1286         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1287         if (r < 0)
1288                 return r;
1289
1290         /* SIGRTMIN+1 causes an immediate sync. We process this very
1291          * late, so that everything else queued at this point is
1292          * really written to disk. Clients can watch
1293          * /run/systemd/journal/synced with inotify until its mtime
1294          * changes to see when a sync happened. */
1295         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1296         if (r < 0)
1297                 return r;
1298
1299         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1300         if (r < 0)
1301                 return r;
1302
1303         return 0;
1304 }
1305
1306 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1307         Server *s = data;
1308         int r;
1309
1310         assert(s);
1311
1312         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1313
1314                 r = value ? parse_boolean(value) : true;
1315                 if (r < 0)
1316                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1317                 else
1318                         s->forward_to_syslog = r;
1319
1320         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1321
1322                 r = value ? parse_boolean(value) : true;
1323                 if (r < 0)
1324                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1325                 else
1326                         s->forward_to_kmsg = r;
1327
1328         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1329
1330                 r = value ? parse_boolean(value) : true;
1331                 if (r < 0)
1332                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1333                 else
1334                         s->forward_to_console = r;
1335
1336         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1337
1338                 r = value ? parse_boolean(value) : true;
1339                 if (r < 0)
1340                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1341                 else
1342                         s->forward_to_wall = r;
1343
1344         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1345
1346                 if (proc_cmdline_value_missing(key, value))
1347                         return 0;
1348
1349                 r = log_level_from_string(value);
1350                 if (r < 0)
1351                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1352                 else
1353                         s->max_level_console = r;
1354
1355         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1356
1357                 if (proc_cmdline_value_missing(key, value))
1358                         return 0;
1359
1360                 r = log_level_from_string(value);
1361                 if (r < 0)
1362                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1363                 else
1364                         s->max_level_store = r;
1365
1366         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1367
1368                 if (proc_cmdline_value_missing(key, value))
1369                         return 0;
1370
1371                 r = log_level_from_string(value);
1372                 if (r < 0)
1373                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1374                 else
1375                         s->max_level_syslog = r;
1376
1377         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1378
1379                 if (proc_cmdline_value_missing(key, value))
1380                         return 0;
1381
1382                 r = log_level_from_string(value);
1383                 if (r < 0)
1384                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1385                 else
1386                         s->max_level_kmsg = r;
1387
1388         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1389
1390                 if (proc_cmdline_value_missing(key, value))
1391                         return 0;
1392
1393                 r = log_level_from_string(value);
1394                 if (r < 0)
1395                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1396                 else
1397                         s->max_level_wall = r;
1398
1399         } else if (startswith(key, "systemd.journald"))
1400                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1401
1402         /* do not warn about state here, since probably systemd already did */
1403         return 0;
1404 }
1405
1406 static int server_parse_config_file(Server *s) {
1407         assert(s);
1408
1409         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1410                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1411                                         "Journal\0",
1412                                         config_item_perf_lookup, journald_gperf_lookup,
1413                                         CONFIG_PARSE_WARN, s);
1414 }
1415
1416 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1417         Server *s = userdata;
1418
1419         assert(s);
1420
1421         server_sync(s);
1422         return 0;
1423 }
1424
1425 int server_schedule_sync(Server *s, int priority) {
1426         int r;
1427
1428         assert(s);
1429
1430         if (priority <= LOG_CRIT) {
1431                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1432                 server_sync(s);
1433                 return 0;
1434         }
1435
1436         if (s->sync_scheduled)
1437                 return 0;
1438
1439         if (s->sync_interval_usec > 0) {
1440                 usec_t when;
1441
1442                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1443                 if (r < 0)
1444                         return r;
1445
1446                 when += s->sync_interval_usec;
1447
1448                 if (!s->sync_event_source) {
1449                         r = sd_event_add_time(
1450                                         s->event,
1451                                         &s->sync_event_source,
1452                                         CLOCK_MONOTONIC,
1453                                         when, 0,
1454                                         server_dispatch_sync, s);
1455                         if (r < 0)
1456                                 return r;
1457
1458                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1459                 } else {
1460                         r = sd_event_source_set_time(s->sync_event_source, when);
1461                         if (r < 0)
1462                                 return r;
1463
1464                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1465                 }
1466                 if (r < 0)
1467                         return r;
1468
1469                 s->sync_scheduled = true;
1470         }
1471
1472         return 0;
1473 }
1474
1475 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1476         Server *s = userdata;
1477
1478         assert(s);
1479
1480         server_cache_hostname(s);
1481         return 0;
1482 }
1483
1484 static int server_open_hostname(Server *s) {
1485         int r;
1486
1487         assert(s);
1488
1489         s->hostname_fd = open("/proc/sys/kernel/hostname",
1490                               O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1491         if (s->hostname_fd < 0)
1492                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1493
1494         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1495         if (r < 0) {
1496                 /* kernels prior to 3.2 don't support polling this file. Ignore
1497                  * the failure. */
1498                 if (r == -EPERM) {
1499                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1500                         s->hostname_fd = safe_close(s->hostname_fd);
1501                         return 0;
1502                 }
1503
1504                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1505         }
1506
1507         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1508         if (r < 0)
1509                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1510
1511         return 0;
1512 }
1513
1514 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1515         Server *s = userdata;
1516         int r;
1517
1518         assert(s);
1519         assert(s->notify_event_source == es);
1520         assert(s->notify_fd == fd);
1521
1522         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1523          * message on it. Either it's the watchdog event, the initial
1524          * READY=1 event or an stdout stream event. If there's nothing
1525          * to write anymore, turn our event source off. The next time
1526          * there's something to send it will be turned on again. */
1527
1528         if (!s->sent_notify_ready) {
1529                 static const char p[] =
1530                         "READY=1\n"
1531                         "STATUS=Processing requests...";
1532                 ssize_t l;
1533
1534                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1535                 if (l < 0) {
1536                         if (errno == EAGAIN)
1537                                 return 0;
1538
1539                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1540                 }
1541
1542                 s->sent_notify_ready = true;
1543                 log_debug("Sent READY=1 notification.");
1544
1545         } else if (s->send_watchdog) {
1546
1547                 static const char p[] =
1548                         "WATCHDOG=1";
1549
1550                 ssize_t l;
1551
1552                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1553                 if (l < 0) {
1554                         if (errno == EAGAIN)
1555                                 return 0;
1556
1557                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1558                 }
1559
1560                 s->send_watchdog = false;
1561                 log_debug("Sent WATCHDOG=1 notification.");
1562
1563         } else if (s->stdout_streams_notify_queue)
1564                 /* Dispatch one stream notification event */
1565                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1566
1567         /* Leave us enabled if there's still more to do. */
1568         if (s->send_watchdog || s->stdout_streams_notify_queue)
1569                 return 0;
1570
1571         /* There was nothing to do anymore, let's turn ourselves off. */
1572         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1573         if (r < 0)
1574                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1575
1576         return 0;
1577 }
1578
1579 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1580         Server *s = userdata;
1581         int r;
1582
1583         assert(s);
1584
1585         s->send_watchdog = true;
1586
1587         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1588         if (r < 0)
1589                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1590
1591         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1592         if (r < 0)
1593                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1594
1595         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1596         if (r < 0)
1597                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1598
1599         return 0;
1600 }
1601
1602 static int server_connect_notify(Server *s) {
1603         union sockaddr_union sa = {
1604                 .un.sun_family = AF_UNIX,
1605         };
1606         const char *e;
1607         int r;
1608
1609         assert(s);
1610         assert(s->notify_fd < 0);
1611         assert(!s->notify_event_source);
1612
1613         /*
1614           So here's the problem: we'd like to send notification
1615           messages to PID 1, but we cannot do that via sd_notify(),
1616           since that's synchronous, and we might end up blocking on
1617           it. Specifically: given that PID 1 might block on
1618           dbus-daemon during IPC, and dbus-daemon is logging to us,
1619           and might hence block on us, we might end up in a deadlock
1620           if we block on sending PID 1 notification messages — by
1621           generating a full blocking circle. To avoid this, let's
1622           create a non-blocking socket, and connect it to the
1623           notification socket, and then wait for POLLOUT before we
1624           send anything. This should efficiently avoid any deadlocks,
1625           as we'll never block on PID 1, hence PID 1 can safely block
1626           on dbus-daemon which can safely block on us again.
1627
1628           Don't think that this issue is real? It is, see:
1629           https://github.com/systemd/systemd/issues/1505
1630         */
1631
1632         e = getenv("NOTIFY_SOCKET");
1633         if (!e)
1634                 return 0;
1635
1636         if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
1637                 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1638                 return -EINVAL;
1639         }
1640
1641         if (strlen(e) > sizeof(sa.un.sun_path)) {
1642                 log_error("NOTIFY_SOCKET path too long: %s", e);
1643                 return -EINVAL;
1644         }
1645
1646         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1647         if (s->notify_fd < 0)
1648                 return log_error_errno(errno, "Failed to create notify socket: %m");
1649
1650         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1651
1652         strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1653         if (sa.un.sun_path[0] == '@')
1654                 sa.un.sun_path[0] = 0;
1655
1656         r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1657         if (r < 0)
1658                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1659
1660         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1661         if (r < 0)
1662                 return log_error_errno(r, "Failed to watch notification socket: %m");
1663
1664         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1665                 s->send_watchdog = true;
1666
1667                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1668                 if (r < 0)
1669                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1670         }
1671
1672         /* This should fire pretty soon, which we'll use to send the
1673          * READY=1 event. */
1674
1675         return 0;
1676 }
1677
1678 int server_init(Server *s) {
1679         _cleanup_fdset_free_ FDSet *fds = NULL;
1680         int n, r, fd;
1681         bool no_sockets;
1682
1683         assert(s);
1684
1685         zero(*s);
1686         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1687         s->compress.enabled = true;
1688         s->compress.threshold_bytes = (uint64_t) -1;
1689         s->seal = true;
1690         s->read_kmsg = true;
1691
1692         s->watchdog_usec = USEC_INFINITY;
1693
1694         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1695         s->sync_scheduled = false;
1696
1697         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1698         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1699
1700         s->forward_to_wall = true;
1701
1702         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1703
1704         s->max_level_store = LOG_DEBUG;
1705         s->max_level_syslog = LOG_DEBUG;
1706         s->max_level_kmsg = LOG_NOTICE;
1707         s->max_level_console = LOG_INFO;
1708         s->max_level_wall = LOG_EMERG;
1709
1710         s->line_max = DEFAULT_LINE_MAX;
1711
1712         journal_reset_metrics(&s->system_storage.metrics);
1713         journal_reset_metrics(&s->runtime_storage.metrics);
1714
1715         server_parse_config_file(s);
1716
1717         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1718         if (r < 0)
1719                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1720
1721         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1722                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1723                           s->rate_limit_interval, s->rate_limit_burst);
1724                 s->rate_limit_interval = s->rate_limit_burst = 0;
1725         }
1726
1727         (void) mkdir_p("/run/systemd/journal", 0755);
1728
1729         s->user_journals = ordered_hashmap_new(NULL);
1730         if (!s->user_journals)
1731                 return log_oom();
1732
1733         s->mmap = mmap_cache_new();
1734         if (!s->mmap)
1735                 return log_oom();
1736
1737         s->deferred_closes = set_new(NULL);
1738         if (!s->deferred_closes)
1739                 return log_oom();
1740
1741         r = sd_event_default(&s->event);
1742         if (r < 0)
1743                 return log_error_errno(r, "Failed to create event loop: %m");
1744
1745         n = sd_listen_fds(true);
1746         if (n < 0)
1747                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1748
1749         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1750
1751                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1752
1753                         if (s->native_fd >= 0) {
1754                                 log_error("Too many native sockets passed.");
1755                                 return -EINVAL;
1756                         }
1757
1758                         s->native_fd = fd;
1759
1760                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1761
1762                         if (s->stdout_fd >= 0) {
1763                                 log_error("Too many stdout sockets passed.");
1764                                 return -EINVAL;
1765                         }
1766
1767                         s->stdout_fd = fd;
1768
1769                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1770                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1771
1772                         if (s->syslog_fd >= 0) {
1773                                 log_error("Too many /dev/log sockets passed.");
1774                                 return -EINVAL;
1775                         }
1776
1777                         s->syslog_fd = fd;
1778
1779                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1780
1781                         if (s->audit_fd >= 0) {
1782                                 log_error("Too many audit sockets passed.");
1783                                 return -EINVAL;
1784                         }
1785
1786                         s->audit_fd = fd;
1787
1788                 } else {
1789
1790                         if (!fds) {
1791                                 fds = fdset_new();
1792                                 if (!fds)
1793                                         return log_oom();
1794                         }
1795
1796                         r = fdset_put(fds, fd);
1797                         if (r < 0)
1798                                 return log_oom();
1799                 }
1800         }
1801
1802         /* Try to restore streams, but don't bother if this fails */
1803         (void) server_restore_streams(s, fds);
1804
1805         if (fdset_size(fds) > 0) {
1806                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1807                 fds = fdset_free(fds);
1808         }
1809
1810         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1811
1812         /* always open stdout, syslog, native, and kmsg sockets */
1813
1814         /* systemd-journald.socket: /run/systemd/journal/stdout */
1815         r = server_open_stdout_socket(s);
1816         if (r < 0)
1817                 return r;
1818
1819         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1820         r = server_open_syslog_socket(s);
1821         if (r < 0)
1822                 return r;
1823
1824         /* systemd-journald.socket: /run/systemd/journal/socket */
1825         r = server_open_native_socket(s);
1826         if (r < 0)
1827                 return r;
1828
1829         /* /dev/kmsg */
1830         r = server_open_dev_kmsg(s);
1831         if (r < 0)
1832                 return r;
1833
1834         /* Unless we got *some* sockets and not audit, open audit socket */
1835         if (s->audit_fd >= 0 || no_sockets) {
1836                 r = server_open_audit(s);
1837                 if (r < 0)
1838                         return r;
1839         }
1840
1841         r = server_open_kernel_seqnum(s);
1842         if (r < 0)
1843                 return r;
1844
1845         r = server_open_hostname(s);
1846         if (r < 0)
1847                 return r;
1848
1849         r = setup_signals(s);
1850         if (r < 0)
1851                 return r;
1852
1853         s->udev = udev_new();
1854         if (!s->udev)
1855                 return -ENOMEM;
1856
1857         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1858         if (!s->rate_limit)
1859                 return -ENOMEM;
1860
1861         r = cg_get_root_path(&s->cgroup_root);
1862         if (r < 0)
1863                 return r;
1864
1865         server_cache_hostname(s);
1866         server_cache_boot_id(s);
1867         server_cache_machine_id(s);
1868
1869         s->runtime_storage.name = "Runtime journal";
1870         s->system_storage.name = "System journal";
1871
1872         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1873         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1874         if (!s->runtime_storage.path || !s->system_storage.path)
1875                 return -ENOMEM;
1876
1877         (void) server_connect_notify(s);
1878
1879         (void) client_context_acquire_default(s);
1880
1881         return system_journal_open(s, false);
1882 }
1883
1884 void server_maybe_append_tags(Server *s) {
1885 #if HAVE_GCRYPT
1886         JournalFile *f;
1887         Iterator i;
1888         usec_t n;
1889
1890         n = now(CLOCK_REALTIME);
1891
1892         if (s->system_journal)
1893                 journal_file_maybe_append_tag(s->system_journal, n);
1894
1895         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1896                 journal_file_maybe_append_tag(f, n);
1897 #endif
1898 }
1899
1900 void server_done(Server *s) {
1901         assert(s);
1902
1903         set_free_with_destructor(s->deferred_closes, journal_file_close);
1904
1905         while (s->stdout_streams)
1906                 stdout_stream_free(s->stdout_streams);
1907
1908         client_context_flush_all(s);
1909
1910         if (s->system_journal)
1911                 (void) journal_file_close(s->system_journal);
1912
1913         if (s->runtime_journal)
1914                 (void) journal_file_close(s->runtime_journal);
1915
1916         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
1917
1918         sd_event_source_unref(s->syslog_event_source);
1919         sd_event_source_unref(s->native_event_source);
1920         sd_event_source_unref(s->stdout_event_source);
1921         sd_event_source_unref(s->dev_kmsg_event_source);
1922         sd_event_source_unref(s->audit_event_source);
1923         sd_event_source_unref(s->sync_event_source);
1924         sd_event_source_unref(s->sigusr1_event_source);
1925         sd_event_source_unref(s->sigusr2_event_source);
1926         sd_event_source_unref(s->sigterm_event_source);
1927         sd_event_source_unref(s->sigint_event_source);
1928         sd_event_source_unref(s->sigrtmin1_event_source);
1929         sd_event_source_unref(s->hostname_event_source);
1930         sd_event_source_unref(s->notify_event_source);
1931         sd_event_source_unref(s->watchdog_event_source);
1932         sd_event_unref(s->event);
1933
1934         safe_close(s->syslog_fd);
1935         safe_close(s->native_fd);
1936         safe_close(s->stdout_fd);
1937         safe_close(s->dev_kmsg_fd);
1938         safe_close(s->audit_fd);
1939         safe_close(s->hostname_fd);
1940         safe_close(s->notify_fd);
1941
1942         if (s->rate_limit)
1943                 journal_rate_limit_free(s->rate_limit);
1944
1945         if (s->kernel_seqnum)
1946                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1947
1948         free(s->buffer);
1949         free(s->tty_path);
1950         free(s->cgroup_root);
1951         free(s->hostname_field);
1952         free(s->runtime_storage.path);
1953         free(s->system_storage.path);
1954
1955         if (s->mmap)
1956                 mmap_cache_unref(s->mmap);
1957
1958         udev_unref(s->udev);
1959 }
1960
1961 static const char* const storage_table[_STORAGE_MAX] = {
1962         [STORAGE_AUTO] = "auto",
1963         [STORAGE_VOLATILE] = "volatile",
1964         [STORAGE_PERSISTENT] = "persistent",
1965         [STORAGE_NONE] = "none"
1966 };
1967
1968 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1969 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1970
1971 static const char* const split_mode_table[_SPLIT_MAX] = {
1972         [SPLIT_LOGIN] = "login",
1973         [SPLIT_UID] = "uid",
1974         [SPLIT_NONE] = "none",
1975 };
1976
1977 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1978 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
1979
1980 int config_parse_line_max(
1981                 const char* unit,
1982                 const char *filename,
1983                 unsigned line,
1984                 const char *section,
1985                 unsigned section_line,
1986                 const char *lvalue,
1987                 int ltype,
1988                 const char *rvalue,
1989                 void *data,
1990                 void *userdata) {
1991
1992         size_t *sz = data;
1993         int r;
1994
1995         assert(filename);
1996         assert(lvalue);
1997         assert(rvalue);
1998         assert(data);
1999
2000         if (isempty(rvalue))
2001                 /* Empty assignment means default */
2002                 *sz = DEFAULT_LINE_MAX;
2003         else {
2004                 uint64_t v;
2005
2006                 r = parse_size(rvalue, 1024, &v);
2007                 if (r < 0) {
2008                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2009                         return 0;
2010                 }
2011
2012                 if (v < 79) {
2013                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2014                          * terminal size is 80ch, and it might make sense to break one character before the natural
2015                          * line break would occur on that. */
2016                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2017                         *sz = 79;
2018                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2019                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2020                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2021                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2022                          * fail much earlier anyway. */
2023                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2024                         *sz = SSIZE_MAX-1;
2025                 } else
2026                         *sz = (size_t) v;
2027         }
2028
2029         return 0;
2030 }
2031
2032 int config_parse_compress(const char* unit,
2033                           const char *filename,
2034                           unsigned line,
2035                           const char *section,
2036                           unsigned section_line,
2037                           const char *lvalue,
2038                           int ltype,
2039                           const char *rvalue,
2040                           void *data,
2041                           void *userdata) {
2042         JournalCompressOptions* compress = data;
2043         int r;
2044
2045         if (streq(rvalue, "1")) {
2046                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2047                            "Compress= ambiguously specified as 1, enabling compression with default threshold");
2048                 compress->enabled = true;
2049         } else if (streq(rvalue, "0")) {
2050                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2051                            "Compress= ambiguously specified as 0, disabling compression");
2052                 compress->enabled = false;
2053         } else if ((r = parse_boolean(rvalue)) >= 0)
2054                 compress->enabled = r;
2055         else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2056                 compress->enabled = true;
2057         else if (isempty(rvalue)) {
2058                 compress->enabled = true;
2059                 compress->threshold_bytes = (uint64_t) -1;
2060         } else
2061                 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2062
2063         return 0;
2064 }