src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2011 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #if HAVE_SELINUX
  22 #include <selinux/selinux.h>
  23 #endif
  24 #include <sys/ioctl.h>
  25 #include <sys/mman.h>
  26 #include <sys/signalfd.h>
  27 #include <sys/statvfs.h>
  28 #include <linux/sockios.h>
  29
  30 #include "libudev.h"
  31 #include "sd-daemon.h"
  32 #include "sd-journal.h"
  33 #include "sd-messages.h"
  34
  35 #include "acl-util.h"
  36 #include "alloc-util.h"
  37 #include "audit-util.h"
  38 #include "cgroup-util.h"
  39 #include "conf-parser.h"
  40 #include "dirent-util.h"
  41 #include "extract-word.h"
  42 #include "fd-util.h"
  43 #include "fileio.h"
  44 #include "format-util.h"
  45 #include "fs-util.h"
  46 #include "hashmap.h"
  47 #include "hostname-util.h"
  48 #include "id128-util.h"
  49 #include "io-util.h"
  50 #include "journal-authenticate.h"
  51 #include "journal-file.h"
  52 #include "journal-internal.h"
  53 #include "journal-vacuum.h"
  54 #include "journald-audit.h"
  55 #include "journald-context.h"
  56 #include "journald-kmsg.h"
  57 #include "journald-native.h"
  58 #include "journald-rate-limit.h"
  59 #include "journald-server.h"
  60 #include "journald-stream.h"
  61 #include "journald-syslog.h"
  62 #include "log.h"
  63 #include "missing.h"
  64 #include "mkdir.h"
  65 #include "parse-util.h"
  66 #include "proc-cmdline.h"
  67 #include "process-util.h"
  68 #include "rm-rf.h"
  69 #include "selinux-util.h"
  70 #include "signal-util.h"
  71 #include "socket-util.h"
  72 #include "stdio-util.h"
  73 #include "string-table.h"
  74 #include "string-util.h"
  75 #include "syslog-util.h"
  76 #include "user-util.h"
  77
/* Upper bound on the number of per-user journal files kept open at once; find_journal() closes entries
 * from the front of the ordered map of user journals until the count is below this value. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): their use sites are outside this part of the file; presumably they
 * apply when the configuration does not override them — confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures stay valid: cache_space_refresh() returns early while the cached
 * timestamp is younger than this. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): judging by the name, the send buffer size for the notification socket — confirm at the
 * use site, which is not visible here. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
  95
  96 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  97         _cleanup_closedir_ DIR *d = NULL;
  98         struct dirent *de;
  99         struct statvfs ss;
 100
 101         assert(ret_used);
 102         assert(ret_free);
 103
 104         d = opendir(path);
 105         if (!d)
 106                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
 107                                       errno, "Failed to open %s: %m", path);
 108
 109         if (fstatvfs(dirfd(d), &ss) < 0)
 110                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
 111
 112         *ret_free = ss.f_bsize * ss.f_bavail;
 113         *ret_used = 0;
 114         FOREACH_DIRENT_ALL(de, d, break) {
 115                 struct stat st;
 116
 117                 if (!endswith(de->d_name, ".journal") &&
 118                     !endswith(de->d_name, ".journal~"))
 119                         continue;
 120
 121                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 122                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 123                         continue;
 124                 }
 125
 126                 if (!S_ISREG(st.st_mode))
 127                         continue;
 128
 129                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 130         }
 131
 132         return 0;
 133 }
 134
 135 static void cache_space_invalidate(JournalStorageSpace *space) {
 136         zero(*space);
 137 }
 138
 139 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 140         JournalStorageSpace *space;
 141         JournalMetrics *metrics;
 142         uint64_t vfs_used, vfs_avail, avail;
 143         usec_t ts;
 144         int r;
 145
 146         assert(s);
 147
 148         metrics = &storage->metrics;
 149         space = &storage->space;
 150
 151         ts = now(CLOCK_MONOTONIC);
 152
 153         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 154                 return 0;
 155
 156         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 157         if (r < 0)
 158                 return r;
 159
 160         space->vfs_used = vfs_used;
 161         space->vfs_available = vfs_avail;
 162
 163         avail = LESS_BY(vfs_avail, metrics->keep_free);
 164
 165         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 166         space->available = LESS_BY(space->limit, vfs_used);
 167         space->timestamp = ts;
 168         return 1;
 169 }
 170
 171 static void patch_min_use(JournalStorage *storage) {
 172         assert(storage);
 173
 174         /* Let's bump the min_use limit to the current usage on disk. We do
 175          * this when starting up and first opening the journal files. This way
 176          * sudden spikes in disk usage will not cause journald to vacuum files
 177          * without bounds. Note that this means that only a restart of journald
 178          * will make it reset this value. */
 179
 180         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 181 }
 182
 183
 184 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 185         JournalStorage *js;
 186         int r;
 187
 188         assert(s);
 189
 190         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 191
 192         r = cache_space_refresh(s, js);
 193         if (r >= 0) {
 194                 if (available)
 195                         *available = js->space.available;
 196                 if (limit)
 197                         *limit = js->space.limit;
 198         }
 199         return r;
 200 }
 201
 202 void server_space_usage_message(Server *s, JournalStorage *storage) {
 203         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 204              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 205         JournalMetrics *metrics;
 206
 207         assert(s);
 208
 209         if (!storage)
 210                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 211
 212         if (cache_space_refresh(s, storage) < 0)
 213                 return;
 214
 215         metrics = &storage->metrics;
 216         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 217         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 218         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 219         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 220         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 221         format_bytes(fb6, sizeof(fb6), storage->space.available);
 222
 223         server_driver_message(s, 0,
 224                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 225                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 226                                           storage->name, storage->path, fb1, fb5, fb6),
 227                               "JOURNAL_NAME=%s", storage->name,
 228                               "JOURNAL_PATH=%s", storage->path,
 229                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 230                               "CURRENT_USE_PRETTY=%s", fb1,
 231                               "MAX_USE=%"PRIu64, metrics->max_use,
 232                               "MAX_USE_PRETTY=%s", fb2,
 233                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 234                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 235                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 236                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 237                               "LIMIT=%"PRIu64, storage->space.limit,
 238                               "LIMIT_PRETTY=%s", fb5,
 239                               "AVAILABLE=%"PRIu64, storage->space.available,
 240                               "AVAILABLE_PRETTY=%s", fb6,
 241                               NULL);
 242 }
 243
 244 static bool uid_for_system_journal(uid_t uid) {
 245
 246         /* Returns true if the specified UID shall get its data stored in the system journal*/
 247
 248         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 249 }
 250
 251 static void server_add_acls(JournalFile *f, uid_t uid) {
 252 #if HAVE_ACL
 253         int r;
 254 #endif
 255         assert(f);
 256
 257 #if HAVE_ACL
 258         if (uid_for_system_journal(uid))
 259                 return;
 260
 261         r = add_acls_for_user(f->fd, uid);
 262         if (r < 0)
 263                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 264 #endif
 265 }
 266
 267 static int open_journal(
 268                 Server *s,
 269                 bool reliably,
 270                 const char *fname,
 271                 int flags,
 272                 bool seal,
 273                 JournalMetrics *metrics,
 274                 JournalFile **ret) {
 275         int r;
 276         JournalFile *f;
 277
 278         assert(s);
 279         assert(fname);
 280         assert(ret);
 281
 282         if (reliably)
 283                 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 284         else
 285                 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 286         if (r < 0)
 287                 return r;
 288
 289         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 290         if (r < 0) {
 291                 (void) journal_file_close(f);
 292                 return r;
 293         }
 294
 295         *ret = f;
 296         return r;
 297 }
 298
 299 static bool flushed_flag_is_set(void) {
 300         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 301 }
 302
 303 static int system_journal_open(Server *s, bool flush_requested) {
 304         const char *fn;
 305         int r = 0;
 306
 307         if (!s->system_journal &&
 308             IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
 309             (flush_requested || flushed_flag_is_set())) {
 310
 311                 /* If in auto mode: first try to create the machine
 312                  * path, but not the prefix.
 313                  *
 314                  * If in persistent mode: create /var/log/journal and
 315                  * the machine path */
 316
 317                 if (s->storage == STORAGE_PERSISTENT)
 318                         (void) mkdir_p("/var/log/journal/", 0755);
 319
 320                 (void) mkdir(s->system_storage.path, 0755);
 321
 322                 fn = strjoina(s->system_storage.path, "/system.journal");
 323                 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
 324                 if (r >= 0) {
 325                         server_add_acls(s->system_journal, 0);
 326                         (void) cache_space_refresh(s, &s->system_storage);
 327                         patch_min_use(&s->system_storage);
 328                 } else if (r < 0) {
 329                         if (!IN_SET(r, -ENOENT, -EROFS))
 330                                 log_warning_errno(r, "Failed to open system journal: %m");
 331
 332                         r = 0;
 333                 }
 334
 335                 /* If the runtime journal is open, and we're post-flush, we're
 336                  * recovering from a failed system journal rotate (ENOSPC)
 337                  * for which the runtime journal was reopened.
 338                  *
 339                  * Perform an implicit flush to var, leaving the runtime
 340                  * journal closed, now that the system journal is back.
 341                  */
 342                 if (!flush_requested)
 343                         (void) server_flush_to_var(s, true);
 344         }
 345
 346         if (!s->runtime_journal &&
 347             (s->storage != STORAGE_NONE)) {
 348
 349                 fn = strjoina(s->runtime_storage.path, "/system.journal");
 350
 351                 if (s->system_journal) {
 352
 353                         /* Try to open the runtime journal, but only
 354                          * if it already exists, so that we can flush
 355                          * it into the system journal */
 356
 357                         r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
 358                         if (r < 0) {
 359                                 if (r != -ENOENT)
 360                                         log_warning_errno(r, "Failed to open runtime journal: %m");
 361
 362                                 r = 0;
 363                         }
 364
 365                 } else {
 366
 367                         /* OK, we really need the runtime journal, so create
 368                          * it if necessary. */
 369
 370                         (void) mkdir("/run/log", 0755);
 371                         (void) mkdir("/run/log/journal", 0755);
 372                         (void) mkdir_parents(fn, 0750);
 373
 374                         r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
 375                         if (r < 0)
 376                                 return log_error_errno(r, "Failed to open runtime journal: %m");
 377                 }
 378
 379                 if (s->runtime_journal) {
 380                         server_add_acls(s->runtime_journal, 0);
 381                         (void) cache_space_refresh(s, &s->runtime_storage);
 382                         patch_min_use(&s->runtime_storage);
 383                 }
 384         }
 385
 386         return r;
 387 }
 388
 389 static JournalFile* find_journal(Server *s, uid_t uid) {
 390         _cleanup_free_ char *p = NULL;
 391         int r;
 392         JournalFile *f;
 393         sd_id128_t machine;
 394
 395         assert(s);
 396
 397         /* A rotate that fails to create the new journal (ENOSPC) leaves the
 398          * rotated journal as NULL.  Unless we revisit opening, even after
 399          * space is made available we'll continue to return NULL indefinitely.
 400          *
 401          * system_journal_open() is a noop if the journals are already open, so
 402          * we can just call it here to recover from failed rotates (or anything
 403          * else that's left the journals as NULL).
 404          *
 405          * Fixes https://github.com/systemd/systemd/issues/3968 */
 406         (void) system_journal_open(s, false);
 407
 408         /* We split up user logs only on /var, not on /run. If the
 409          * runtime file is open, we write to it exclusively, in order
 410          * to guarantee proper order as soon as we flush /run to
 411          * /var and close the runtime file. */
 412
 413         if (s->runtime_journal)
 414                 return s->runtime_journal;
 415
 416         if (uid_for_system_journal(uid))
 417                 return s->system_journal;
 418
 419         r = sd_id128_get_machine(&machine);
 420         if (r < 0)
 421                 return s->system_journal;
 422
 423         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 424         if (f)
 425                 return f;
 426
 427         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
 428                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
 429                 return s->system_journal;
 430
 431         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 432                 /* Too many open? Then let's close one */
 433                 f = ordered_hashmap_steal_first(s->user_journals);
 434                 assert(f);
 435                 (void) journal_file_close(f);
 436         }
 437
 438         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 439         if (r < 0)
 440                 return s->system_journal;
 441
 442         server_add_acls(f, uid);
 443
 444         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 445         if (r < 0) {
 446                 (void) journal_file_close(f);
 447                 return s->system_journal;
 448         }
 449
 450         return f;
 451 }
 452
 453 static int do_rotate(
 454                 Server *s,
 455                 JournalFile **f,
 456                 const char* name,
 457                 bool seal,
 458                 uint32_t uid) {
 459
 460         int r;
 461         assert(s);
 462
 463         if (!*f)
 464                 return -EINVAL;
 465
 466         r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
 467         if (r < 0) {
 468                 if (*f)
 469                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 470                 else
 471                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 472         }
 473
 474         server_add_acls(*f, uid);
 475
 476         return r;
 477 }
 478
 479 void server_rotate(Server *s) {
 480         JournalFile *f;
 481         void *k;
 482         Iterator i;
 483         int r;
 484
 485         log_debug("Rotating...");
 486
 487         (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
 488         (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
 489
 490         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
 491                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
 492                 if (r >= 0)
 493                         ordered_hashmap_replace(s->user_journals, k, f);
 494                 else if (!f)
 495                         /* Old file has been closed and deallocated */
 496                         ordered_hashmap_remove(s->user_journals, k);
 497         }
 498
 499         /* Perform any deferred closes which aren't still offlining. */
 500         SET_FOREACH(f, s->deferred_closes, i)
 501                 if (!journal_file_is_offlining(f)) {
 502                         (void) set_remove(s->deferred_closes, f);
 503                         (void) journal_file_close(f);
 504                 }
 505 }
 506
 507 void server_sync(Server *s) {
 508         JournalFile *f;
 509         Iterator i;
 510         int r;
 511
 512         if (s->system_journal) {
 513                 r = journal_file_set_offline(s->system_journal, false);
 514                 if (r < 0)
 515                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 516         }
 517
 518         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 519                 r = journal_file_set_offline(f, false);
 520                 if (r < 0)
 521                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 522         }
 523
 524         if (s->sync_event_source) {
 525                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 526                 if (r < 0)
 527                         log_error_errno(r, "Failed to disable sync timer source: %m");
 528         }
 529
 530         s->sync_scheduled = false;
 531 }
 532
 533 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 534
 535         int r;
 536
 537         assert(s);
 538         assert(storage);
 539
 540         (void) cache_space_refresh(s, storage);
 541
 542         if (verbose)
 543                 server_space_usage_message(s, storage);
 544
 545         r = journal_directory_vacuum(storage->path, storage->space.limit,
 546                                      storage->metrics.n_max_files, s->max_retention_usec,
 547                                      &s->oldest_file_usec, verbose);
 548         if (r < 0 && r != -ENOENT)
 549                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 550
 551         cache_space_invalidate(&storage->space);
 552 }
 553
 554 int server_vacuum(Server *s, bool verbose) {
 555         assert(s);
 556
 557         log_debug("Vacuuming...");
 558
 559         s->oldest_file_usec = 0;
 560
 561         if (s->system_journal)
 562                 do_vacuum(s, &s->system_storage, verbose);
 563         if (s->runtime_journal)
 564                 do_vacuum(s, &s->runtime_storage, verbose);
 565
 566         return 0;
 567 }
 568
 569 static void server_cache_machine_id(Server *s) {
 570         sd_id128_t id;
 571         int r;
 572
 573         assert(s);
 574
 575         r = sd_id128_get_machine(&id);
 576         if (r < 0)
 577                 return;
 578
 579         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 580 }
 581
 582 static void server_cache_boot_id(Server *s) {
 583         sd_id128_t id;
 584         int r;
 585
 586         assert(s);
 587
 588         r = sd_id128_get_boot(&id);
 589         if (r < 0)
 590                 return;
 591
 592         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 593 }
 594
 595 static void server_cache_hostname(Server *s) {
 596         _cleanup_free_ char *t = NULL;
 597         char *x;
 598
 599         assert(s);
 600
 601         t = gethostname_malloc();
 602         if (!t)
 603                 return;
 604
 605         x = strappend("_HOSTNAME=", t);
 606         if (!x)
 607                 return;
 608
 609         free(s->hostname_field);
 610         s->hostname_field = x;
 611 }
 612
 613 static bool shall_try_append_again(JournalFile *f, int r) {
 614         switch(r) {
 615
 616         case -E2BIG:           /* Hit configured limit          */
 617         case -EFBIG:           /* Hit fs limit                  */
 618         case -EDQUOT:          /* Quota limit hit               */
 619         case -ENOSPC:          /* Disk full                     */
 620                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 621                 return true;
 622
 623         case -EIO:             /* I/O error of some kind (mmap) */
 624                 log_warning("%s: IO error, rotating.", f->path);
 625                 return true;
 626
 627         case -EHOSTDOWN:       /* Other machine                 */
 628                 log_info("%s: Journal file from other machine, rotating.", f->path);
 629                 return true;
 630
 631         case -EBUSY:           /* Unclean shutdown              */
 632                 log_info("%s: Unclean shutdown, rotating.", f->path);
 633                 return true;
 634
 635         case -EPROTONOSUPPORT: /* Unsupported feature           */
 636                 log_info("%s: Unsupported feature, rotating.", f->path);
 637                 return true;
 638
 639         case -EBADMSG:         /* Corrupted                     */
 640         case -ENODATA:         /* Truncated                     */
 641         case -ESHUTDOWN:       /* Already archived              */
 642                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 643                 return true;
 644
 645         case -EIDRM:           /* Journal file has been deleted */
 646                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 647                 return true;
 648
 649         case -ETXTBSY:         /* Journal file is from the future */
 650                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 651                 return true;
 652
 653         default:
 654                 return false;
 655         }
 656 }
 657
 658 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
 659         bool vacuumed = false, rotate = false;
 660         struct dual_timestamp ts;
 661         JournalFile *f;
 662         int r;
 663
 664         assert(s);
 665         assert(iovec);
 666         assert(n > 0);
 667
 668         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 669          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 670          * processing it, as we want strictly linear ordering in what we write out.) */
 671         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 672         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 673
 674         if (ts.realtime < s->last_realtime_clock) {
 675                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 676                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 677                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 678                  * bisection works correctly. */
 679
 680                 log_debug("Time jumped backwards, rotating.");
 681                 rotate = true;
 682         } else {
 683
 684                 f = find_journal(s, uid);
 685                 if (!f)
 686                         return;
 687
 688                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 689                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 690                         rotate = true;
 691                 }
 692         }
 693
 694         if (rotate) {
 695                 server_rotate(s);
 696                 server_vacuum(s, false);
 697                 vacuumed = true;
 698
 699                 f = find_journal(s, uid);
 700                 if (!f)
 701                         return;
 702         }
 703
 704         s->last_realtime_clock = ts.realtime;
 705
 706         r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
 707         if (r >= 0) {
 708                 server_schedule_sync(s, priority);
 709                 return;
 710         }
 711
 712         if (vacuumed || !shall_try_append_again(f, r)) {
 713                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 714                 return;
 715         }
 716
 717         server_rotate(s);
 718         server_vacuum(s, false);
 719
 720         f = find_journal(s, uid);
 721         if (!f)
 722                 return;
 723
 724         log_debug("Retrying write.");
 725         r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
 726         if (r < 0)
 727                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 728         else
 729                 server_schedule_sync(s, priority);
 730 }
 731
 732 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
 733         if (isset(value)) {                                             \
 734                 char *k;                                                \
 735                 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
 736                 sprintf(k, field "=" format, value);                    \
 737                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 738         }
 739
 740 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
 741         if (!isempty(value)) {                                          \
 742                 char *k;                                                \
 743                 k = strjoina(field "=", value);                         \
 744                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 745         }
 746
 747 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
 748         if (!sd_id128_is_null(value)) {                                 \
 749                 char *k;                                                \
 750                 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
 751                 sd_id128_to_string(value, stpcpy(k, field "="));        \
 752                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 753         }
 754
 755 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
 756         if (value_size > 0) {                                           \
 757                 char *k;                                                \
 758                 k = newa(char, STRLEN(field "=") + value_size + 1);     \
 759                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
 760                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 761         }                                                               \
 762
 763 static void dispatch_message_real(
 764                 Server *s,
 765                 struct iovec *iovec, size_t n, size_t m,
 766                 const ClientContext *c,
 767                 const struct timeval *tv,
 768                 int priority,
 769                 pid_t object_pid) {
 770
 771         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 772         uid_t journal_uid;
 773         ClientContext *o;
 774
 775         assert(s);
 776         assert(iovec);
 777         assert(n > 0);
 778         assert(n +
 779                N_IOVEC_META_FIELDS +
 780                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 781                client_context_extra_fields_n_iovec(c) <= m);
 782
 783         if (c) {
 784                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 785                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 786                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 787
 788                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 789                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 790                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 791                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 792
 793                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 794
 795                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 796                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 797
 798                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 799                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 800                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 801                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 802                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 803                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 804                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 805
 806                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 807
 808                 if (c->extra_fields_n_iovec > 0) {
 809                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 810                         n += c->extra_fields_n_iovec;
 811                 }
 812         }
 813
 814         assert(n <= m);
 815
 816         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 817
 818                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 819                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 820                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 821
 822                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 823                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 824                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 825                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 826
 827                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 828
 829                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 830                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 831
 832                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 833                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 834                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 835                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 836                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 837                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 838                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 839
 840                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 841         }
 842
 843         assert(n <= m);
 844
 845         if (tv) {
 846                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 847                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 848         }
 849
 850         /* Note that strictly speaking storing the boot id here is
 851          * redundant since the entry includes this in-line
 852          * anyway. However, we need this indexed, too. */
 853         if (!isempty(s->boot_id_field))
 854                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 855
 856         if (!isempty(s->machine_id_field))
 857                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 858
 859         if (!isempty(s->hostname_field))
 860                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 861
 862         assert(n <= m);
 863
 864         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
 865                 /* Split up strictly by (non-root) UID */
 866                 journal_uid = c->uid;
 867         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
 868                 /* Split up by login UIDs.  We do this only if the
 869                  * realuid is not root, in order not to accidentally
 870                  * leak privileged information to the user that is
 871                  * logged by a privileged process that is part of an
 872                  * unprivileged session. */
 873                 journal_uid = c->owner_uid;
 874         else
 875                 journal_uid = 0;
 876
 877         write_to_journal(s, journal_uid, iovec, n, priority);
 878 }
 879
 880 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
 881
 882         struct iovec *iovec;
 883         size_t n = 0, k, m;
 884         va_list ap;
 885         int r;
 886
 887         assert(s);
 888         assert(format);
 889
 890         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
 891         iovec = newa(struct iovec, m);
 892
 893         assert_cc(3 == LOG_FAC(LOG_DAEMON));
 894         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
 895         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
 896
 897         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
 898         assert_cc(6 == LOG_INFO);
 899         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
 900
 901         if (message_id)
 902                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
 903         k = n;
 904
 905         va_start(ap, format);
 906         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
 907         /* Error handling below */
 908         va_end(ap);
 909
 910         if (r >= 0)
 911                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 912
 913         while (k < n)
 914                 free(iovec[k++].iov_base);
 915
 916         if (r < 0) {
 917                 /* We failed to format the message. Emit a warning instead. */
 918                 char buf[LINE_MAX];
 919
 920                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
 921
 922                 n = 3;
 923                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
 924                 iovec[n++] = IOVEC_MAKE_STRING(buf);
 925                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 926         }
 927 }
 928
 929 void server_dispatch_message(
 930                 Server *s,
 931                 struct iovec *iovec, size_t n, size_t m,
 932                 ClientContext *c,
 933                 const struct timeval *tv,
 934                 int priority,
 935                 pid_t object_pid) {
 936
 937         uint64_t available = 0;
 938         int rl;
 939
 940         assert(s);
 941         assert(iovec || n == 0);
 942
 943         if (n == 0)
 944                 return;
 945
 946         if (LOG_PRI(priority) > s->max_level_store)
 947                 return;
 948
 949         /* Stop early in case the information will not be stored
 950          * in a journal. */
 951         if (s->storage == STORAGE_NONE)
 952                 return;
 953
 954         if (c && c->unit) {
 955                 (void) determine_space(s, &available, NULL);
 956
 957                 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
 958                 if (rl == 0)
 959                         return;
 960
 961                 /* Write a suppression message if we suppressed something */
 962                 if (rl > 1)
 963                         server_driver_message(s, c->pid,
 964                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
 965                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
 966                                               "N_DROPPED=%i", rl - 1,
 967                                               NULL);
 968         }
 969
 970         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
 971 }
 972
 973 int server_flush_to_var(Server *s, bool require_flag_file) {
 974         sd_id128_t machine;
 975         sd_journal *j = NULL;
 976         char ts[FORMAT_TIMESPAN_MAX];
 977         usec_t start;
 978         unsigned n = 0;
 979         int r;
 980
 981         assert(s);
 982
 983         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
 984                 return 0;
 985
 986         if (!s->runtime_journal)
 987                 return 0;
 988
 989         if (require_flag_file && !flushed_flag_is_set())
 990                 return 0;
 991
 992         (void) system_journal_open(s, true);
 993
 994         if (!s->system_journal)
 995                 return 0;
 996
 997         log_debug("Flushing to /var...");
 998
 999         start = now(CLOCK_MONOTONIC);
1000
1001         r = sd_id128_get_machine(&machine);
1002         if (r < 0)
1003                 return r;
1004
1005         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1006         if (r < 0)
1007                 return log_error_errno(r, "Failed to read runtime journal: %m");
1008
1009         sd_journal_set_data_threshold(j, 0);
1010
1011         SD_JOURNAL_FOREACH(j) {
1012                 Object *o = NULL;
1013                 JournalFile *f;
1014
1015                 f = j->current_file;
1016                 assert(f && f->current_offset > 0);
1017
1018                 n++;
1019
1020                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1021                 if (r < 0) {
1022                         log_error_errno(r, "Can't read entry: %m");
1023                         goto finish;
1024                 }
1025
1026                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1027                 if (r >= 0)
1028                         continue;
1029
1030                 if (!shall_try_append_again(s->system_journal, r)) {
1031                         log_error_errno(r, "Can't write entry: %m");
1032                         goto finish;
1033                 }
1034
1035                 server_rotate(s);
1036                 server_vacuum(s, false);
1037
1038                 if (!s->system_journal) {
1039                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1040                         r = -EIO;
1041                         goto finish;
1042                 }
1043
1044                 log_debug("Retrying write.");
1045                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1046                 if (r < 0) {
1047                         log_error_errno(r, "Can't write entry: %m");
1048                         goto finish;
1049                 }
1050         }
1051
1052         r = 0;
1053
1054 finish:
1055         journal_file_post_change(s->system_journal);
1056
1057         s->runtime_journal = journal_file_close(s->runtime_journal);
1058
1059         if (r >= 0)
1060                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1061
1062         sd_journal_close(j);
1063
1064         server_driver_message(s, 0, NULL,
1065                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1066                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1067                                           n),
1068                               NULL);
1069
1070         return r;
1071 }
1072
1073 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1074         Server *s = userdata;
1075         struct ucred *ucred = NULL;
1076         struct timeval *tv = NULL;
1077         struct cmsghdr *cmsg;
1078         char *label = NULL;
1079         size_t label_len = 0, m;
1080         struct iovec iovec;
1081         ssize_t n;
1082         int *fds = NULL, v = 0;
1083         unsigned n_fds = 0;
1084
1085         union {
1086                 struct cmsghdr cmsghdr;
1087
1088                 /* We use NAME_MAX space for the SELinux label
1089                  * here. The kernel currently enforces no
1090                  * limit, but according to suggestions from
1091                  * the SELinux people this will change and it
1092                  * will probably be identical to NAME_MAX. For
1093                  * now we use that, but this should be updated
1094                  * one day when the final limit is known. */
1095                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1096                             CMSG_SPACE(sizeof(struct timeval)) +
1097                             CMSG_SPACE(sizeof(int)) + /* fd */
1098                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1099         } control = {};
1100
1101         union sockaddr_union sa = {};
1102
1103         struct msghdr msghdr = {
1104                 .msg_iov = &iovec,
1105                 .msg_iovlen = 1,
1106                 .msg_control = &control,
1107                 .msg_controllen = sizeof(control),
1108                 .msg_name = &sa,
1109                 .msg_namelen = sizeof(sa),
1110         };
1111
1112         assert(s);
1113         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1114
1115         if (revents != EPOLLIN) {
1116                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1117                 return -EIO;
1118         }
1119
1120         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1121          * it.) */
1122         (void) ioctl(fd, SIOCINQ, &v);
1123
1124         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1125         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1126                             (size_t) LINE_MAX,
1127                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1128
1129         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1130                 return log_oom();
1131
1132         iovec.iov_base = s->buffer;
1133         iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1134
1135         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1136         if (n < 0) {
1137                 if (IN_SET(errno, EINTR, EAGAIN))
1138                         return 0;
1139
1140                 return log_error_errno(errno, "recvmsg() failed: %m");
1141         }
1142
1143         CMSG_FOREACH(cmsg, &msghdr) {
1144
1145                 if (cmsg->cmsg_level == SOL_SOCKET &&
1146                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1147                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1148                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1149                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1150                          cmsg->cmsg_type == SCM_SECURITY) {
1151                         label = (char*) CMSG_DATA(cmsg);
1152                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1153                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1154                            cmsg->cmsg_type == SO_TIMESTAMP &&
1155                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1156                         tv = (struct timeval*) CMSG_DATA(cmsg);
1157                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1158                          cmsg->cmsg_type == SCM_RIGHTS) {
1159                         fds = (int*) CMSG_DATA(cmsg);
1160                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1161                 }
1162         }
1163
1164         /* And a trailing NUL, just in case */
1165         s->buffer[n] = 0;
1166
1167         if (fd == s->syslog_fd) {
1168                 if (n > 0 && n_fds == 0)
1169                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1170                 else if (n_fds > 0)
1171                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1172
1173         } else if (fd == s->native_fd) {
1174                 if (n > 0 && n_fds == 0)
1175                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1176                 else if (n == 0 && n_fds == 1)
1177                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1178                 else if (n_fds > 0)
1179                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1180
1181         } else {
1182                 assert(fd == s->audit_fd);
1183
1184                 if (n > 0 && n_fds == 0)
1185                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1186                 else if (n_fds > 0)
1187                         log_warning("Got file descriptors via audit socket. Ignoring.");
1188         }
1189
1190         close_many(fds, n_fds);
1191         return 0;
1192 }
1193
1194 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1195         Server *s = userdata;
1196         int r;
1197
1198         assert(s);
1199
1200         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1201
1202         (void) server_flush_to_var(s, false);
1203         server_sync(s);
1204         server_vacuum(s, false);
1205
1206         r = touch("/run/systemd/journal/flushed");
1207         if (r < 0)
1208                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1209
1210         server_space_usage_message(s, NULL);
1211         return 0;
1212 }
1213
1214 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215         Server *s = userdata;
1216         int r;
1217
1218         assert(s);
1219
1220         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1221         server_rotate(s);
1222         server_vacuum(s, true);
1223
1224         if (s->system_journal)
1225                 patch_min_use(&s->system_storage);
1226         if (s->runtime_journal)
1227                 patch_min_use(&s->runtime_storage);
1228
1229         /* Let clients know when the most recent rotation happened. */
1230         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1231         if (r < 0)
1232                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1233
1234         return 0;
1235 }
1236
1237 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1238         Server *s = userdata;
1239
1240         assert(s);
1241
1242         log_received_signal(LOG_INFO, si);
1243
1244         sd_event_exit(s->event, 0);
1245         return 0;
1246 }
1247
1248 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1249         Server *s = userdata;
1250         int r;
1251
1252         assert(s);
1253
1254         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1255
1256         server_sync(s);
1257
1258         /* Let clients know when the most recent sync happened. */
1259         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1260         if (r < 0)
1261                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1262
1263         return 0;
1264 }
1265
1266 static int setup_signals(Server *s) {
1267         int r;
1268
1269         assert(s);
1270
1271         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1272
1273         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1274         if (r < 0)
1275                 return r;
1276
1277         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1278         if (r < 0)
1279                 return r;
1280
1281         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1282         if (r < 0)
1283                 return r;
1284
1285         /* Let's process SIGTERM late, so that we flush all queued
1286          * messages to disk before we exit */
1287         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1288         if (r < 0)
1289                 return r;
1290
1291         /* When journald is invoked on the terminal (when debugging),
1292          * it's useful if C-c is handled equivalent to SIGTERM. */
1293         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1294         if (r < 0)
1295                 return r;
1296
1297         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1298         if (r < 0)
1299                 return r;
1300
1301         /* SIGRTMIN+1 causes an immediate sync. We process this very
1302          * late, so that everything else queued at this point is
1303          * really written to disk. Clients can watch
1304          * /run/systemd/journal/synced with inotify until its mtime
1305          * changes to see when a sync happened. */
1306         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1307         if (r < 0)
1308                 return r;
1309
1310         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1311         if (r < 0)
1312                 return r;
1313
1314         return 0;
1315 }
1316
1317 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1318         Server *s = data;
1319         int r;
1320
1321         assert(s);
1322
1323         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1324
1325                 r = value ? parse_boolean(value) : true;
1326                 if (r < 0)
1327                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1328                 else
1329                         s->forward_to_syslog = r;
1330
1331         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1332
1333                 r = value ? parse_boolean(value) : true;
1334                 if (r < 0)
1335                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1336                 else
1337                         s->forward_to_kmsg = r;
1338
1339         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1340
1341                 r = value ? parse_boolean(value) : true;
1342                 if (r < 0)
1343                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1344                 else
1345                         s->forward_to_console = r;
1346
1347         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1348
1349                 r = value ? parse_boolean(value) : true;
1350                 if (r < 0)
1351                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1352                 else
1353                         s->forward_to_wall = r;
1354
1355         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1356
1357                 if (proc_cmdline_value_missing(key, value))
1358                         return 0;
1359
1360                 r = log_level_from_string(value);
1361                 if (r < 0)
1362                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1363                 else
1364                         s->max_level_console = r;
1365
1366         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1367
1368                 if (proc_cmdline_value_missing(key, value))
1369                         return 0;
1370
1371                 r = log_level_from_string(value);
1372                 if (r < 0)
1373                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1374                 else
1375                         s->max_level_store = r;
1376
1377         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1378
1379                 if (proc_cmdline_value_missing(key, value))
1380                         return 0;
1381
1382                 r = log_level_from_string(value);
1383                 if (r < 0)
1384                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1385                 else
1386                         s->max_level_syslog = r;
1387
1388         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1389
1390                 if (proc_cmdline_value_missing(key, value))
1391                         return 0;
1392
1393                 r = log_level_from_string(value);
1394                 if (r < 0)
1395                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1396                 else
1397                         s->max_level_kmsg = r;
1398
1399         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1400
1401                 if (proc_cmdline_value_missing(key, value))
1402                         return 0;
1403
1404                 r = log_level_from_string(value);
1405                 if (r < 0)
1406                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1407                 else
1408                         s->max_level_wall = r;
1409
1410         } else if (startswith(key, "systemd.journald"))
1411                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1412
1413         /* do not warn about state here, since probably systemd already did */
1414         return 0;
1415 }
1416
1417 static int server_parse_config_file(Server *s) {
1418         assert(s);
1419
1420         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1421                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1422                                         "Journal\0",
1423                                         config_item_perf_lookup, journald_gperf_lookup,
1424                                         CONFIG_PARSE_WARN, s);
1425 }
1426
1427 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1428         Server *s = userdata;
1429
1430         assert(s);
1431
1432         server_sync(s);
1433         return 0;
1434 }
1435
1436 int server_schedule_sync(Server *s, int priority) {
1437         int r;
1438
1439         assert(s);
1440
1441         if (priority <= LOG_CRIT) {
1442                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1443                 server_sync(s);
1444                 return 0;
1445         }
1446
1447         if (s->sync_scheduled)
1448                 return 0;
1449
1450         if (s->sync_interval_usec > 0) {
1451                 usec_t when;
1452
1453                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1454                 if (r < 0)
1455                         return r;
1456
1457                 when += s->sync_interval_usec;
1458
1459                 if (!s->sync_event_source) {
1460                         r = sd_event_add_time(
1461                                         s->event,
1462                                         &s->sync_event_source,
1463                                         CLOCK_MONOTONIC,
1464                                         when, 0,
1465                                         server_dispatch_sync, s);
1466                         if (r < 0)
1467                                 return r;
1468
1469                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1470                 } else {
1471                         r = sd_event_source_set_time(s->sync_event_source, when);
1472                         if (r < 0)
1473                                 return r;
1474
1475                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1476                 }
1477                 if (r < 0)
1478                         return r;
1479
1480                 s->sync_scheduled = true;
1481         }
1482
1483         return 0;
1484 }
1485
1486 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1487         Server *s = userdata;
1488
1489         assert(s);
1490
1491         server_cache_hostname(s);
1492         return 0;
1493 }
1494
1495 static int server_open_hostname(Server *s) {
1496         int r;
1497
1498         assert(s);
1499
1500         s->hostname_fd = open("/proc/sys/kernel/hostname",
1501                               O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1502         if (s->hostname_fd < 0)
1503                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1504
1505         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1506         if (r < 0) {
1507                 /* kernels prior to 3.2 don't support polling this file. Ignore
1508                  * the failure. */
1509                 if (r == -EPERM) {
1510                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1511                         s->hostname_fd = safe_close(s->hostname_fd);
1512                         return 0;
1513                 }
1514
1515                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1516         }
1517
1518         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1519         if (r < 0)
1520                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1521
1522         return 0;
1523 }
1524
1525 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1526         Server *s = userdata;
1527         int r;
1528
1529         assert(s);
1530         assert(s->notify_event_source == es);
1531         assert(s->notify_fd == fd);
1532
1533         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1534          * message on it. Either it's the watchdog event, the initial
1535          * READY=1 event or an stdout stream event. If there's nothing
1536          * to write anymore, turn our event source off. The next time
1537          * there's something to send it will be turned on again. */
1538
1539         if (!s->sent_notify_ready) {
1540                 static const char p[] =
1541                         "READY=1\n"
1542                         "STATUS=Processing requests...";
1543                 ssize_t l;
1544
1545                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1546                 if (l < 0) {
1547                         if (errno == EAGAIN)
1548                                 return 0;
1549
1550                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1551                 }
1552
1553                 s->sent_notify_ready = true;
1554                 log_debug("Sent READY=1 notification.");
1555
1556         } else if (s->send_watchdog) {
1557
1558                 static const char p[] =
1559                         "WATCHDOG=1";
1560
1561                 ssize_t l;
1562
1563                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1564                 if (l < 0) {
1565                         if (errno == EAGAIN)
1566                                 return 0;
1567
1568                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1569                 }
1570
1571                 s->send_watchdog = false;
1572                 log_debug("Sent WATCHDOG=1 notification.");
1573
1574         } else if (s->stdout_streams_notify_queue)
1575                 /* Dispatch one stream notification event */
1576                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1577
1578         /* Leave us enabled if there's still more to do. */
1579         if (s->send_watchdog || s->stdout_streams_notify_queue)
1580                 return 0;
1581
1582         /* There was nothing to do anymore, let's turn ourselves off. */
1583         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1584         if (r < 0)
1585                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1586
1587         return 0;
1588 }
1589
1590 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1591         Server *s = userdata;
1592         int r;
1593
1594         assert(s);
1595
1596         s->send_watchdog = true;
1597
1598         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1599         if (r < 0)
1600                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1601
1602         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1603         if (r < 0)
1604                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1605
1606         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1607         if (r < 0)
1608                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1609
1610         return 0;
1611 }
1612
1613 static int server_connect_notify(Server *s) {
1614         union sockaddr_union sa = {
1615                 .un.sun_family = AF_UNIX,
1616         };
1617         const char *e;
1618         int r;
1619
1620         assert(s);
1621         assert(s->notify_fd < 0);
1622         assert(!s->notify_event_source);
1623
1624         /*
1625           So here's the problem: we'd like to send notification
1626           messages to PID 1, but we cannot do that via sd_notify(),
1627           since that's synchronous, and we might end up blocking on
1628           it. Specifically: given that PID 1 might block on
1629           dbus-daemon during IPC, and dbus-daemon is logging to us,
1630           and might hence block on us, we might end up in a deadlock
1631           if we block on sending PID 1 notification messages — by
1632           generating a full blocking circle. To avoid this, let's
1633           create a non-blocking socket, and connect it to the
1634           notification socket, and then wait for POLLOUT before we
1635           send anything. This should efficiently avoid any deadlocks,
1636           as we'll never block on PID 1, hence PID 1 can safely block
1637           on dbus-daemon which can safely block on us again.
1638
1639           Don't think that this issue is real? It is, see:
1640           https://github.com/systemd/systemd/issues/1505
1641         */
1642
1643         e = getenv("NOTIFY_SOCKET");
1644         if (!e)
1645                 return 0;
1646
1647         if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
1648                 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1649                 return -EINVAL;
1650         }
1651
1652         if (strlen(e) > sizeof(sa.un.sun_path)) {
1653                 log_error("NOTIFY_SOCKET path too long: %s", e);
1654                 return -EINVAL;
1655         }
1656
1657         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1658         if (s->notify_fd < 0)
1659                 return log_error_errno(errno, "Failed to create notify socket: %m");
1660
1661         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1662
1663         strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1664         if (sa.un.sun_path[0] == '@')
1665                 sa.un.sun_path[0] = 0;
1666
1667         r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1668         if (r < 0)
1669                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1670
1671         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1672         if (r < 0)
1673                 return log_error_errno(r, "Failed to watch notification socket: %m");
1674
1675         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1676                 s->send_watchdog = true;
1677
1678                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1679                 if (r < 0)
1680                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1681         }
1682
1683         /* This should fire pretty soon, which we'll use to send the
1684          * READY=1 event. */
1685
1686         return 0;
1687 }
1688
1689 int server_init(Server *s) {
1690         _cleanup_fdset_free_ FDSet *fds = NULL;
1691         int n, r, fd;
1692         bool no_sockets;
1693
1694         assert(s);
1695
1696         zero(*s);
1697         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1698         s->compress = true;
1699         s->seal = true;
1700         s->read_kmsg = true;
1701
1702         s->watchdog_usec = USEC_INFINITY;
1703
1704         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1705         s->sync_scheduled = false;
1706
1707         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1708         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1709
1710         s->forward_to_wall = true;
1711
1712         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1713
1714         s->max_level_store = LOG_DEBUG;
1715         s->max_level_syslog = LOG_DEBUG;
1716         s->max_level_kmsg = LOG_NOTICE;
1717         s->max_level_console = LOG_INFO;
1718         s->max_level_wall = LOG_EMERG;
1719
1720         s->line_max = DEFAULT_LINE_MAX;
1721
1722         journal_reset_metrics(&s->system_storage.metrics);
1723         journal_reset_metrics(&s->runtime_storage.metrics);
1724
1725         server_parse_config_file(s);
1726
1727         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1728         if (r < 0)
1729                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1730
1731         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1732                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1733                           s->rate_limit_interval, s->rate_limit_burst);
1734                 s->rate_limit_interval = s->rate_limit_burst = 0;
1735         }
1736
1737         (void) mkdir_p("/run/systemd/journal", 0755);
1738
1739         s->user_journals = ordered_hashmap_new(NULL);
1740         if (!s->user_journals)
1741                 return log_oom();
1742
1743         s->mmap = mmap_cache_new();
1744         if (!s->mmap)
1745                 return log_oom();
1746
1747         s->deferred_closes = set_new(NULL);
1748         if (!s->deferred_closes)
1749                 return log_oom();
1750
1751         r = sd_event_default(&s->event);
1752         if (r < 0)
1753                 return log_error_errno(r, "Failed to create event loop: %m");
1754
1755         n = sd_listen_fds(true);
1756         if (n < 0)
1757                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1758
1759         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1760
1761                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1762
1763                         if (s->native_fd >= 0) {
1764                                 log_error("Too many native sockets passed.");
1765                                 return -EINVAL;
1766                         }
1767
1768                         s->native_fd = fd;
1769
1770                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1771
1772                         if (s->stdout_fd >= 0) {
1773                                 log_error("Too many stdout sockets passed.");
1774                                 return -EINVAL;
1775                         }
1776
1777                         s->stdout_fd = fd;
1778
1779                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1780                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1781
1782                         if (s->syslog_fd >= 0) {
1783                                 log_error("Too many /dev/log sockets passed.");
1784                                 return -EINVAL;
1785                         }
1786
1787                         s->syslog_fd = fd;
1788
1789                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1790
1791                         if (s->audit_fd >= 0) {
1792                                 log_error("Too many audit sockets passed.");
1793                                 return -EINVAL;
1794                         }
1795
1796                         s->audit_fd = fd;
1797
1798                 } else {
1799
1800                         if (!fds) {
1801                                 fds = fdset_new();
1802                                 if (!fds)
1803                                         return log_oom();
1804                         }
1805
1806                         r = fdset_put(fds, fd);
1807                         if (r < 0)
1808                                 return log_oom();
1809                 }
1810         }
1811
1812         /* Try to restore streams, but don't bother if this fails */
1813         (void) server_restore_streams(s, fds);
1814
1815         if (fdset_size(fds) > 0) {
1816                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1817                 fds = fdset_free(fds);
1818         }
1819
1820         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1821
1822         /* always open stdout, syslog, native, and kmsg sockets */
1823
1824         /* systemd-journald.socket: /run/systemd/journal/stdout */
1825         r = server_open_stdout_socket(s);
1826         if (r < 0)
1827                 return r;
1828
1829         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1830         r = server_open_syslog_socket(s);
1831         if (r < 0)
1832                 return r;
1833
1834         /* systemd-journald.socket: /run/systemd/journal/socket */
1835         r = server_open_native_socket(s);
1836         if (r < 0)
1837                 return r;
1838
1839         /* /dev/kmsg */
1840         r = server_open_dev_kmsg(s);
1841         if (r < 0)
1842                 return r;
1843
1844         /* Unless we got *some* sockets and not audit, open audit socket */
1845         if (s->audit_fd >= 0 || no_sockets) {
1846                 r = server_open_audit(s);
1847                 if (r < 0)
1848                         return r;
1849         }
1850
1851         r = server_open_kernel_seqnum(s);
1852         if (r < 0)
1853                 return r;
1854
1855         r = server_open_hostname(s);
1856         if (r < 0)
1857                 return r;
1858
1859         r = setup_signals(s);
1860         if (r < 0)
1861                 return r;
1862
1863         s->udev = udev_new();
1864         if (!s->udev)
1865                 return -ENOMEM;
1866
1867         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1868         if (!s->rate_limit)
1869                 return -ENOMEM;
1870
1871         r = cg_get_root_path(&s->cgroup_root);
1872         if (r < 0)
1873                 return r;
1874
1875         server_cache_hostname(s);
1876         server_cache_boot_id(s);
1877         server_cache_machine_id(s);
1878
1879         s->runtime_storage.name = "Runtime journal";
1880         s->system_storage.name = "System journal";
1881
1882         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1883         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1884         if (!s->runtime_storage.path || !s->system_storage.path)
1885                 return -ENOMEM;
1886
1887         (void) server_connect_notify(s);
1888
1889         (void) client_context_acquire_default(s);
1890
1891         return system_journal_open(s, false);
1892 }
1893
1894 void server_maybe_append_tags(Server *s) {
1895 #if HAVE_GCRYPT
1896         JournalFile *f;
1897         Iterator i;
1898         usec_t n;
1899
1900         n = now(CLOCK_REALTIME);
1901
1902         if (s->system_journal)
1903                 journal_file_maybe_append_tag(s->system_journal, n);
1904
1905         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1906                 journal_file_maybe_append_tag(f, n);
1907 #endif
1908 }
1909
1910 void server_done(Server *s) {
1911         assert(s);
1912
1913         set_free_with_destructor(s->deferred_closes, journal_file_close);
1914
1915         while (s->stdout_streams)
1916                 stdout_stream_free(s->stdout_streams);
1917
1918         client_context_flush_all(s);
1919
1920         if (s->system_journal)
1921                 (void) journal_file_close(s->system_journal);
1922
1923         if (s->runtime_journal)
1924                 (void) journal_file_close(s->runtime_journal);
1925
1926         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
1927
1928         sd_event_source_unref(s->syslog_event_source);
1929         sd_event_source_unref(s->native_event_source);
1930         sd_event_source_unref(s->stdout_event_source);
1931         sd_event_source_unref(s->dev_kmsg_event_source);
1932         sd_event_source_unref(s->audit_event_source);
1933         sd_event_source_unref(s->sync_event_source);
1934         sd_event_source_unref(s->sigusr1_event_source);
1935         sd_event_source_unref(s->sigusr2_event_source);
1936         sd_event_source_unref(s->sigterm_event_source);
1937         sd_event_source_unref(s->sigint_event_source);
1938         sd_event_source_unref(s->sigrtmin1_event_source);
1939         sd_event_source_unref(s->hostname_event_source);
1940         sd_event_source_unref(s->notify_event_source);
1941         sd_event_source_unref(s->watchdog_event_source);
1942         sd_event_unref(s->event);
1943
1944         safe_close(s->syslog_fd);
1945         safe_close(s->native_fd);
1946         safe_close(s->stdout_fd);
1947         safe_close(s->dev_kmsg_fd);
1948         safe_close(s->audit_fd);
1949         safe_close(s->hostname_fd);
1950         safe_close(s->notify_fd);
1951
1952         if (s->rate_limit)
1953                 journal_rate_limit_free(s->rate_limit);
1954
1955         if (s->kernel_seqnum)
1956                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1957
1958         free(s->buffer);
1959         free(s->tty_path);
1960         free(s->cgroup_root);
1961         free(s->hostname_field);
1962         free(s->runtime_storage.path);
1963         free(s->system_storage.path);
1964
1965         if (s->mmap)
1966                 mmap_cache_unref(s->mmap);
1967
1968         udev_unref(s->udev);
1969 }
1970
1971 static const char* const storage_table[_STORAGE_MAX] = {
1972         [STORAGE_AUTO] = "auto",
1973         [STORAGE_VOLATILE] = "volatile",
1974         [STORAGE_PERSISTENT] = "persistent",
1975         [STORAGE_NONE] = "none"
1976 };
1977
1978 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1979 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1980
1981 static const char* const split_mode_table[_SPLIT_MAX] = {
1982         [SPLIT_LOGIN] = "login",
1983         [SPLIT_UID] = "uid",
1984         [SPLIT_NONE] = "none",
1985 };
1986
1987 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1988 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
1989
1990 int config_parse_line_max(
1991                 const char* unit,
1992                 const char *filename,
1993                 unsigned line,
1994                 const char *section,
1995                 unsigned section_line,
1996                 const char *lvalue,
1997                 int ltype,
1998                 const char *rvalue,
1999                 void *data,
2000                 void *userdata) {
2001
2002         size_t *sz = data;
2003         int r;
2004
2005         assert(filename);
2006         assert(lvalue);
2007         assert(rvalue);
2008         assert(data);
2009
2010         if (isempty(rvalue))
2011                 /* Empty assignment means default */
2012                 *sz = DEFAULT_LINE_MAX;
2013         else {
2014                 uint64_t v;
2015
2016                 r = parse_size(rvalue, 1024, &v);
2017                 if (r < 0) {
2018                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2019                         return 0;
2020                 }
2021
2022                 if (v < 79) {
2023                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2024                          * terminal size is 80ch, and it might make sense to break one character before the natural
2025                          * line break would occur on that. */
2026                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2027                         *sz = 79;
2028                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2029                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2030                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2031                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2032                          * fail much earlier anyway. */
2033                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2034                         *sz = SSIZE_MAX-1;
2035                 } else
2036                         *sz = (size_t) v;
2037         }
2038
2039         return 0;
2040 }