src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2011 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #if HAVE_SELINUX
  22 #include <selinux/selinux.h>
  23 #endif
  24 #include <sys/ioctl.h>
  25 #include <sys/mman.h>
  26 #include <sys/signalfd.h>
  27 #include <sys/statvfs.h>
  28 #include <linux/sockios.h>
  29
  30 #include "libudev.h"
  31 #include "sd-daemon.h"
  32 #include "sd-journal.h"
  33 #include "sd-messages.h"
  34
  35 #include "acl-util.h"
  36 #include "alloc-util.h"
  37 #include "audit-util.h"
  38 #include "cgroup-util.h"
  39 #include "conf-parser.h"
  40 #include "dirent-util.h"
  41 #include "extract-word.h"
  42 #include "fd-util.h"
  43 #include "fileio.h"
  44 #include "format-util.h"
  45 #include "fs-util.h"
  46 #include "hashmap.h"
  47 #include "hostname-util.h"
  48 #include "id128-util.h"
  49 #include "io-util.h"
  50 #include "journal-authenticate.h"
  51 #include "journal-file.h"
  52 #include "journal-internal.h"
  53 #include "journal-vacuum.h"
  54 #include "journald-audit.h"
  55 #include "journald-context.h"
  56 #include "journald-kmsg.h"
  57 #include "journald-native.h"
  58 #include "journald-rate-limit.h"
  59 #include "journald-server.h"
  60 #include "journald-stream.h"
  61 #include "journald-syslog.h"
  62 #include "log.h"
  63 #include "missing.h"
  64 #include "mkdir.h"
  65 #include "parse-util.h"
  66 #include "proc-cmdline.h"
  67 #include "process-util.h"
  68 #include "rm-rf.h"
  69 #include "selinux-util.h"
  70 #include "signal-util.h"
  71 #include "socket-util.h"
  72 #include "stdio-util.h"
  73 #include "string-table.h"
  74 #include "string-util.h"
  75 #include "syslog-util.h"
  76 #include "user-util.h"
  77
/* Upper bound on the number of per-user journal files kept open at the same time: find_journal() evicts
 * entries from the front of the user journal map while this many (or more) are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): presumably used when the configuration does not override them; the code
 * that consumes them is outside this part of the file. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached disk space measurement is considered fresh before cache_space_refresh() measures again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): judging by the name, the send buffer size requested for the notification socket — confirm at
 * the place where it is used. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
  95
  96 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  97         _cleanup_closedir_ DIR *d = NULL;
  98         struct dirent *de;
  99         struct statvfs ss;
 100
 101         assert(ret_used);
 102         assert(ret_free);
 103
 104         d = opendir(path);
 105         if (!d)
 106                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
 107                                       errno, "Failed to open %s: %m", path);
 108
 109         if (fstatvfs(dirfd(d), &ss) < 0)
 110                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
 111
 112         *ret_free = ss.f_bsize * ss.f_bavail;
 113         *ret_used = 0;
 114         FOREACH_DIRENT_ALL(de, d, break) {
 115                 struct stat st;
 116
 117                 if (!endswith(de->d_name, ".journal") &&
 118                     !endswith(de->d_name, ".journal~"))
 119                         continue;
 120
 121                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 122                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 123                         continue;
 124                 }
 125
 126                 if (!S_ISREG(st.st_mode))
 127                         continue;
 128
 129                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 130         }
 131
 132         return 0;
 133 }
 134
 135 static void cache_space_invalidate(JournalStorageSpace *space) {
 136         zero(*space);
 137 }
 138
 139 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 140         JournalStorageSpace *space;
 141         JournalMetrics *metrics;
 142         uint64_t vfs_used, vfs_avail, avail;
 143         usec_t ts;
 144         int r;
 145
 146         assert(s);
 147
 148         metrics = &storage->metrics;
 149         space = &storage->space;
 150
 151         ts = now(CLOCK_MONOTONIC);
 152
 153         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 154                 return 0;
 155
 156         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 157         if (r < 0)
 158                 return r;
 159
 160         space->vfs_used = vfs_used;
 161         space->vfs_available = vfs_avail;
 162
 163         avail = LESS_BY(vfs_avail, metrics->keep_free);
 164
 165         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 166         space->available = LESS_BY(space->limit, vfs_used);
 167         space->timestamp = ts;
 168         return 1;
 169 }
 170
 171 static void patch_min_use(JournalStorage *storage) {
 172         assert(storage);
 173
 174         /* Let's bump the min_use limit to the current usage on disk. We do
 175          * this when starting up and first opening the journal files. This way
 176          * sudden spikes in disk usage will not cause journald to vacuum files
 177          * without bounds. Note that this means that only a restart of journald
 178          * will make it reset this value. */
 179
 180         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 181 }
 182
 183
 184 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 185         JournalStorage *js;
 186         int r;
 187
 188         assert(s);
 189
 190         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 191
 192         r = cache_space_refresh(s, js);
 193         if (r >= 0) {
 194                 if (available)
 195                         *available = js->space.available;
 196                 if (limit)
 197                         *limit = js->space.limit;
 198         }
 199         return r;
 200 }
 201
 202 void server_space_usage_message(Server *s, JournalStorage *storage) {
 203         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 204              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 205         JournalMetrics *metrics;
 206
 207         assert(s);
 208
 209         if (!storage)
 210                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 211
 212         if (cache_space_refresh(s, storage) < 0)
 213                 return;
 214
 215         metrics = &storage->metrics;
 216         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 217         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 218         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 219         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 220         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 221         format_bytes(fb6, sizeof(fb6), storage->space.available);
 222
 223         server_driver_message(s, 0,
 224                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 225                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 226                                           storage->name, storage->path, fb1, fb5, fb6),
 227                               "JOURNAL_NAME=%s", storage->name,
 228                               "JOURNAL_PATH=%s", storage->path,
 229                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 230                               "CURRENT_USE_PRETTY=%s", fb1,
 231                               "MAX_USE=%"PRIu64, metrics->max_use,
 232                               "MAX_USE_PRETTY=%s", fb2,
 233                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 234                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 235                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 236                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 237                               "LIMIT=%"PRIu64, storage->space.limit,
 238                               "LIMIT_PRETTY=%s", fb5,
 239                               "AVAILABLE=%"PRIu64, storage->space.available,
 240                               "AVAILABLE_PRETTY=%s", fb6,
 241                               NULL);
 242 }
 243
 244 static bool uid_for_system_journal(uid_t uid) {
 245
 246         /* Returns true if the specified UID shall get its data stored in the system journal*/
 247
 248         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 249 }
 250
 251 static void server_add_acls(JournalFile *f, uid_t uid) {
 252 #if HAVE_ACL
 253         int r;
 254 #endif
 255         assert(f);
 256
 257 #if HAVE_ACL
 258         if (uid_for_system_journal(uid))
 259                 return;
 260
 261         r = add_acls_for_user(f->fd, uid);
 262         if (r < 0)
 263                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 264 #endif
 265 }
 266
 267 static int open_journal(
 268                 Server *s,
 269                 bool reliably,
 270                 const char *fname,
 271                 int flags,
 272                 bool seal,
 273                 JournalMetrics *metrics,
 274                 JournalFile **ret) {
 275         int r;
 276         JournalFile *f;
 277
 278         assert(s);
 279         assert(fname);
 280         assert(ret);
 281
 282         if (reliably)
 283                 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 284         else
 285                 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 286         if (r < 0)
 287                 return r;
 288
 289         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 290         if (r < 0) {
 291                 (void) journal_file_close(f);
 292                 return r;
 293         }
 294
 295         *ret = f;
 296         return r;
 297 }
 298
 299 static bool flushed_flag_is_set(void) {
 300         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 301 }
 302
 303 static int system_journal_open(Server *s, bool flush_requested) {
 304         const char *fn;
 305         int r = 0;
 306
 307         if (!s->system_journal &&
 308             IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
 309             (flush_requested || flushed_flag_is_set())) {
 310
 311                 /* If in auto mode: first try to create the machine
 312                  * path, but not the prefix.
 313                  *
 314                  * If in persistent mode: create /var/log/journal and
 315                  * the machine path */
 316
 317                 if (s->storage == STORAGE_PERSISTENT)
 318                         (void) mkdir_p("/var/log/journal/", 0755);
 319
 320                 (void) mkdir(s->system_storage.path, 0755);
 321
 322                 fn = strjoina(s->system_storage.path, "/system.journal");
 323                 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
 324                 if (r >= 0) {
 325                         server_add_acls(s->system_journal, 0);
 326                         (void) cache_space_refresh(s, &s->system_storage);
 327                         patch_min_use(&s->system_storage);
 328                 } else if (r < 0) {
 329                         if (!IN_SET(r, -ENOENT, -EROFS))
 330                                 log_warning_errno(r, "Failed to open system journal: %m");
 331
 332                         r = 0;
 333                 }
 334
 335                 /* If the runtime journal is open, and we're post-flush, we're
 336                  * recovering from a failed system journal rotate (ENOSPC)
 337                  * for which the runtime journal was reopened.
 338                  *
 339                  * Perform an implicit flush to var, leaving the runtime
 340                  * journal closed, now that the system journal is back.
 341                  */
 342                 if (!flush_requested)
 343                         (void) server_flush_to_var(s, true);
 344         }
 345
 346         if (!s->runtime_journal &&
 347             (s->storage != STORAGE_NONE)) {
 348
 349                 fn = strjoina(s->runtime_storage.path, "/system.journal");
 350
 351                 if (s->system_journal) {
 352
 353                         /* Try to open the runtime journal, but only
 354                          * if it already exists, so that we can flush
 355                          * it into the system journal */
 356
 357                         r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
 358                         if (r < 0) {
 359                                 if (r != -ENOENT)
 360                                         log_warning_errno(r, "Failed to open runtime journal: %m");
 361
 362                                 r = 0;
 363                         }
 364
 365                 } else {
 366
 367                         /* OK, we really need the runtime journal, so create
 368                          * it if necessary. */
 369
 370                         (void) mkdir("/run/log", 0755);
 371                         (void) mkdir("/run/log/journal", 0755);
 372                         (void) mkdir_parents(fn, 0750);
 373
 374                         r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
 375                         if (r < 0)
 376                                 return log_error_errno(r, "Failed to open runtime journal: %m");
 377                 }
 378
 379                 if (s->runtime_journal) {
 380                         server_add_acls(s->runtime_journal, 0);
 381                         (void) cache_space_refresh(s, &s->runtime_storage);
 382                         patch_min_use(&s->runtime_storage);
 383                 }
 384         }
 385
 386         return r;
 387 }
 388
 389 static JournalFile* find_journal(Server *s, uid_t uid) {
 390         _cleanup_free_ char *p = NULL;
 391         int r;
 392         JournalFile *f;
 393         sd_id128_t machine;
 394
 395         assert(s);
 396
 397         /* A rotate that fails to create the new journal (ENOSPC) leaves the
 398          * rotated journal as NULL.  Unless we revisit opening, even after
 399          * space is made available we'll continue to return NULL indefinitely.
 400          *
 401          * system_journal_open() is a noop if the journals are already open, so
 402          * we can just call it here to recover from failed rotates (or anything
 403          * else that's left the journals as NULL).
 404          *
 405          * Fixes https://github.com/systemd/systemd/issues/3968 */
 406         (void) system_journal_open(s, false);
 407
 408         /* We split up user logs only on /var, not on /run. If the
 409          * runtime file is open, we write to it exclusively, in order
 410          * to guarantee proper order as soon as we flush /run to
 411          * /var and close the runtime file. */
 412
 413         if (s->runtime_journal)
 414                 return s->runtime_journal;
 415
 416         if (uid_for_system_journal(uid))
 417                 return s->system_journal;
 418
 419         r = sd_id128_get_machine(&machine);
 420         if (r < 0)
 421                 return s->system_journal;
 422
 423         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 424         if (f)
 425                 return f;
 426
 427         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
 428                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
 429                 return s->system_journal;
 430
 431         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 432                 /* Too many open? Then let's close one */
 433                 f = ordered_hashmap_steal_first(s->user_journals);
 434                 assert(f);
 435                 (void) journal_file_close(f);
 436         }
 437
 438         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 439         if (r < 0)
 440                 return s->system_journal;
 441
 442         server_add_acls(f, uid);
 443
 444         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 445         if (r < 0) {
 446                 (void) journal_file_close(f);
 447                 return s->system_journal;
 448         }
 449
 450         return f;
 451 }
 452
 453 static int do_rotate(
 454                 Server *s,
 455                 JournalFile **f,
 456                 const char* name,
 457                 bool seal,
 458                 uint32_t uid) {
 459
 460         int r;
 461         assert(s);
 462
 463         if (!*f)
 464                 return -EINVAL;
 465
 466         r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
 467         if (r < 0)
 468                 if (*f)
 469                         log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 470                 else
 471                         log_error_errno(r, "Failed to create new %s journal: %m", name);
 472         else
 473                 server_add_acls(*f, uid);
 474
 475         return r;
 476 }
 477
 478 void server_rotate(Server *s) {
 479         JournalFile *f;
 480         void *k;
 481         Iterator i;
 482         int r;
 483
 484         log_debug("Rotating...");
 485
 486         (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
 487         (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
 488
 489         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
 490                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
 491                 if (r >= 0)
 492                         ordered_hashmap_replace(s->user_journals, k, f);
 493                 else if (!f)
 494                         /* Old file has been closed and deallocated */
 495                         ordered_hashmap_remove(s->user_journals, k);
 496         }
 497
 498         /* Perform any deferred closes which aren't still offlining. */
 499         SET_FOREACH(f, s->deferred_closes, i)
 500                 if (!journal_file_is_offlining(f)) {
 501                         (void) set_remove(s->deferred_closes, f);
 502                         (void) journal_file_close(f);
 503                 }
 504 }
 505
 506 void server_sync(Server *s) {
 507         JournalFile *f;
 508         Iterator i;
 509         int r;
 510
 511         if (s->system_journal) {
 512                 r = journal_file_set_offline(s->system_journal, false);
 513                 if (r < 0)
 514                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 515         }
 516
 517         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 518                 r = journal_file_set_offline(f, false);
 519                 if (r < 0)
 520                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 521         }
 522
 523         if (s->sync_event_source) {
 524                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 525                 if (r < 0)
 526                         log_error_errno(r, "Failed to disable sync timer source: %m");
 527         }
 528
 529         s->sync_scheduled = false;
 530 }
 531
 532 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 533
 534         int r;
 535
 536         assert(s);
 537         assert(storage);
 538
 539         (void) cache_space_refresh(s, storage);
 540
 541         if (verbose)
 542                 server_space_usage_message(s, storage);
 543
 544         r = journal_directory_vacuum(storage->path, storage->space.limit,
 545                                      storage->metrics.n_max_files, s->max_retention_usec,
 546                                      &s->oldest_file_usec, verbose);
 547         if (r < 0 && r != -ENOENT)
 548                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 549
 550         cache_space_invalidate(&storage->space);
 551 }
 552
 553 int server_vacuum(Server *s, bool verbose) {
 554         assert(s);
 555
 556         log_debug("Vacuuming...");
 557
 558         s->oldest_file_usec = 0;
 559
 560         if (s->system_journal)
 561                 do_vacuum(s, &s->system_storage, verbose);
 562         if (s->runtime_journal)
 563                 do_vacuum(s, &s->runtime_storage, verbose);
 564
 565         return 0;
 566 }
 567
 568 static void server_cache_machine_id(Server *s) {
 569         sd_id128_t id;
 570         int r;
 571
 572         assert(s);
 573
 574         r = sd_id128_get_machine(&id);
 575         if (r < 0)
 576                 return;
 577
 578         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 579 }
 580
 581 static void server_cache_boot_id(Server *s) {
 582         sd_id128_t id;
 583         int r;
 584
 585         assert(s);
 586
 587         r = sd_id128_get_boot(&id);
 588         if (r < 0)
 589                 return;
 590
 591         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 592 }
 593
 594 static void server_cache_hostname(Server *s) {
 595         _cleanup_free_ char *t = NULL;
 596         char *x;
 597
 598         assert(s);
 599
 600         t = gethostname_malloc();
 601         if (!t)
 602                 return;
 603
 604         x = strappend("_HOSTNAME=", t);
 605         if (!x)
 606                 return;
 607
 608         free(s->hostname_field);
 609         s->hostname_field = x;
 610 }
 611
 612 static bool shall_try_append_again(JournalFile *f, int r) {
 613         switch(r) {
 614
 615         case -E2BIG:           /* Hit configured limit          */
 616         case -EFBIG:           /* Hit fs limit                  */
 617         case -EDQUOT:          /* Quota limit hit               */
 618         case -ENOSPC:          /* Disk full                     */
 619                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 620                 return true;
 621
 622         case -EIO:             /* I/O error of some kind (mmap) */
 623                 log_warning("%s: IO error, rotating.", f->path);
 624                 return true;
 625
 626         case -EHOSTDOWN:       /* Other machine                 */
 627                 log_info("%s: Journal file from other machine, rotating.", f->path);
 628                 return true;
 629
 630         case -EBUSY:           /* Unclean shutdown              */
 631                 log_info("%s: Unclean shutdown, rotating.", f->path);
 632                 return true;
 633
 634         case -EPROTONOSUPPORT: /* Unsupported feature           */
 635                 log_info("%s: Unsupported feature, rotating.", f->path);
 636                 return true;
 637
 638         case -EBADMSG:         /* Corrupted                     */
 639         case -ENODATA:         /* Truncated                     */
 640         case -ESHUTDOWN:       /* Already archived              */
 641                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 642                 return true;
 643
 644         case -EIDRM:           /* Journal file has been deleted */
 645                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 646                 return true;
 647
 648         case -ETXTBSY:         /* Journal file is from the future */
 649                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 650                 return true;
 651
 652         default:
 653                 return false;
 654         }
 655 }
 656
 657 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
 658         bool vacuumed = false, rotate = false;
 659         struct dual_timestamp ts;
 660         JournalFile *f;
 661         int r;
 662
 663         assert(s);
 664         assert(iovec);
 665         assert(n > 0);
 666
 667         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 668          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 669          * processing it, as we want strictly linear ordering in what we write out.) */
 670         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 671         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 672
 673         if (ts.realtime < s->last_realtime_clock) {
 674                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 675                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 676                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 677                  * bisection works correctly. */
 678
 679                 log_debug("Time jumped backwards, rotating.");
 680                 rotate = true;
 681         } else {
 682
 683                 f = find_journal(s, uid);
 684                 if (!f)
 685                         return;
 686
 687                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 688                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 689                         rotate = true;
 690                 }
 691         }
 692
 693         if (rotate) {
 694                 server_rotate(s);
 695                 server_vacuum(s, false);
 696                 vacuumed = true;
 697
 698                 f = find_journal(s, uid);
 699                 if (!f)
 700                         return;
 701         }
 702
 703         s->last_realtime_clock = ts.realtime;
 704
 705         r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
 706         if (r >= 0) {
 707                 server_schedule_sync(s, priority);
 708                 return;
 709         }
 710
 711         if (vacuumed || !shall_try_append_again(f, r)) {
 712                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 713                 return;
 714         }
 715
 716         server_rotate(s);
 717         server_vacuum(s, false);
 718
 719         f = find_journal(s, uid);
 720         if (!f)
 721                 return;
 722
 723         log_debug("Retrying write.");
 724         r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
 725         if (r < 0)
 726                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 727         else
 728                 server_schedule_sync(s, priority);
 729 }
 730
 731 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
 732         if (isset(value)) {                                             \
 733                 char *k;                                                \
 734                 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
 735                 sprintf(k, field "=" format, value);                    \
 736                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 737         }
 738
 739 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
 740         if (!isempty(value)) {                                          \
 741                 char *k;                                                \
 742                 k = strjoina(field "=", value);                         \
 743                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 744         }
 745
 746 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
 747         if (!sd_id128_is_null(value)) {                                 \
 748                 char *k;                                                \
 749                 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
 750                 sd_id128_to_string(value, stpcpy(k, field "="));        \
 751                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 752         }
 753
 754 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
 755         if (value_size > 0) {                                           \
 756                 char *k;                                                \
 757                 k = newa(char, STRLEN(field "=") + value_size + 1);     \
 758                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
 759                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 760         }                                                               \
 761
 762 static void dispatch_message_real(
 763                 Server *s,
 764                 struct iovec *iovec, size_t n, size_t m,
 765                 const ClientContext *c,
 766                 const struct timeval *tv,
 767                 int priority,
 768                 pid_t object_pid) {
 769
 770         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 771         uid_t journal_uid;
 772         ClientContext *o;
 773
 774         assert(s);
 775         assert(iovec);
 776         assert(n > 0);
 777         assert(n +
 778                N_IOVEC_META_FIELDS +
 779                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 780                client_context_extra_fields_n_iovec(c) <= m);
 781
 782         if (c) {
 783                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 784                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 785                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 786
 787                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 788                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 789                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 790                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 791
 792                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 793
 794                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 795                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 796
 797                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 798                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 799                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 800                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 801                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 802                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 803                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 804
 805                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 806
 807                 if (c->extra_fields_n_iovec > 0) {
 808                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 809                         n += c->extra_fields_n_iovec;
 810                 }
 811         }
 812
 813         assert(n <= m);
 814
 815         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 816
 817                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 818                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 819                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 820
 821                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 822                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 823                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 824                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 825
 826                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 827
 828                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 829                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 830
 831                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 832                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 833                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 834                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 835                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 836                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 837                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 838
 839                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 840         }
 841
 842         assert(n <= m);
 843
 844         if (tv) {
 845                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 846                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 847         }
 848
 849         /* Note that strictly speaking storing the boot id here is
 850          * redundant since the entry includes this in-line
 851          * anyway. However, we need this indexed, too. */
 852         if (!isempty(s->boot_id_field))
 853                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 854
 855         if (!isempty(s->machine_id_field))
 856                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 857
 858         if (!isempty(s->hostname_field))
 859                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 860
 861         assert(n <= m);
 862
 863         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
 864                 /* Split up strictly by (non-root) UID */
 865                 journal_uid = c->uid;
 866         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
 867                 /* Split up by login UIDs.  We do this only if the
 868                  * realuid is not root, in order not to accidentally
 869                  * leak privileged information to the user that is
 870                  * logged by a privileged process that is part of an
 871                  * unprivileged session. */
 872                 journal_uid = c->owner_uid;
 873         else
 874                 journal_uid = 0;
 875
 876         write_to_journal(s, journal_uid, iovec, n, priority);
 877 }
 878
 879 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
 880
 881         struct iovec *iovec;
 882         size_t n = 0, k, m;
 883         va_list ap;
 884         int r;
 885
 886         assert(s);
 887         assert(format);
 888
 889         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
 890         iovec = newa(struct iovec, m);
 891
 892         assert_cc(3 == LOG_FAC(LOG_DAEMON));
 893         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
 894         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
 895
 896         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
 897         assert_cc(6 == LOG_INFO);
 898         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
 899
 900         if (message_id)
 901                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
 902         k = n;
 903
 904         va_start(ap, format);
 905         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
 906         /* Error handling below */
 907         va_end(ap);
 908
 909         if (r >= 0)
 910                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 911
 912         while (k < n)
 913                 free(iovec[k++].iov_base);
 914
 915         if (r < 0) {
 916                 /* We failed to format the message. Emit a warning instead. */
 917                 char buf[LINE_MAX];
 918
 919                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
 920
 921                 n = 3;
 922                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
 923                 iovec[n++] = IOVEC_MAKE_STRING(buf);
 924                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 925         }
 926 }
 927
 928 void server_dispatch_message(
 929                 Server *s,
 930                 struct iovec *iovec, size_t n, size_t m,
 931                 ClientContext *c,
 932                 const struct timeval *tv,
 933                 int priority,
 934                 pid_t object_pid) {
 935
 936         uint64_t available = 0;
 937         int rl;
 938
 939         assert(s);
 940         assert(iovec || n == 0);
 941
 942         if (n == 0)
 943                 return;
 944
 945         if (LOG_PRI(priority) > s->max_level_store)
 946                 return;
 947
 948         /* Stop early in case the information will not be stored
 949          * in a journal. */
 950         if (s->storage == STORAGE_NONE)
 951                 return;
 952
 953         if (c && c->unit) {
 954                 (void) determine_space(s, &available, NULL);
 955
 956                 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
 957                 if (rl == 0)
 958                         return;
 959
 960                 /* Write a suppression message if we suppressed something */
 961                 if (rl > 1)
 962                         server_driver_message(s, c->pid,
 963                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
 964                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
 965                                               "N_DROPPED=%i", rl - 1,
 966                                               NULL);
 967         }
 968
 969         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
 970 }
 971
 972 int server_flush_to_var(Server *s, bool require_flag_file) {
 973         sd_id128_t machine;
 974         sd_journal *j = NULL;
 975         char ts[FORMAT_TIMESPAN_MAX];
 976         usec_t start;
 977         unsigned n = 0;
 978         int r;
 979
 980         assert(s);
 981
 982         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
 983                 return 0;
 984
 985         if (!s->runtime_journal)
 986                 return 0;
 987
 988         if (require_flag_file && !flushed_flag_is_set())
 989                 return 0;
 990
 991         (void) system_journal_open(s, true);
 992
 993         if (!s->system_journal)
 994                 return 0;
 995
 996         log_debug("Flushing to /var...");
 997
 998         start = now(CLOCK_MONOTONIC);
 999
1000         r = sd_id128_get_machine(&machine);
1001         if (r < 0)
1002                 return r;
1003
1004         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1005         if (r < 0)
1006                 return log_error_errno(r, "Failed to read runtime journal: %m");
1007
1008         sd_journal_set_data_threshold(j, 0);
1009
1010         SD_JOURNAL_FOREACH(j) {
1011                 Object *o = NULL;
1012                 JournalFile *f;
1013
1014                 f = j->current_file;
1015                 assert(f && f->current_offset > 0);
1016
1017                 n++;
1018
1019                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1020                 if (r < 0) {
1021                         log_error_errno(r, "Can't read entry: %m");
1022                         goto finish;
1023                 }
1024
1025                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1026                 if (r >= 0)
1027                         continue;
1028
1029                 if (!shall_try_append_again(s->system_journal, r)) {
1030                         log_error_errno(r, "Can't write entry: %m");
1031                         goto finish;
1032                 }
1033
1034                 server_rotate(s);
1035                 server_vacuum(s, false);
1036
1037                 if (!s->system_journal) {
1038                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1039                         r = -EIO;
1040                         goto finish;
1041                 }
1042
1043                 log_debug("Retrying write.");
1044                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1045                 if (r < 0) {
1046                         log_error_errno(r, "Can't write entry: %m");
1047                         goto finish;
1048                 }
1049         }
1050
1051         r = 0;
1052
1053 finish:
1054         journal_file_post_change(s->system_journal);
1055
1056         s->runtime_journal = journal_file_close(s->runtime_journal);
1057
1058         if (r >= 0)
1059                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1060
1061         sd_journal_close(j);
1062
1063         server_driver_message(s, 0, NULL,
1064                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1065                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1066                                           n),
1067                               NULL);
1068
1069         return r;
1070 }
1071
1072 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1073         Server *s = userdata;
1074         struct ucred *ucred = NULL;
1075         struct timeval *tv = NULL;
1076         struct cmsghdr *cmsg;
1077         char *label = NULL;
1078         size_t label_len = 0, m;
1079         struct iovec iovec;
1080         ssize_t n;
1081         int *fds = NULL, v = 0;
1082         unsigned n_fds = 0;
1083
1084         union {
1085                 struct cmsghdr cmsghdr;
1086
1087                 /* We use NAME_MAX space for the SELinux label
1088                  * here. The kernel currently enforces no
1089                  * limit, but according to suggestions from
1090                  * the SELinux people this will change and it
1091                  * will probably be identical to NAME_MAX. For
1092                  * now we use that, but this should be updated
1093                  * one day when the final limit is known. */
1094                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1095                             CMSG_SPACE(sizeof(struct timeval)) +
1096                             CMSG_SPACE(sizeof(int)) + /* fd */
1097                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1098         } control = {};
1099
1100         union sockaddr_union sa = {};
1101
1102         struct msghdr msghdr = {
1103                 .msg_iov = &iovec,
1104                 .msg_iovlen = 1,
1105                 .msg_control = &control,
1106                 .msg_controllen = sizeof(control),
1107                 .msg_name = &sa,
1108                 .msg_namelen = sizeof(sa),
1109         };
1110
1111         assert(s);
1112         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1113
1114         if (revents != EPOLLIN) {
1115                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1116                 return -EIO;
1117         }
1118
1119         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1120          * it.) */
1121         (void) ioctl(fd, SIOCINQ, &v);
1122
1123         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1124         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1125                             (size_t) LINE_MAX,
1126                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1127
1128         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1129                 return log_oom();
1130
1131         iovec.iov_base = s->buffer;
1132         iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1133
1134         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1135         if (n < 0) {
1136                 if (IN_SET(errno, EINTR, EAGAIN))
1137                         return 0;
1138
1139                 return log_error_errno(errno, "recvmsg() failed: %m");
1140         }
1141
1142         CMSG_FOREACH(cmsg, &msghdr) {
1143
1144                 if (cmsg->cmsg_level == SOL_SOCKET &&
1145                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1146                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1147                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1148                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1149                          cmsg->cmsg_type == SCM_SECURITY) {
1150                         label = (char*) CMSG_DATA(cmsg);
1151                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1152                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1153                            cmsg->cmsg_type == SO_TIMESTAMP &&
1154                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1155                         tv = (struct timeval*) CMSG_DATA(cmsg);
1156                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1157                          cmsg->cmsg_type == SCM_RIGHTS) {
1158                         fds = (int*) CMSG_DATA(cmsg);
1159                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1160                 }
1161         }
1162
1163         /* And a trailing NUL, just in case */
1164         s->buffer[n] = 0;
1165
1166         if (fd == s->syslog_fd) {
1167                 if (n > 0 && n_fds == 0)
1168                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1169                 else if (n_fds > 0)
1170                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1171
1172         } else if (fd == s->native_fd) {
1173                 if (n > 0 && n_fds == 0)
1174                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1175                 else if (n == 0 && n_fds == 1)
1176                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1177                 else if (n_fds > 0)
1178                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1179
1180         } else {
1181                 assert(fd == s->audit_fd);
1182
1183                 if (n > 0 && n_fds == 0)
1184                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1185                 else if (n_fds > 0)
1186                         log_warning("Got file descriptors via audit socket. Ignoring.");
1187         }
1188
1189         close_many(fds, n_fds);
1190         return 0;
1191 }
1192
1193 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1194         Server *s = userdata;
1195         int r;
1196
1197         assert(s);
1198
1199         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1200
1201         (void) server_flush_to_var(s, false);
1202         server_sync(s);
1203         server_vacuum(s, false);
1204
1205         r = touch("/run/systemd/journal/flushed");
1206         if (r < 0)
1207                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1208
1209         server_space_usage_message(s, NULL);
1210         return 0;
1211 }
1212
1213 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1214         Server *s = userdata;
1215         int r;
1216
1217         assert(s);
1218
1219         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1220         server_rotate(s);
1221         server_vacuum(s, true);
1222
1223         if (s->system_journal)
1224                 patch_min_use(&s->system_storage);
1225         if (s->runtime_journal)
1226                 patch_min_use(&s->runtime_storage);
1227
1228         /* Let clients know when the most recent rotation happened. */
1229         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1230         if (r < 0)
1231                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1232
1233         return 0;
1234 }
1235
1236 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1237         Server *s = userdata;
1238
1239         assert(s);
1240
1241         log_received_signal(LOG_INFO, si);
1242
1243         sd_event_exit(s->event, 0);
1244         return 0;
1245 }
1246
1247 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1248         Server *s = userdata;
1249         int r;
1250
1251         assert(s);
1252
1253         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1254
1255         server_sync(s);
1256
1257         /* Let clients know when the most recent sync happened. */
1258         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1259         if (r < 0)
1260                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1261
1262         return 0;
1263 }
1264
1265 static int setup_signals(Server *s) {
1266         int r;
1267
1268         assert(s);
1269
1270         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1271
1272         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1273         if (r < 0)
1274                 return r;
1275
1276         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1277         if (r < 0)
1278                 return r;
1279
1280         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1281         if (r < 0)
1282                 return r;
1283
1284         /* Let's process SIGTERM late, so that we flush all queued
1285          * messages to disk before we exit */
1286         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1287         if (r < 0)
1288                 return r;
1289
1290         /* When journald is invoked on the terminal (when debugging),
1291          * it's useful if C-c is handled equivalent to SIGTERM. */
1292         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1293         if (r < 0)
1294                 return r;
1295
1296         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1297         if (r < 0)
1298                 return r;
1299
1300         /* SIGRTMIN+1 causes an immediate sync. We process this very
1301          * late, so that everything else queued at this point is
1302          * really written to disk. Clients can watch
1303          * /run/systemd/journal/synced with inotify until its mtime
1304          * changes to see when a sync happened. */
1305         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1306         if (r < 0)
1307                 return r;
1308
1309         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1310         if (r < 0)
1311                 return r;
1312
1313         return 0;
1314 }
1315
1316 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1317         Server *s = data;
1318         int r;
1319
1320         assert(s);
1321
1322         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1323
1324                 r = value ? parse_boolean(value) : true;
1325                 if (r < 0)
1326                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1327                 else
1328                         s->forward_to_syslog = r;
1329
1330         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1331
1332                 r = value ? parse_boolean(value) : true;
1333                 if (r < 0)
1334                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1335                 else
1336                         s->forward_to_kmsg = r;
1337
1338         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1339
1340                 r = value ? parse_boolean(value) : true;
1341                 if (r < 0)
1342                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1343                 else
1344                         s->forward_to_console = r;
1345
1346         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1347
1348                 r = value ? parse_boolean(value) : true;
1349                 if (r < 0)
1350                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1351                 else
1352                         s->forward_to_wall = r;
1353
1354         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1355
1356                 if (proc_cmdline_value_missing(key, value))
1357                         return 0;
1358
1359                 r = log_level_from_string(value);
1360                 if (r < 0)
1361                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1362                 else
1363                         s->max_level_console = r;
1364
1365         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1366
1367                 if (proc_cmdline_value_missing(key, value))
1368                         return 0;
1369
1370                 r = log_level_from_string(value);
1371                 if (r < 0)
1372                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1373                 else
1374                         s->max_level_store = r;
1375
1376         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1377
1378                 if (proc_cmdline_value_missing(key, value))
1379                         return 0;
1380
1381                 r = log_level_from_string(value);
1382                 if (r < 0)
1383                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1384                 else
1385                         s->max_level_syslog = r;
1386
1387         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1388
1389                 if (proc_cmdline_value_missing(key, value))
1390                         return 0;
1391
1392                 r = log_level_from_string(value);
1393                 if (r < 0)
1394                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1395                 else
1396                         s->max_level_kmsg = r;
1397
1398         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1399
1400                 if (proc_cmdline_value_missing(key, value))
1401                         return 0;
1402
1403                 r = log_level_from_string(value);
1404                 if (r < 0)
1405                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1406                 else
1407                         s->max_level_wall = r;
1408
1409         } else if (startswith(key, "systemd.journald"))
1410                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1411
1412         /* do not warn about state here, since probably systemd already did */
1413         return 0;
1414 }
1415
1416 static int server_parse_config_file(Server *s) {
1417         assert(s);
1418
1419         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1420                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1421                                         "Journal\0",
1422                                         config_item_perf_lookup, journald_gperf_lookup,
1423                                         CONFIG_PARSE_WARN, s);
1424 }
1425
1426 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1427         Server *s = userdata;
1428
1429         assert(s);
1430
1431         server_sync(s);
1432         return 0;
1433 }
1434
1435 int server_schedule_sync(Server *s, int priority) {
1436         int r;
1437
1438         assert(s);
1439
1440         if (priority <= LOG_CRIT) {
1441                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1442                 server_sync(s);
1443                 return 0;
1444         }
1445
1446         if (s->sync_scheduled)
1447                 return 0;
1448
1449         if (s->sync_interval_usec > 0) {
1450                 usec_t when;
1451
1452                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1453                 if (r < 0)
1454                         return r;
1455
1456                 when += s->sync_interval_usec;
1457
1458                 if (!s->sync_event_source) {
1459                         r = sd_event_add_time(
1460                                         s->event,
1461                                         &s->sync_event_source,
1462                                         CLOCK_MONOTONIC,
1463                                         when, 0,
1464                                         server_dispatch_sync, s);
1465                         if (r < 0)
1466                                 return r;
1467
1468                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1469                 } else {
1470                         r = sd_event_source_set_time(s->sync_event_source, when);
1471                         if (r < 0)
1472                                 return r;
1473
1474                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1475                 }
1476                 if (r < 0)
1477                         return r;
1478
1479                 s->sync_scheduled = true;
1480         }
1481
1482         return 0;
1483 }
1484
1485 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1486         Server *s = userdata;
1487
1488         assert(s);
1489
1490         server_cache_hostname(s);
1491         return 0;
1492 }
1493
1494 static int server_open_hostname(Server *s) {
1495         int r;
1496
1497         assert(s);
1498
1499         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1500         if (s->hostname_fd < 0)
1501                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1502
1503         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1504         if (r < 0) {
1505                 /* kernels prior to 3.2 don't support polling this file. Ignore
1506                  * the failure. */
1507                 if (r == -EPERM) {
1508                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1509                         s->hostname_fd = safe_close(s->hostname_fd);
1510                         return 0;
1511                 }
1512
1513                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1514         }
1515
1516         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1517         if (r < 0)
1518                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1519
1520         return 0;
1521 }
1522
1523 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1524         Server *s = userdata;
1525         int r;
1526
1527         assert(s);
1528         assert(s->notify_event_source == es);
1529         assert(s->notify_fd == fd);
1530
1531         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1532          * message on it. Either it's the watchdog event, the initial
1533          * READY=1 event or an stdout stream event. If there's nothing
1534          * to write anymore, turn our event source off. The next time
1535          * there's something to send it will be turned on again. */
1536
1537         if (!s->sent_notify_ready) {
1538                 static const char p[] =
1539                         "READY=1\n"
1540                         "STATUS=Processing requests...";
1541                 ssize_t l;
1542
1543                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1544                 if (l < 0) {
1545                         if (errno == EAGAIN)
1546                                 return 0;
1547
1548                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1549                 }
1550
1551                 s->sent_notify_ready = true;
1552                 log_debug("Sent READY=1 notification.");
1553
1554         } else if (s->send_watchdog) {
1555
1556                 static const char p[] =
1557                         "WATCHDOG=1";
1558
1559                 ssize_t l;
1560
1561                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1562                 if (l < 0) {
1563                         if (errno == EAGAIN)
1564                                 return 0;
1565
1566                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1567                 }
1568
1569                 s->send_watchdog = false;
1570                 log_debug("Sent WATCHDOG=1 notification.");
1571
1572         } else if (s->stdout_streams_notify_queue)
1573                 /* Dispatch one stream notification event */
1574                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1575
1576         /* Leave us enabled if there's still more to do. */
1577         if (s->send_watchdog || s->stdout_streams_notify_queue)
1578                 return 0;
1579
1580         /* There was nothing to do anymore, let's turn ourselves off. */
1581         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1582         if (r < 0)
1583                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1584
1585         return 0;
1586 }
1587
1588 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1589         Server *s = userdata;
1590         int r;
1591
1592         assert(s);
1593
1594         s->send_watchdog = true;
1595
1596         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1597         if (r < 0)
1598                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1599
1600         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1601         if (r < 0)
1602                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1603
1604         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1605         if (r < 0)
1606                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1607
1608         return 0;
1609 }
1610
1611 static int server_connect_notify(Server *s) {
1612         union sockaddr_union sa = {
1613                 .un.sun_family = AF_UNIX,
1614         };
1615         const char *e;
1616         int r;
1617
1618         assert(s);
1619         assert(s->notify_fd < 0);
1620         assert(!s->notify_event_source);
1621
1622         /*
1623           So here's the problem: we'd like to send notification
1624           messages to PID 1, but we cannot do that via sd_notify(),
1625           since that's synchronous, and we might end up blocking on
1626           it. Specifically: given that PID 1 might block on
1627           dbus-daemon during IPC, and dbus-daemon is logging to us,
1628           and might hence block on us, we might end up in a deadlock
1629           if we block on sending PID 1 notification messages — by
1630           generating a full blocking circle. To avoid this, let's
1631           create a non-blocking socket, and connect it to the
1632           notification socket, and then wait for POLLOUT before we
1633           send anything. This should efficiently avoid any deadlocks,
1634           as we'll never block on PID 1, hence PID 1 can safely block
1635           on dbus-daemon which can safely block on us again.
1636
1637           Don't think that this issue is real? It is, see:
1638           https://github.com/systemd/systemd/issues/1505
1639         */
1640
1641         e = getenv("NOTIFY_SOCKET");
1642         if (!e)
1643                 return 0;
1644
1645         if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
1646                 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1647                 return -EINVAL;
1648         }
1649
1650         if (strlen(e) > sizeof(sa.un.sun_path)) {
1651                 log_error("NOTIFY_SOCKET path too long: %s", e);
1652                 return -EINVAL;
1653         }
1654
1655         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1656         if (s->notify_fd < 0)
1657                 return log_error_errno(errno, "Failed to create notify socket: %m");
1658
1659         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1660
1661         strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1662         if (sa.un.sun_path[0] == '@')
1663                 sa.un.sun_path[0] = 0;
1664
1665         r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1666         if (r < 0)
1667                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1668
1669         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1670         if (r < 0)
1671                 return log_error_errno(r, "Failed to watch notification socket: %m");
1672
1673         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1674                 s->send_watchdog = true;
1675
1676                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1677                 if (r < 0)
1678                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1679         }
1680
1681         /* This should fire pretty soon, which we'll use to send the
1682          * READY=1 event. */
1683
1684         return 0;
1685 }
1686
1687 int server_init(Server *s) {
1688         _cleanup_fdset_free_ FDSet *fds = NULL;
1689         int n, r, fd;
1690         bool no_sockets;
1691
1692         assert(s);
1693
1694         zero(*s);
1695         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1696         s->compress = true;
1697         s->seal = true;
1698         s->read_kmsg = true;
1699
1700         s->watchdog_usec = USEC_INFINITY;
1701
1702         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1703         s->sync_scheduled = false;
1704
1705         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1706         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1707
1708         s->forward_to_wall = true;
1709
1710         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1711
1712         s->max_level_store = LOG_DEBUG;
1713         s->max_level_syslog = LOG_DEBUG;
1714         s->max_level_kmsg = LOG_NOTICE;
1715         s->max_level_console = LOG_INFO;
1716         s->max_level_wall = LOG_EMERG;
1717
1718         s->line_max = DEFAULT_LINE_MAX;
1719
1720         journal_reset_metrics(&s->system_storage.metrics);
1721         journal_reset_metrics(&s->runtime_storage.metrics);
1722
1723         server_parse_config_file(s);
1724
1725         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1726         if (r < 0)
1727                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1728
1729         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1730                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1731                           s->rate_limit_interval, s->rate_limit_burst);
1732                 s->rate_limit_interval = s->rate_limit_burst = 0;
1733         }
1734
1735         (void) mkdir_p("/run/systemd/journal", 0755);
1736
1737         s->user_journals = ordered_hashmap_new(NULL);
1738         if (!s->user_journals)
1739                 return log_oom();
1740
1741         s->mmap = mmap_cache_new();
1742         if (!s->mmap)
1743                 return log_oom();
1744
1745         s->deferred_closes = set_new(NULL);
1746         if (!s->deferred_closes)
1747                 return log_oom();
1748
1749         r = sd_event_default(&s->event);
1750         if (r < 0)
1751                 return log_error_errno(r, "Failed to create event loop: %m");
1752
1753         n = sd_listen_fds(true);
1754         if (n < 0)
1755                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1756
1757         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1758
1759                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1760
1761                         if (s->native_fd >= 0) {
1762                                 log_error("Too many native sockets passed.");
1763                                 return -EINVAL;
1764                         }
1765
1766                         s->native_fd = fd;
1767
1768                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1769
1770                         if (s->stdout_fd >= 0) {
1771                                 log_error("Too many stdout sockets passed.");
1772                                 return -EINVAL;
1773                         }
1774
1775                         s->stdout_fd = fd;
1776
1777                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1778                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1779
1780                         if (s->syslog_fd >= 0) {
1781                                 log_error("Too many /dev/log sockets passed.");
1782                                 return -EINVAL;
1783                         }
1784
1785                         s->syslog_fd = fd;
1786
1787                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1788
1789                         if (s->audit_fd >= 0) {
1790                                 log_error("Too many audit sockets passed.");
1791                                 return -EINVAL;
1792                         }
1793
1794                         s->audit_fd = fd;
1795
1796                 } else {
1797
1798                         if (!fds) {
1799                                 fds = fdset_new();
1800                                 if (!fds)
1801                                         return log_oom();
1802                         }
1803
1804                         r = fdset_put(fds, fd);
1805                         if (r < 0)
1806                                 return log_oom();
1807                 }
1808         }
1809
1810         /* Try to restore streams, but don't bother if this fails */
1811         (void) server_restore_streams(s, fds);
1812
1813         if (fdset_size(fds) > 0) {
1814                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1815                 fds = fdset_free(fds);
1816         }
1817
1818         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1819
1820         /* always open stdout, syslog, native, and kmsg sockets */
1821
1822         /* systemd-journald.socket: /run/systemd/journal/stdout */
1823         r = server_open_stdout_socket(s);
1824         if (r < 0)
1825                 return r;
1826
1827         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1828         r = server_open_syslog_socket(s);
1829         if (r < 0)
1830                 return r;
1831
1832         /* systemd-journald.socket: /run/systemd/journal/socket */
1833         r = server_open_native_socket(s);
1834         if (r < 0)
1835                 return r;
1836
1837         /* /dev/kmsg */
1838         r = server_open_dev_kmsg(s);
1839         if (r < 0)
1840                 return r;
1841
1842         /* Unless we got *some* sockets and not audit, open audit socket */
1843         if (s->audit_fd >= 0 || no_sockets) {
1844                 r = server_open_audit(s);
1845                 if (r < 0)
1846                         return r;
1847         }
1848
1849         r = server_open_kernel_seqnum(s);
1850         if (r < 0)
1851                 return r;
1852
1853         r = server_open_hostname(s);
1854         if (r < 0)
1855                 return r;
1856
1857         r = setup_signals(s);
1858         if (r < 0)
1859                 return r;
1860
1861         s->udev = udev_new();
1862         if (!s->udev)
1863                 return -ENOMEM;
1864
1865         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1866         if (!s->rate_limit)
1867                 return -ENOMEM;
1868
1869         r = cg_get_root_path(&s->cgroup_root);
1870         if (r < 0)
1871                 return r;
1872
1873         server_cache_hostname(s);
1874         server_cache_boot_id(s);
1875         server_cache_machine_id(s);
1876
1877         s->runtime_storage.name = "Runtime journal";
1878         s->system_storage.name = "System journal";
1879
1880         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1881         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1882         if (!s->runtime_storage.path || !s->system_storage.path)
1883                 return -ENOMEM;
1884
1885         (void) server_connect_notify(s);
1886
1887         (void) client_context_acquire_default(s);
1888
1889         return system_journal_open(s, false);
1890 }
1891
1892 void server_maybe_append_tags(Server *s) {
1893 #if HAVE_GCRYPT
1894         JournalFile *f;
1895         Iterator i;
1896         usec_t n;
1897
1898         n = now(CLOCK_REALTIME);
1899
1900         if (s->system_journal)
1901                 journal_file_maybe_append_tag(s->system_journal, n);
1902
1903         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1904                 journal_file_maybe_append_tag(f, n);
1905 #endif
1906 }
1907
1908 void server_done(Server *s) {
1909         assert(s);
1910
1911         set_free_with_destructor(s->deferred_closes, journal_file_close);
1912
1913         while (s->stdout_streams)
1914                 stdout_stream_free(s->stdout_streams);
1915
1916         client_context_flush_all(s);
1917
1918         if (s->system_journal)
1919                 (void) journal_file_close(s->system_journal);
1920
1921         if (s->runtime_journal)
1922                 (void) journal_file_close(s->runtime_journal);
1923
1924         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
1925
1926         sd_event_source_unref(s->syslog_event_source);
1927         sd_event_source_unref(s->native_event_source);
1928         sd_event_source_unref(s->stdout_event_source);
1929         sd_event_source_unref(s->dev_kmsg_event_source);
1930         sd_event_source_unref(s->audit_event_source);
1931         sd_event_source_unref(s->sync_event_source);
1932         sd_event_source_unref(s->sigusr1_event_source);
1933         sd_event_source_unref(s->sigusr2_event_source);
1934         sd_event_source_unref(s->sigterm_event_source);
1935         sd_event_source_unref(s->sigint_event_source);
1936         sd_event_source_unref(s->sigrtmin1_event_source);
1937         sd_event_source_unref(s->hostname_event_source);
1938         sd_event_source_unref(s->notify_event_source);
1939         sd_event_source_unref(s->watchdog_event_source);
1940         sd_event_unref(s->event);
1941
1942         safe_close(s->syslog_fd);
1943         safe_close(s->native_fd);
1944         safe_close(s->stdout_fd);
1945         safe_close(s->dev_kmsg_fd);
1946         safe_close(s->audit_fd);
1947         safe_close(s->hostname_fd);
1948         safe_close(s->notify_fd);
1949
1950         if (s->rate_limit)
1951                 journal_rate_limit_free(s->rate_limit);
1952
1953         if (s->kernel_seqnum)
1954                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1955
1956         free(s->buffer);
1957         free(s->tty_path);
1958         free(s->cgroup_root);
1959         free(s->hostname_field);
1960         free(s->runtime_storage.path);
1961         free(s->system_storage.path);
1962
1963         if (s->mmap)
1964                 mmap_cache_unref(s->mmap);
1965
1966         udev_unref(s->udev);
1967 }
1968
1969 static const char* const storage_table[_STORAGE_MAX] = {
1970         [STORAGE_AUTO] = "auto",
1971         [STORAGE_VOLATILE] = "volatile",
1972         [STORAGE_PERSISTENT] = "persistent",
1973         [STORAGE_NONE] = "none"
1974 };
1975
1976 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1977 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1978
1979 static const char* const split_mode_table[_SPLIT_MAX] = {
1980         [SPLIT_LOGIN] = "login",
1981         [SPLIT_UID] = "uid",
1982         [SPLIT_NONE] = "none",
1983 };
1984
1985 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1986 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
1987
1988 int config_parse_line_max(
1989                 const char* unit,
1990                 const char *filename,
1991                 unsigned line,
1992                 const char *section,
1993                 unsigned section_line,
1994                 const char *lvalue,
1995                 int ltype,
1996                 const char *rvalue,
1997                 void *data,
1998                 void *userdata) {
1999
2000         size_t *sz = data;
2001         int r;
2002
2003         assert(filename);
2004         assert(lvalue);
2005         assert(rvalue);
2006         assert(data);
2007
2008         if (isempty(rvalue))
2009                 /* Empty assignment means default */
2010                 *sz = DEFAULT_LINE_MAX;
2011         else {
2012                 uint64_t v;
2013
2014                 r = parse_size(rvalue, 1024, &v);
2015                 if (r < 0) {
2016                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2017                         return 0;
2018                 }
2019
2020                 if (v < 79) {
2021                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2022                          * terminal size is 80ch, and it might make sense to break one character before the natural
2023                          * line break would occur on that. */
2024                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2025                         *sz = 79;
2026                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2027                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2028                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2029                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2030                          * fail much earlier anyway. */
2031                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2032                         *sz = SSIZE_MAX-1;
2033                 } else
2034                         *sz = (size_t) v;
2035         }
2036
2037         return 0;
2038 }