src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2011 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #if HAVE_SELINUX
  22 #include <selinux/selinux.h>
  23 #endif
  24 #include <sys/ioctl.h>
  25 #include <sys/mman.h>
  26 #include <sys/signalfd.h>
  27 #include <sys/statvfs.h>
  28 #include <linux/sockios.h>
  29
  30 #include "libudev.h"
  31 #include "sd-daemon.h"
  32 #include "sd-journal.h"
  33 #include "sd-messages.h"
  34
  35 #include "acl-util.h"
  36 #include "alloc-util.h"
  37 #include "audit-util.h"
  38 #include "cgroup-util.h"
  39 #include "conf-parser.h"
  40 #include "dirent-util.h"
  41 #include "extract-word.h"
  42 #include "fd-util.h"
  43 #include "fileio.h"
  44 #include "format-util.h"
  45 #include "fs-util.h"
  46 #include "hashmap.h"
  47 #include "hostname-util.h"
  48 #include "id128-util.h"
  49 #include "io-util.h"
  50 #include "journal-authenticate.h"
  51 #include "journal-file.h"
  52 #include "journal-internal.h"
  53 #include "journal-vacuum.h"
  54 #include "journald-audit.h"
  55 #include "journald-context.h"
  56 #include "journald-kmsg.h"
  57 #include "journald-native.h"
  58 #include "journald-rate-limit.h"
  59 #include "journald-server.h"
  60 #include "journald-stream.h"
  61 #include "journald-syslog.h"
  62 #include "log.h"
  63 #include "missing.h"
  64 #include "mkdir.h"
  65 #include "parse-util.h"
  66 #include "proc-cmdline.h"
  67 #include "process-util.h"
  68 #include "rm-rf.h"
  69 #include "selinux-util.h"
  70 #include "signal-util.h"
  71 #include "socket-util.h"
  72 #include "stdio-util.h"
  73 #include "string-table.h"
  74 #include "string-util.h"
  75 #include "syslog-util.h"
  76 #include "user-util.h"
  77
  78 #define USER_JOURNALS_MAX 1024
  79
  80 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
  81 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
  82 #define DEFAULT_RATE_LIMIT_BURST 1000
  83 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
  84
  85 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
  86
  87 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
  88
  89 /* The period to insert between posting changes for coalescing */
  90 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
  91
  92 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
  93  * for a bit of additional metadata. */
  94 #define DEFAULT_LINE_MAX (48*1024)
  95
  96 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  97         _cleanup_closedir_ DIR *d = NULL;
  98         struct dirent *de;
  99         struct statvfs ss;
 100
 101         assert(ret_used);
 102         assert(ret_free);
 103
 104         d = opendir(path);
 105         if (!d)
 106                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
 107                                       errno, "Failed to open %s: %m", path);
 108
 109         if (fstatvfs(dirfd(d), &ss) < 0)
 110                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
 111
 112         *ret_free = ss.f_bsize * ss.f_bavail;
 113         *ret_used = 0;
 114         FOREACH_DIRENT_ALL(de, d, break) {
 115                 struct stat st;
 116
 117                 if (!endswith(de->d_name, ".journal") &&
 118                     !endswith(de->d_name, ".journal~"))
 119                         continue;
 120
 121                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 122                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 123                         continue;
 124                 }
 125
 126                 if (!S_ISREG(st.st_mode))
 127                         continue;
 128
 129                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 130         }
 131
 132         return 0;
 133 }
 134
 135 static void cache_space_invalidate(JournalStorageSpace *space) {
 136         zero(*space);
 137 }
 138
 139 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 140         JournalStorageSpace *space;
 141         JournalMetrics *metrics;
 142         uint64_t vfs_used, vfs_avail, avail;
 143         usec_t ts;
 144         int r;
 145
 146         assert(s);
 147
 148         metrics = &storage->metrics;
 149         space = &storage->space;
 150
 151         ts = now(CLOCK_MONOTONIC);
 152
 153         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 154                 return 0;
 155
 156         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 157         if (r < 0)
 158                 return r;
 159
 160         space->vfs_used = vfs_used;
 161         space->vfs_available = vfs_avail;
 162
 163         avail = LESS_BY(vfs_avail, metrics->keep_free);
 164
 165         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 166         space->available = LESS_BY(space->limit, vfs_used);
 167         space->timestamp = ts;
 168         return 1;
 169 }
 170
 171 static void patch_min_use(JournalStorage *storage) {
 172         assert(storage);
 173
 174         /* Let's bump the min_use limit to the current usage on disk. We do
 175          * this when starting up and first opening the journal files. This way
 176          * sudden spikes in disk usage will not cause journald to vacuum files
 177          * without bounds. Note that this means that only a restart of journald
 178          * will make it reset this value. */
 179
 180         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 181 }
 182
 183
 184 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 185         JournalStorage *js;
 186         int r;
 187
 188         assert(s);
 189
 190         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 191
 192         r = cache_space_refresh(s, js);
 193         if (r >= 0) {
 194                 if (available)
 195                         *available = js->space.available;
 196                 if (limit)
 197                         *limit = js->space.limit;
 198         }
 199         return r;
 200 }
 201
 202 void server_space_usage_message(Server *s, JournalStorage *storage) {
 203         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 204              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 205         JournalMetrics *metrics;
 206
 207         assert(s);
 208
 209         if (!storage)
 210                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 211
 212         if (cache_space_refresh(s, storage) < 0)
 213                 return;
 214
 215         metrics = &storage->metrics;
 216         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 217         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 218         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 219         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 220         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 221         format_bytes(fb6, sizeof(fb6), storage->space.available);
 222
 223         server_driver_message(s, 0,
 224                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 225                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 226                                           storage->name, storage->path, fb1, fb5, fb6),
 227                               "JOURNAL_NAME=%s", storage->name,
 228                               "JOURNAL_PATH=%s", storage->path,
 229                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 230                               "CURRENT_USE_PRETTY=%s", fb1,
 231                               "MAX_USE=%"PRIu64, metrics->max_use,
 232                               "MAX_USE_PRETTY=%s", fb2,
 233                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 234                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 235                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 236                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 237                               "LIMIT=%"PRIu64, storage->space.limit,
 238                               "LIMIT_PRETTY=%s", fb5,
 239                               "AVAILABLE=%"PRIu64, storage->space.available,
 240                               "AVAILABLE_PRETTY=%s", fb6,
 241                               NULL);
 242 }
 243
 244 static bool uid_for_system_journal(uid_t uid) {
 245
 246         /* Returns true if the specified UID shall get its data stored in the system journal*/
 247
 248         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 249 }
 250
 251 static void server_add_acls(JournalFile *f, uid_t uid) {
 252 #if HAVE_ACL
 253         int r;
 254 #endif
 255         assert(f);
 256
 257 #if HAVE_ACL
 258         if (uid_for_system_journal(uid))
 259                 return;
 260
 261         r = add_acls_for_user(f->fd, uid);
 262         if (r < 0)
 263                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 264 #endif
 265 }
 266
 267 static int open_journal(
 268                 Server *s,
 269                 bool reliably,
 270                 const char *fname,
 271                 int flags,
 272                 bool seal,
 273                 JournalMetrics *metrics,
 274                 JournalFile **ret) {
 275         int r;
 276         JournalFile *f;
 277
 278         assert(s);
 279         assert(fname);
 280         assert(ret);
 281
 282         if (reliably)
 283                 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 284         else
 285                 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 286         if (r < 0)
 287                 return r;
 288
 289         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 290         if (r < 0) {
 291                 (void) journal_file_close(f);
 292                 return r;
 293         }
 294
 295         *ret = f;
 296         return r;
 297 }
 298
 299 static bool flushed_flag_is_set(void) {
 300         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 301 }
 302
 303 static int system_journal_open(Server *s, bool flush_requested) {
 304         const char *fn;
 305         int r = 0;
 306
 307         if (!s->system_journal &&
 308             IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
 309             (flush_requested || flushed_flag_is_set())) {
 310
 311                 /* If in auto mode: first try to create the machine
 312                  * path, but not the prefix.
 313                  *
 314                  * If in persistent mode: create /var/log/journal and
 315                  * the machine path */
 316
 317                 if (s->storage == STORAGE_PERSISTENT)
 318                         (void) mkdir_p("/var/log/journal/", 0755);
 319
 320                 (void) mkdir(s->system_storage.path, 0755);
 321
 322                 fn = strjoina(s->system_storage.path, "/system.journal");
 323                 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
 324                 if (r >= 0) {
 325                         server_add_acls(s->system_journal, 0);
 326                         (void) cache_space_refresh(s, &s->system_storage);
 327                         patch_min_use(&s->system_storage);
 328                 } else if (r < 0) {
 329                         if (!IN_SET(r, -ENOENT, -EROFS))
 330                                 log_warning_errno(r, "Failed to open system journal: %m");
 331
 332                         r = 0;
 333                 }
 334
 335                 /* If the runtime journal is open, and we're post-flush, we're
 336                  * recovering from a failed system journal rotate (ENOSPC)
 337                  * for which the runtime journal was reopened.
 338                  *
 339                  * Perform an implicit flush to var, leaving the runtime
 340                  * journal closed, now that the system journal is back.
 341                  */
 342                 if (!flush_requested)
 343                         (void) server_flush_to_var(s, true);
 344         }
 345
 346         if (!s->runtime_journal &&
 347             (s->storage != STORAGE_NONE)) {
 348
 349                 fn = strjoina(s->runtime_storage.path, "/system.journal");
 350
 351                 if (s->system_journal) {
 352
 353                         /* Try to open the runtime journal, but only
 354                          * if it already exists, so that we can flush
 355                          * it into the system journal */
 356
 357                         r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
 358                         if (r < 0) {
 359                                 if (r != -ENOENT)
 360                                         log_warning_errno(r, "Failed to open runtime journal: %m");
 361
 362                                 r = 0;
 363                         }
 364
 365                 } else {
 366
 367                         /* OK, we really need the runtime journal, so create
 368                          * it if necessary. */
 369
 370                         (void) mkdir("/run/log", 0755);
 371                         (void) mkdir("/run/log/journal", 0755);
 372                         (void) mkdir_parents(fn, 0750);
 373
 374                         r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
 375                         if (r < 0)
 376                                 return log_error_errno(r, "Failed to open runtime journal: %m");
 377                 }
 378
 379                 if (s->runtime_journal) {
 380                         server_add_acls(s->runtime_journal, 0);
 381                         (void) cache_space_refresh(s, &s->runtime_storage);
 382                         patch_min_use(&s->runtime_storage);
 383                 }
 384         }
 385
 386         return r;
 387 }
 388
 389 static JournalFile* find_journal(Server *s, uid_t uid) {
 390         _cleanup_free_ char *p = NULL;
 391         int r;
 392         JournalFile *f;
 393         sd_id128_t machine;
 394
 395         assert(s);
 396
 397         /* A rotate that fails to create the new journal (ENOSPC) leaves the
 398          * rotated journal as NULL.  Unless we revisit opening, even after
 399          * space is made available we'll continue to return NULL indefinitely.
 400          *
 401          * system_journal_open() is a noop if the journals are already open, so
 402          * we can just call it here to recover from failed rotates (or anything
 403          * else that's left the journals as NULL).
 404          *
 405          * Fixes https://github.com/systemd/systemd/issues/3968 */
 406         (void) system_journal_open(s, false);
 407
 408         /* We split up user logs only on /var, not on /run. If the
 409          * runtime file is open, we write to it exclusively, in order
 410          * to guarantee proper order as soon as we flush /run to
 411          * /var and close the runtime file. */
 412
 413         if (s->runtime_journal)
 414                 return s->runtime_journal;
 415
 416         if (uid_for_system_journal(uid))
 417                 return s->system_journal;
 418
 419         r = sd_id128_get_machine(&machine);
 420         if (r < 0)
 421                 return s->system_journal;
 422
 423         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 424         if (f)
 425                 return f;
 426
 427         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
 428                      SD_ID128_FORMAT_VAL(machine), uid) < 0)
 429                 return s->system_journal;
 430
 431         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 432                 /* Too many open? Then let's close one */
 433                 f = ordered_hashmap_steal_first(s->user_journals);
 434                 assert(f);
 435                 (void) journal_file_close(f);
 436         }
 437
 438         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 439         if (r < 0)
 440                 return s->system_journal;
 441
 442         server_add_acls(f, uid);
 443
 444         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 445         if (r < 0) {
 446                 (void) journal_file_close(f);
 447                 return s->system_journal;
 448         }
 449
 450         return f;
 451 }
 452
 453 static int do_rotate(
 454                 Server *s,
 455                 JournalFile **f,
 456                 const char* name,
 457                 bool seal,
 458                 uint32_t uid) {
 459
 460         int r;
 461         assert(s);
 462
 463         if (!*f)
 464                 return -EINVAL;
 465
 466         r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
 467         if (r < 0) {
 468                 if (*f)
 469                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 470                 else
 471                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 472         }
 473
 474         server_add_acls(*f, uid);
 475
 476         return r;
 477 }
 478
 479 void server_rotate(Server *s) {
 480         JournalFile *f;
 481         void *k;
 482         Iterator i;
 483         int r;
 484
 485         log_debug("Rotating...");
 486
 487         (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
 488         (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
 489
 490         ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
 491                 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
 492                 if (r >= 0)
 493                         ordered_hashmap_replace(s->user_journals, k, f);
 494                 else if (!f)
 495                         /* Old file has been closed and deallocated */
 496                         ordered_hashmap_remove(s->user_journals, k);
 497         }
 498
 499         /* Perform any deferred closes which aren't still offlining. */
 500         SET_FOREACH(f, s->deferred_closes, i)
 501                 if (!journal_file_is_offlining(f)) {
 502                         (void) set_remove(s->deferred_closes, f);
 503                         (void) journal_file_close(f);
 504                 }
 505 }
 506
 507 void server_sync(Server *s) {
 508         JournalFile *f;
 509         Iterator i;
 510         int r;
 511
 512         if (s->system_journal) {
 513                 r = journal_file_set_offline(s->system_journal, false);
 514                 if (r < 0)
 515                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 516         }
 517
 518         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 519                 r = journal_file_set_offline(f, false);
 520                 if (r < 0)
 521                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 522         }
 523
 524         if (s->sync_event_source) {
 525                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 526                 if (r < 0)
 527                         log_error_errno(r, "Failed to disable sync timer source: %m");
 528         }
 529
 530         s->sync_scheduled = false;
 531 }
 532
 533 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 534
 535         int r;
 536
 537         assert(s);
 538         assert(storage);
 539
 540         (void) cache_space_refresh(s, storage);
 541
 542         if (verbose)
 543                 server_space_usage_message(s, storage);
 544
 545         r = journal_directory_vacuum(storage->path, storage->space.limit,
 546                                      storage->metrics.n_max_files, s->max_retention_usec,
 547                                      &s->oldest_file_usec, verbose);
 548         if (r < 0 && r != -ENOENT)
 549                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 550
 551         cache_space_invalidate(&storage->space);
 552 }
 553
 554 int server_vacuum(Server *s, bool verbose) {
 555         assert(s);
 556
 557         log_debug("Vacuuming...");
 558
 559         s->oldest_file_usec = 0;
 560
 561         if (s->system_journal)
 562                 do_vacuum(s, &s->system_storage, verbose);
 563         if (s->runtime_journal)
 564                 do_vacuum(s, &s->runtime_storage, verbose);
 565
 566         return 0;
 567 }
 568
 569 static void server_cache_machine_id(Server *s) {
 570         sd_id128_t id;
 571         int r;
 572
 573         assert(s);
 574
 575         r = sd_id128_get_machine(&id);
 576         if (r < 0)
 577                 return;
 578
 579         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 580 }
 581
 582 static void server_cache_boot_id(Server *s) {
 583         sd_id128_t id;
 584         int r;
 585
 586         assert(s);
 587
 588         r = sd_id128_get_boot(&id);
 589         if (r < 0)
 590                 return;
 591
 592         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 593 }
 594
 595 static void server_cache_hostname(Server *s) {
 596         _cleanup_free_ char *t = NULL;
 597         char *x;
 598
 599         assert(s);
 600
 601         t = gethostname_malloc();
 602         if (!t)
 603                 return;
 604
 605         x = strappend("_HOSTNAME=", t);
 606         if (!x)
 607                 return;
 608
 609         free(s->hostname_field);
 610         s->hostname_field = x;
 611 }
 612
 613 static bool shall_try_append_again(JournalFile *f, int r) {
 614         switch(r) {
 615
 616         case -E2BIG:           /* Hit configured limit          */
 617         case -EFBIG:           /* Hit fs limit                  */
 618         case -EDQUOT:          /* Quota limit hit               */
 619         case -ENOSPC:          /* Disk full                     */
 620                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 621                 return true;
 622
 623         case -EIO:             /* I/O error of some kind (mmap) */
 624                 log_warning("%s: IO error, rotating.", f->path);
 625                 return true;
 626
 627         case -EHOSTDOWN:       /* Other machine                 */
 628                 log_info("%s: Journal file from other machine, rotating.", f->path);
 629                 return true;
 630
 631         case -EBUSY:           /* Unclean shutdown              */
 632                 log_info("%s: Unclean shutdown, rotating.", f->path);
 633                 return true;
 634
 635         case -EPROTONOSUPPORT: /* Unsupported feature           */
 636                 log_info("%s: Unsupported feature, rotating.", f->path);
 637                 return true;
 638
 639         case -EBADMSG:         /* Corrupted                     */
 640         case -ENODATA:         /* Truncated                     */
 641         case -ESHUTDOWN:       /* Already archived              */
 642                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 643                 return true;
 644
 645         case -EIDRM:           /* Journal file has been deleted */
 646                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 647                 return true;
 648
 649         case -ETXTBSY:         /* Journal file is from the future */
 650                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 651                 return true;
 652
 653         default:
 654                 return false;
 655         }
 656 }
 657
 658 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
 659         bool vacuumed = false, rotate = false;
 660         struct dual_timestamp ts;
 661         JournalFile *f;
 662         int r;
 663
 664         assert(s);
 665         assert(iovec);
 666         assert(n > 0);
 667
 668         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 669          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 670          * processing it, as we want strictly linear ordering in what we write out.) */
 671         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 672         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 673
 674         if (ts.realtime < s->last_realtime_clock) {
 675                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 676                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 677                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 678                  * bisection works correctly. */
 679
 680                 log_debug("Time jumped backwards, rotating.");
 681                 rotate = true;
 682         } else {
 683
 684                 f = find_journal(s, uid);
 685                 if (!f)
 686                         return;
 687
 688                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 689                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 690                         rotate = true;
 691                 }
 692         }
 693
 694         if (rotate) {
 695                 server_rotate(s);
 696                 server_vacuum(s, false);
 697                 vacuumed = true;
 698
 699                 f = find_journal(s, uid);
 700                 if (!f)
 701                         return;
 702         }
 703
 704         s->last_realtime_clock = ts.realtime;
 705
 706         r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
 707         if (r >= 0) {
 708                 server_schedule_sync(s, priority);
 709                 return;
 710         }
 711
 712         if (vacuumed || !shall_try_append_again(f, r)) {
 713                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 714                 return;
 715         }
 716
 717         server_rotate(s);
 718         server_vacuum(s, false);
 719
 720         f = find_journal(s, uid);
 721         if (!f)
 722                 return;
 723
 724         log_debug("Retrying write.");
 725         r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
 726         if (r < 0)
 727                 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 728         else
 729                 server_schedule_sync(s, priority);
 730 }
 731
 732 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
 733         if (isset(value)) {                                             \
 734                 char *k;                                                \
 735                 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
 736                 sprintf(k, field "=" format, value);                    \
 737                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 738         }
 739
 740 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
 741         if (!isempty(value)) {                                          \
 742                 char *k;                                                \
 743                 k = strjoina(field "=", value);                         \
 744                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 745         }
 746
 747 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
 748         if (!sd_id128_is_null(value)) {                                 \
 749                 char *k;                                                \
 750                 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
 751                 sd_id128_to_string(value, stpcpy(k, field "="));        \
 752                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 753         }
 754
 755 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
 756         if (value_size > 0) {                                           \
 757                 char *k;                                                \
 758                 k = newa(char, STRLEN(field "=") + value_size + 1);     \
 759                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
 760                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 761         }                                                               \
 762
 763 static void dispatch_message_real(
 764                 Server *s,
 765                 struct iovec *iovec, size_t n, size_t m,
 766                 const ClientContext *c,
 767                 const struct timeval *tv,
 768                 int priority,
 769                 pid_t object_pid) {
 770
 771         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 772         uid_t journal_uid;
 773         ClientContext *o;
 774
 775         assert(s);
 776         assert(iovec);
 777         assert(n > 0);
 778         assert(n +
 779                N_IOVEC_META_FIELDS +
 780                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 781                client_context_extra_fields_n_iovec(c) <= m);
 782
 783         if (c) {
 784                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 785                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 786                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 787
 788                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 789                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 790                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 791                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 792
 793                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 794
 795                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 796                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 797
 798                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 799                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 800                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 801                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 802                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 803                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 804                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 805
 806                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 807
 808                 if (c->extra_fields_n_iovec > 0) {
 809                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 810                         n += c->extra_fields_n_iovec;
 811                 }
 812         }
 813
 814         assert(n <= m);
 815
 816         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 817
 818                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 819                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 820                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 821
 822                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 823                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 824                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 825                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 826
 827                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 828
 829                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 830                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 831
 832                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 833                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 834                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 835                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 836                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 837                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 838                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 839
 840                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 841         }
 842
 843         assert(n <= m);
 844
 845         if (tv) {
 846                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 847                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 848         }
 849
 850         /* Note that strictly speaking storing the boot id here is
 851          * redundant since the entry includes this in-line
 852          * anyway. However, we need this indexed, too. */
 853         if (!isempty(s->boot_id_field))
 854                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 855
 856         if (!isempty(s->machine_id_field))
 857                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 858
 859         if (!isempty(s->hostname_field))
 860                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 861
 862         assert(n <= m);
 863
 864         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
 865                 /* Split up strictly by (non-root) UID */
 866                 journal_uid = c->uid;
 867         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
 868                 /* Split up by login UIDs.  We do this only if the
 869                  * realuid is not root, in order not to accidentally
 870                  * leak privileged information to the user that is
 871                  * logged by a privileged process that is part of an
 872                  * unprivileged session. */
 873                 journal_uid = c->owner_uid;
 874         else
 875                 journal_uid = 0;
 876
 877         write_to_journal(s, journal_uid, iovec, n, priority);
 878 }
 879
 880 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
 881
 882         struct iovec *iovec;
 883         size_t n = 0, k, m;
 884         va_list ap;
 885         int r;
 886
 887         assert(s);
 888         assert(format);
 889
 890         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
 891         iovec = newa(struct iovec, m);
 892
 893         assert_cc(3 == LOG_FAC(LOG_DAEMON));
 894         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
 895         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
 896
 897         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
 898         assert_cc(6 == LOG_INFO);
 899         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
 900
 901         if (message_id)
 902                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
 903         k = n;
 904
 905         va_start(ap, format);
 906         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
 907         /* Error handling below */
 908         va_end(ap);
 909
 910         if (r >= 0)
 911                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 912
 913         while (k < n)
 914                 free(iovec[k++].iov_base);
 915
 916         if (r < 0) {
 917                 /* We failed to format the message. Emit a warning instead. */
 918                 char buf[LINE_MAX];
 919
 920                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
 921
 922                 n = 3;
 923                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
 924                 iovec[n++] = IOVEC_MAKE_STRING(buf);
 925                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
 926         }
 927 }
 928
 929 void server_dispatch_message(
 930                 Server *s,
 931                 struct iovec *iovec, size_t n, size_t m,
 932                 ClientContext *c,
 933                 const struct timeval *tv,
 934                 int priority,
 935                 pid_t object_pid) {
 936
 937         uint64_t available = 0;
 938         int rl;
 939
 940         assert(s);
 941         assert(iovec || n == 0);
 942
 943         if (n == 0)
 944                 return;
 945
 946         if (LOG_PRI(priority) > s->max_level_store)
 947                 return;
 948
 949         /* Stop early in case the information will not be stored
 950          * in a journal. */
 951         if (s->storage == STORAGE_NONE)
 952                 return;
 953
 954         if (c && c->unit) {
 955                 (void) determine_space(s, &available, NULL);
 956
 957                 rl = journal_rate_limit_test(s->rate_limit, c->unit, priority & LOG_PRIMASK, available);
 958                 if (rl == 0)
 959                         return;
 960
 961                 /* Write a suppression message if we suppressed something */
 962                 if (rl > 1)
 963                         server_driver_message(s, c->pid,
 964                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
 965                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
 966                                               "N_DROPPED=%i", rl - 1,
 967                                               NULL);
 968         }
 969
 970         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
 971 }
 972
 973 int server_flush_to_var(Server *s, bool require_flag_file) {
 974         sd_id128_t machine;
 975         sd_journal *j = NULL;
 976         char ts[FORMAT_TIMESPAN_MAX];
 977         usec_t start;
 978         unsigned n = 0;
 979         int r;
 980
 981         assert(s);
 982
 983         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
 984                 return 0;
 985
 986         if (!s->runtime_journal)
 987                 return 0;
 988
 989         if (require_flag_file && !flushed_flag_is_set())
 990                 return 0;
 991
 992         (void) system_journal_open(s, true);
 993
 994         if (!s->system_journal)
 995                 return 0;
 996
 997         log_debug("Flushing to /var...");
 998
 999         start = now(CLOCK_MONOTONIC);
1000
1001         r = sd_id128_get_machine(&machine);
1002         if (r < 0)
1003                 return r;
1004
1005         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1006         if (r < 0)
1007                 return log_error_errno(r, "Failed to read runtime journal: %m");
1008
1009         sd_journal_set_data_threshold(j, 0);
1010
1011         SD_JOURNAL_FOREACH(j) {
1012                 Object *o = NULL;
1013                 JournalFile *f;
1014
1015                 f = j->current_file;
1016                 assert(f && f->current_offset > 0);
1017
1018                 n++;
1019
1020                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1021                 if (r < 0) {
1022                         log_error_errno(r, "Can't read entry: %m");
1023                         goto finish;
1024                 }
1025
1026                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1027                 if (r >= 0)
1028                         continue;
1029
1030                 if (!shall_try_append_again(s->system_journal, r)) {
1031                         log_error_errno(r, "Can't write entry: %m");
1032                         goto finish;
1033                 }
1034
1035                 server_rotate(s);
1036                 server_vacuum(s, false);
1037
1038                 if (!s->system_journal) {
1039                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1040                         r = -EIO;
1041                         goto finish;
1042                 }
1043
1044                 log_debug("Retrying write.");
1045                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1046                 if (r < 0) {
1047                         log_error_errno(r, "Can't write entry: %m");
1048                         goto finish;
1049                 }
1050         }
1051
1052         r = 0;
1053
1054 finish:
1055         journal_file_post_change(s->system_journal);
1056
1057         s->runtime_journal = journal_file_close(s->runtime_journal);
1058
1059         if (r >= 0)
1060                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1061
1062         sd_journal_close(j);
1063
1064         server_driver_message(s, 0, NULL,
1065                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1066                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1067                                           n),
1068                               NULL);
1069
1070         return r;
1071 }
1072
1073 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1074         Server *s = userdata;
1075         struct ucred *ucred = NULL;
1076         struct timeval *tv = NULL;
1077         struct cmsghdr *cmsg;
1078         char *label = NULL;
1079         size_t label_len = 0, m;
1080         struct iovec iovec;
1081         ssize_t n;
1082         int *fds = NULL, v = 0;
1083         unsigned n_fds = 0;
1084
1085         union {
1086                 struct cmsghdr cmsghdr;
1087
1088                 /* We use NAME_MAX space for the SELinux label
1089                  * here. The kernel currently enforces no
1090                  * limit, but according to suggestions from
1091                  * the SELinux people this will change and it
1092                  * will probably be identical to NAME_MAX. For
1093                  * now we use that, but this should be updated
1094                  * one day when the final limit is known. */
1095                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1096                             CMSG_SPACE(sizeof(struct timeval)) +
1097                             CMSG_SPACE(sizeof(int)) + /* fd */
1098                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1099         } control = {};
1100
1101         union sockaddr_union sa = {};
1102
1103         struct msghdr msghdr = {
1104                 .msg_iov = &iovec,
1105                 .msg_iovlen = 1,
1106                 .msg_control = &control,
1107                 .msg_controllen = sizeof(control),
1108                 .msg_name = &sa,
1109                 .msg_namelen = sizeof(sa),
1110         };
1111
1112         assert(s);
1113         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1114
1115         if (revents != EPOLLIN) {
1116                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1117                 return -EIO;
1118         }
1119
1120         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1121          * it.) */
1122         (void) ioctl(fd, SIOCINQ, &v);
1123
1124         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1125         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1126                             (size_t) LINE_MAX,
1127                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1128
1129         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1130                 return log_oom();
1131
1132         iovec.iov_base = s->buffer;
1133         iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1134
1135         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1136         if (n < 0) {
1137                 if (IN_SET(errno, EINTR, EAGAIN))
1138                         return 0;
1139
1140                 return log_error_errno(errno, "recvmsg() failed: %m");
1141         }
1142
1143         CMSG_FOREACH(cmsg, &msghdr) {
1144
1145                 if (cmsg->cmsg_level == SOL_SOCKET &&
1146                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1147                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1148                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1149                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1150                          cmsg->cmsg_type == SCM_SECURITY) {
1151                         label = (char*) CMSG_DATA(cmsg);
1152                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1153                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1154                            cmsg->cmsg_type == SO_TIMESTAMP &&
1155                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1156                         tv = (struct timeval*) CMSG_DATA(cmsg);
1157                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1158                          cmsg->cmsg_type == SCM_RIGHTS) {
1159                         fds = (int*) CMSG_DATA(cmsg);
1160                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1161                 }
1162         }
1163
1164         /* And a trailing NUL, just in case */
1165         s->buffer[n] = 0;
1166
1167         if (fd == s->syslog_fd) {
1168                 if (n > 0 && n_fds == 0)
1169                         server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1170                 else if (n_fds > 0)
1171                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1172
1173         } else if (fd == s->native_fd) {
1174                 if (n > 0 && n_fds == 0)
1175                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1176                 else if (n == 0 && n_fds == 1)
1177                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1178                 else if (n_fds > 0)
1179                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1180
1181         } else {
1182                 assert(fd == s->audit_fd);
1183
1184                 if (n > 0 && n_fds == 0)
1185                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1186                 else if (n_fds > 0)
1187                         log_warning("Got file descriptors via audit socket. Ignoring.");
1188         }
1189
1190         close_many(fds, n_fds);
1191         return 0;
1192 }
1193
1194 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1195         Server *s = userdata;
1196         int r;
1197
1198         assert(s);
1199
1200         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1201
1202         (void) server_flush_to_var(s, false);
1203         server_sync(s);
1204         server_vacuum(s, false);
1205
1206         r = touch("/run/systemd/journal/flushed");
1207         if (r < 0)
1208                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1209
1210         server_space_usage_message(s, NULL);
1211         return 0;
1212 }
1213
1214 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1215         Server *s = userdata;
1216         int r;
1217
1218         assert(s);
1219
1220         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1221         server_rotate(s);
1222         server_vacuum(s, true);
1223
1224         if (s->system_journal)
1225                 patch_min_use(&s->system_storage);
1226         if (s->runtime_journal)
1227                 patch_min_use(&s->runtime_storage);
1228
1229         /* Let clients know when the most recent rotation happened. */
1230         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1231         if (r < 0)
1232                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1233
1234         return 0;
1235 }
1236
1237 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1238         Server *s = userdata;
1239
1240         assert(s);
1241
1242         log_received_signal(LOG_INFO, si);
1243
1244         sd_event_exit(s->event, 0);
1245         return 0;
1246 }
1247
1248 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1249         Server *s = userdata;
1250         int r;
1251
1252         assert(s);
1253
1254         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1255
1256         server_sync(s);
1257
1258         /* Let clients know when the most recent sync happened. */
1259         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1260         if (r < 0)
1261                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1262
1263         return 0;
1264 }
1265
1266 static int setup_signals(Server *s) {
1267         int r;
1268
1269         assert(s);
1270
1271         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1272
1273         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1274         if (r < 0)
1275                 return r;
1276
1277         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1278         if (r < 0)
1279                 return r;
1280
1281         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1282         if (r < 0)
1283                 return r;
1284
1285         /* Let's process SIGTERM late, so that we flush all queued
1286          * messages to disk before we exit */
1287         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1288         if (r < 0)
1289                 return r;
1290
1291         /* When journald is invoked on the terminal (when debugging),
1292          * it's useful if C-c is handled equivalent to SIGTERM. */
1293         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1294         if (r < 0)
1295                 return r;
1296
1297         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1298         if (r < 0)
1299                 return r;
1300
1301         /* SIGRTMIN+1 causes an immediate sync. We process this very
1302          * late, so that everything else queued at this point is
1303          * really written to disk. Clients can watch
1304          * /run/systemd/journal/synced with inotify until its mtime
1305          * changes to see when a sync happened. */
1306         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1307         if (r < 0)
1308                 return r;
1309
1310         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1311         if (r < 0)
1312                 return r;
1313
1314         return 0;
1315 }
1316
1317 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1318         Server *s = data;
1319         int r;
1320
1321         assert(s);
1322
1323         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1324
1325                 r = value ? parse_boolean(value) : true;
1326                 if (r < 0)
1327                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1328                 else
1329                         s->forward_to_syslog = r;
1330
1331         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1332
1333                 r = value ? parse_boolean(value) : true;
1334                 if (r < 0)
1335                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1336                 else
1337                         s->forward_to_kmsg = r;
1338
1339         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1340
1341                 r = value ? parse_boolean(value) : true;
1342                 if (r < 0)
1343                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1344                 else
1345                         s->forward_to_console = r;
1346
1347         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1348
1349                 r = value ? parse_boolean(value) : true;
1350                 if (r < 0)
1351                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1352                 else
1353                         s->forward_to_wall = r;
1354
1355         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1356
1357                 if (proc_cmdline_value_missing(key, value))
1358                         return 0;
1359
1360                 r = log_level_from_string(value);
1361                 if (r < 0)
1362                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1363                 else
1364                         s->max_level_console = r;
1365
1366         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1367
1368                 if (proc_cmdline_value_missing(key, value))
1369                         return 0;
1370
1371                 r = log_level_from_string(value);
1372                 if (r < 0)
1373                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1374                 else
1375                         s->max_level_store = r;
1376
1377         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1378
1379                 if (proc_cmdline_value_missing(key, value))
1380                         return 0;
1381
1382                 r = log_level_from_string(value);
1383                 if (r < 0)
1384                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1385                 else
1386                         s->max_level_syslog = r;
1387
1388         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1389
1390                 if (proc_cmdline_value_missing(key, value))
1391                         return 0;
1392
1393                 r = log_level_from_string(value);
1394                 if (r < 0)
1395                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1396                 else
1397                         s->max_level_kmsg = r;
1398
1399         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1400
1401                 if (proc_cmdline_value_missing(key, value))
1402                         return 0;
1403
1404                 r = log_level_from_string(value);
1405                 if (r < 0)
1406                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1407                 else
1408                         s->max_level_wall = r;
1409
1410         } else if (startswith(key, "systemd.journald"))
1411                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1412
1413         /* do not warn about state here, since probably systemd already did */
1414         return 0;
1415 }
1416
1417 static int server_parse_config_file(Server *s) {
1418         assert(s);
1419
1420         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1421                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1422                                         "Journal\0",
1423                                         config_item_perf_lookup, journald_gperf_lookup,
1424                                         CONFIG_PARSE_WARN, s);
1425 }
1426
1427 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1428         Server *s = userdata;
1429
1430         assert(s);
1431
1432         server_sync(s);
1433         return 0;
1434 }
1435
1436 int server_schedule_sync(Server *s, int priority) {
1437         int r;
1438
1439         assert(s);
1440
1441         if (priority <= LOG_CRIT) {
1442                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1443                 server_sync(s);
1444                 return 0;
1445         }
1446
1447         if (s->sync_scheduled)
1448                 return 0;
1449
1450         if (s->sync_interval_usec > 0) {
1451                 usec_t when;
1452
1453                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1454                 if (r < 0)
1455                         return r;
1456
1457                 when += s->sync_interval_usec;
1458
1459                 if (!s->sync_event_source) {
1460                         r = sd_event_add_time(
1461                                         s->event,
1462                                         &s->sync_event_source,
1463                                         CLOCK_MONOTONIC,
1464                                         when, 0,
1465                                         server_dispatch_sync, s);
1466                         if (r < 0)
1467                                 return r;
1468
1469                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1470                 } else {
1471                         r = sd_event_source_set_time(s->sync_event_source, when);
1472                         if (r < 0)
1473                                 return r;
1474
1475                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1476                 }
1477                 if (r < 0)
1478                         return r;
1479
1480                 s->sync_scheduled = true;
1481         }
1482
1483         return 0;
1484 }
1485
1486 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1487         Server *s = userdata;
1488
1489         assert(s);
1490
1491         server_cache_hostname(s);
1492         return 0;
1493 }
1494
1495 static int server_open_hostname(Server *s) {
1496         int r;
1497
1498         assert(s);
1499
1500         s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1501         if (s->hostname_fd < 0)
1502                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1503
1504         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1505         if (r < 0) {
1506                 /* kernels prior to 3.2 don't support polling this file. Ignore
1507                  * the failure. */
1508                 if (r == -EPERM) {
1509                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1510                         s->hostname_fd = safe_close(s->hostname_fd);
1511                         return 0;
1512                 }
1513
1514                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1515         }
1516
1517         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1518         if (r < 0)
1519                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1520
1521         return 0;
1522 }
1523
1524 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1525         Server *s = userdata;
1526         int r;
1527
1528         assert(s);
1529         assert(s->notify_event_source == es);
1530         assert(s->notify_fd == fd);
1531
1532         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1533          * message on it. Either it's the watchdog event, the initial
1534          * READY=1 event or an stdout stream event. If there's nothing
1535          * to write anymore, turn our event source off. The next time
1536          * there's something to send it will be turned on again. */
1537
1538         if (!s->sent_notify_ready) {
1539                 static const char p[] =
1540                         "READY=1\n"
1541                         "STATUS=Processing requests...";
1542                 ssize_t l;
1543
1544                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1545                 if (l < 0) {
1546                         if (errno == EAGAIN)
1547                                 return 0;
1548
1549                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1550                 }
1551
1552                 s->sent_notify_ready = true;
1553                 log_debug("Sent READY=1 notification.");
1554
1555         } else if (s->send_watchdog) {
1556
1557                 static const char p[] =
1558                         "WATCHDOG=1";
1559
1560                 ssize_t l;
1561
1562                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1563                 if (l < 0) {
1564                         if (errno == EAGAIN)
1565                                 return 0;
1566
1567                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1568                 }
1569
1570                 s->send_watchdog = false;
1571                 log_debug("Sent WATCHDOG=1 notification.");
1572
1573         } else if (s->stdout_streams_notify_queue)
1574                 /* Dispatch one stream notification event */
1575                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1576
1577         /* Leave us enabled if there's still more to do. */
1578         if (s->send_watchdog || s->stdout_streams_notify_queue)
1579                 return 0;
1580
1581         /* There was nothing to do anymore, let's turn ourselves off. */
1582         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1583         if (r < 0)
1584                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1585
1586         return 0;
1587 }
1588
1589 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1590         Server *s = userdata;
1591         int r;
1592
1593         assert(s);
1594
1595         s->send_watchdog = true;
1596
1597         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1598         if (r < 0)
1599                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1600
1601         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1602         if (r < 0)
1603                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1604
1605         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1606         if (r < 0)
1607                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1608
1609         return 0;
1610 }
1611
1612 static int server_connect_notify(Server *s) {
1613         union sockaddr_union sa = {
1614                 .un.sun_family = AF_UNIX,
1615         };
1616         const char *e;
1617         int r;
1618
1619         assert(s);
1620         assert(s->notify_fd < 0);
1621         assert(!s->notify_event_source);
1622
1623         /*
1624           So here's the problem: we'd like to send notification
1625           messages to PID 1, but we cannot do that via sd_notify(),
1626           since that's synchronous, and we might end up blocking on
1627           it. Specifically: given that PID 1 might block on
1628           dbus-daemon during IPC, and dbus-daemon is logging to us,
1629           and might hence block on us, we might end up in a deadlock
1630           if we block on sending PID 1 notification messages — by
1631           generating a full blocking circle. To avoid this, let's
1632           create a non-blocking socket, and connect it to the
1633           notification socket, and then wait for POLLOUT before we
1634           send anything. This should efficiently avoid any deadlocks,
1635           as we'll never block on PID 1, hence PID 1 can safely block
1636           on dbus-daemon which can safely block on us again.
1637
1638           Don't think that this issue is real? It is, see:
1639           https://github.com/systemd/systemd/issues/1505
1640         */
1641
1642         e = getenv("NOTIFY_SOCKET");
1643         if (!e)
1644                 return 0;
1645
1646         if (!IN_SET(e[0], '@', '/') || e[1] == 0) {
1647                 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1648                 return -EINVAL;
1649         }
1650
1651         if (strlen(e) > sizeof(sa.un.sun_path)) {
1652                 log_error("NOTIFY_SOCKET path too long: %s", e);
1653                 return -EINVAL;
1654         }
1655
1656         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1657         if (s->notify_fd < 0)
1658                 return log_error_errno(errno, "Failed to create notify socket: %m");
1659
1660         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1661
1662         strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1663         if (sa.un.sun_path[0] == '@')
1664                 sa.un.sun_path[0] = 0;
1665
1666         r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1667         if (r < 0)
1668                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1669
1670         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1671         if (r < 0)
1672                 return log_error_errno(r, "Failed to watch notification socket: %m");
1673
1674         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1675                 s->send_watchdog = true;
1676
1677                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1678                 if (r < 0)
1679                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1680         }
1681
1682         /* This should fire pretty soon, which we'll use to send the
1683          * READY=1 event. */
1684
1685         return 0;
1686 }
1687
1688 int server_init(Server *s) {
1689         _cleanup_fdset_free_ FDSet *fds = NULL;
1690         int n, r, fd;
1691         bool no_sockets;
1692
1693         assert(s);
1694
1695         zero(*s);
1696         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1697         s->compress = true;
1698         s->seal = true;
1699         s->read_kmsg = true;
1700
1701         s->watchdog_usec = USEC_INFINITY;
1702
1703         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1704         s->sync_scheduled = false;
1705
1706         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1707         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1708
1709         s->forward_to_wall = true;
1710
1711         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1712
1713         s->max_level_store = LOG_DEBUG;
1714         s->max_level_syslog = LOG_DEBUG;
1715         s->max_level_kmsg = LOG_NOTICE;
1716         s->max_level_console = LOG_INFO;
1717         s->max_level_wall = LOG_EMERG;
1718
1719         s->line_max = DEFAULT_LINE_MAX;
1720
1721         journal_reset_metrics(&s->system_storage.metrics);
1722         journal_reset_metrics(&s->runtime_storage.metrics);
1723
1724         server_parse_config_file(s);
1725
1726         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1727         if (r < 0)
1728                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1729
1730         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1731                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1732                           s->rate_limit_interval, s->rate_limit_burst);
1733                 s->rate_limit_interval = s->rate_limit_burst = 0;
1734         }
1735
1736         (void) mkdir_p("/run/systemd/journal", 0755);
1737
1738         s->user_journals = ordered_hashmap_new(NULL);
1739         if (!s->user_journals)
1740                 return log_oom();
1741
1742         s->mmap = mmap_cache_new();
1743         if (!s->mmap)
1744                 return log_oom();
1745
1746         s->deferred_closes = set_new(NULL);
1747         if (!s->deferred_closes)
1748                 return log_oom();
1749
1750         r = sd_event_default(&s->event);
1751         if (r < 0)
1752                 return log_error_errno(r, "Failed to create event loop: %m");
1753
1754         n = sd_listen_fds(true);
1755         if (n < 0)
1756                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1757
1758         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1759
1760                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1761
1762                         if (s->native_fd >= 0) {
1763                                 log_error("Too many native sockets passed.");
1764                                 return -EINVAL;
1765                         }
1766
1767                         s->native_fd = fd;
1768
1769                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1770
1771                         if (s->stdout_fd >= 0) {
1772                                 log_error("Too many stdout sockets passed.");
1773                                 return -EINVAL;
1774                         }
1775
1776                         s->stdout_fd = fd;
1777
1778                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1779                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1780
1781                         if (s->syslog_fd >= 0) {
1782                                 log_error("Too many /dev/log sockets passed.");
1783                                 return -EINVAL;
1784                         }
1785
1786                         s->syslog_fd = fd;
1787
1788                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1789
1790                         if (s->audit_fd >= 0) {
1791                                 log_error("Too many audit sockets passed.");
1792                                 return -EINVAL;
1793                         }
1794
1795                         s->audit_fd = fd;
1796
1797                 } else {
1798
1799                         if (!fds) {
1800                                 fds = fdset_new();
1801                                 if (!fds)
1802                                         return log_oom();
1803                         }
1804
1805                         r = fdset_put(fds, fd);
1806                         if (r < 0)
1807                                 return log_oom();
1808                 }
1809         }
1810
1811         /* Try to restore streams, but don't bother if this fails */
1812         (void) server_restore_streams(s, fds);
1813
1814         if (fdset_size(fds) > 0) {
1815                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1816                 fds = fdset_free(fds);
1817         }
1818
1819         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1820
1821         /* always open stdout, syslog, native, and kmsg sockets */
1822
1823         /* systemd-journald.socket: /run/systemd/journal/stdout */
1824         r = server_open_stdout_socket(s);
1825         if (r < 0)
1826                 return r;
1827
1828         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1829         r = server_open_syslog_socket(s);
1830         if (r < 0)
1831                 return r;
1832
1833         /* systemd-journald.socket: /run/systemd/journal/socket */
1834         r = server_open_native_socket(s);
1835         if (r < 0)
1836                 return r;
1837
1838         /* /dev/kmsg */
1839         r = server_open_dev_kmsg(s);
1840         if (r < 0)
1841                 return r;
1842
1843         /* Unless we got *some* sockets and not audit, open audit socket */
1844         if (s->audit_fd >= 0 || no_sockets) {
1845                 r = server_open_audit(s);
1846                 if (r < 0)
1847                         return r;
1848         }
1849
1850         r = server_open_kernel_seqnum(s);
1851         if (r < 0)
1852                 return r;
1853
1854         r = server_open_hostname(s);
1855         if (r < 0)
1856                 return r;
1857
1858         r = setup_signals(s);
1859         if (r < 0)
1860                 return r;
1861
1862         s->udev = udev_new();
1863         if (!s->udev)
1864                 return -ENOMEM;
1865
1866         s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1867         if (!s->rate_limit)
1868                 return -ENOMEM;
1869
1870         r = cg_get_root_path(&s->cgroup_root);
1871         if (r < 0)
1872                 return r;
1873
1874         server_cache_hostname(s);
1875         server_cache_boot_id(s);
1876         server_cache_machine_id(s);
1877
1878         s->runtime_storage.name = "Runtime journal";
1879         s->system_storage.name = "System journal";
1880
1881         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1882         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1883         if (!s->runtime_storage.path || !s->system_storage.path)
1884                 return -ENOMEM;
1885
1886         (void) server_connect_notify(s);
1887
1888         (void) client_context_acquire_default(s);
1889
1890         return system_journal_open(s, false);
1891 }
1892
1893 void server_maybe_append_tags(Server *s) {
1894 #if HAVE_GCRYPT
1895         JournalFile *f;
1896         Iterator i;
1897         usec_t n;
1898
1899         n = now(CLOCK_REALTIME);
1900
1901         if (s->system_journal)
1902                 journal_file_maybe_append_tag(s->system_journal, n);
1903
1904         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1905                 journal_file_maybe_append_tag(f, n);
1906 #endif
1907 }
1908
1909 void server_done(Server *s) {
1910         assert(s);
1911
1912         set_free_with_destructor(s->deferred_closes, journal_file_close);
1913
1914         while (s->stdout_streams)
1915                 stdout_stream_free(s->stdout_streams);
1916
1917         client_context_flush_all(s);
1918
1919         if (s->system_journal)
1920                 (void) journal_file_close(s->system_journal);
1921
1922         if (s->runtime_journal)
1923                 (void) journal_file_close(s->runtime_journal);
1924
1925         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
1926
1927         sd_event_source_unref(s->syslog_event_source);
1928         sd_event_source_unref(s->native_event_source);
1929         sd_event_source_unref(s->stdout_event_source);
1930         sd_event_source_unref(s->dev_kmsg_event_source);
1931         sd_event_source_unref(s->audit_event_source);
1932         sd_event_source_unref(s->sync_event_source);
1933         sd_event_source_unref(s->sigusr1_event_source);
1934         sd_event_source_unref(s->sigusr2_event_source);
1935         sd_event_source_unref(s->sigterm_event_source);
1936         sd_event_source_unref(s->sigint_event_source);
1937         sd_event_source_unref(s->sigrtmin1_event_source);
1938         sd_event_source_unref(s->hostname_event_source);
1939         sd_event_source_unref(s->notify_event_source);
1940         sd_event_source_unref(s->watchdog_event_source);
1941         sd_event_unref(s->event);
1942
1943         safe_close(s->syslog_fd);
1944         safe_close(s->native_fd);
1945         safe_close(s->stdout_fd);
1946         safe_close(s->dev_kmsg_fd);
1947         safe_close(s->audit_fd);
1948         safe_close(s->hostname_fd);
1949         safe_close(s->notify_fd);
1950
1951         if (s->rate_limit)
1952                 journal_rate_limit_free(s->rate_limit);
1953
1954         if (s->kernel_seqnum)
1955                 munmap(s->kernel_seqnum, sizeof(uint64_t));
1956
1957         free(s->buffer);
1958         free(s->tty_path);
1959         free(s->cgroup_root);
1960         free(s->hostname_field);
1961         free(s->runtime_storage.path);
1962         free(s->system_storage.path);
1963
1964         if (s->mmap)
1965                 mmap_cache_unref(s->mmap);
1966
1967         udev_unref(s->udev);
1968 }
1969
1970 static const char* const storage_table[_STORAGE_MAX] = {
1971         [STORAGE_AUTO] = "auto",
1972         [STORAGE_VOLATILE] = "volatile",
1973         [STORAGE_PERSISTENT] = "persistent",
1974         [STORAGE_NONE] = "none"
1975 };
1976
1977 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1978 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1979
1980 static const char* const split_mode_table[_SPLIT_MAX] = {
1981         [SPLIT_LOGIN] = "login",
1982         [SPLIT_UID] = "uid",
1983         [SPLIT_NONE] = "none",
1984 };
1985
1986 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1987 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
1988
1989 int config_parse_line_max(
1990                 const char* unit,
1991                 const char *filename,
1992                 unsigned line,
1993                 const char *section,
1994                 unsigned section_line,
1995                 const char *lvalue,
1996                 int ltype,
1997                 const char *rvalue,
1998                 void *data,
1999                 void *userdata) {
2000
2001         size_t *sz = data;
2002         int r;
2003
2004         assert(filename);
2005         assert(lvalue);
2006         assert(rvalue);
2007         assert(data);
2008
2009         if (isempty(rvalue))
2010                 /* Empty assignment means default */
2011                 *sz = DEFAULT_LINE_MAX;
2012         else {
2013                 uint64_t v;
2014
2015                 r = parse_size(rvalue, 1024, &v);
2016                 if (r < 0) {
2017                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2018                         return 0;
2019                 }
2020
2021                 if (v < 79) {
2022                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2023                          * terminal size is 80ch, and it might make sense to break one character before the natural
2024                          * line break would occur on that. */
2025                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2026                         *sz = 79;
2027                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2028                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2029                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2030                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2031                          * fail much earlier anyway. */
2032                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2033                         *sz = SSIZE_MAX-1;
2034                 } else
2035                         *sz = (size_t) v;
2036         }
2037
2038         return 0;
2039 }