src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #if HAVE_SELINUX
   4 #include <selinux/selinux.h>
   5 #endif
   6 #include <sys/ioctl.h>
   7 #include <sys/mman.h>
   8 #include <sys/signalfd.h>
   9 #include <sys/statvfs.h>
  10 #include <linux/sockios.h>
  11
  12 #include "sd-daemon.h"
  13 #include "sd-journal.h"
  14 #include "sd-messages.h"
  15
  16 #include "acl-util.h"
  17 #include "alloc-util.h"
  18 #include "audit-util.h"
  19 #include "cgroup-util.h"
  20 #include "conf-parser.h"
  21 #include "dirent-util.h"
  22 #include "extract-word.h"
  23 #include "fd-util.h"
  24 #include "fileio.h"
  25 #include "format-util.h"
  26 #include "fs-util.h"
  27 #include "hashmap.h"
  28 #include "hostname-util.h"
  29 #include "id128-util.h"
  30 #include "io-util.h"
  31 #include "journal-authenticate.h"
  32 #include "journal-file.h"
  33 #include "journal-internal.h"
  34 #include "journal-vacuum.h"
  35 #include "journald-audit.h"
  36 #include "journald-context.h"
  37 #include "journald-kmsg.h"
  38 #include "journald-native.h"
  39 #include "journald-rate-limit.h"
  40 #include "journald-server.h"
  41 #include "journald-stream.h"
  42 #include "journald-syslog.h"
  43 #include "log.h"
  44 #include "missing_audit.h"
  45 #include "mkdir.h"
  46 #include "parse-util.h"
  47 #include "path-util.h"
  48 #include "proc-cmdline.h"
  49 #include "process-util.h"
  50 #include "rm-rf.h"
  51 #include "selinux-util.h"
  52 #include "signal-util.h"
  53 #include "socket-util.h"
  54 #include "stdio-util.h"
  55 #include "string-table.h"
  56 #include "string-util.h"
  57 #include "syslog-util.h"
  58 #include "user-record.h"
  59 #include "user-util.h"
  60
/* Upper bound on the number of per-user journal files kept open at the same time; find_journal() closes
 * the oldest open ones before opening another once this many are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults for sync interval, rate limiting and maximum journal file age. NOTE(review): the
 * code consuming these is not part of this section of the file - confirm semantics there. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cache_space_refresh() keeps using cached disk space figures before measuring again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the notification socket; the user of this
 * constant is not visible here - confirm. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)

/* Once the set of deferred closes holds this many files, server_vacuum_deferred_closes() starts closing
 * entries so that the set does not grow without bounds. */
#define DEFERRED_CLOSES_MAX (4096)

/* NOTE(review): presumably the idle period after which the server may exit; the user of this constant is
 * not visible here - confirm. */
#define IDLE_TIMEOUT_USEC (30*USEC_PER_SEC)
  82
  83 static int determine_path_usage(
  84                 Server *s,
  85                 const char *path,
  86                 uint64_t *ret_used,
  87                 uint64_t *ret_free) {
  88
  89         _cleanup_closedir_ DIR *d = NULL;
  90         struct dirent *de;
  91         struct statvfs ss;
  92
  93         assert(s);
  94         assert(path);
  95         assert(ret_used);
  96         assert(ret_free);
  97
  98         d = opendir(path);
  99         if (!d)
 100                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
 101                                       errno, "Failed to open %s: %m", path);
 102
 103         if (fstatvfs(dirfd(d), &ss) < 0)
 104                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
 105
 106         *ret_free = ss.f_bsize * ss.f_bavail;
 107         *ret_used = 0;
 108         FOREACH_DIRENT_ALL(de, d, break) {
 109                 struct stat st;
 110
 111                 if (!endswith(de->d_name, ".journal") &&
 112                     !endswith(de->d_name, ".journal~"))
 113                         continue;
 114
 115                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 116                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 117                         continue;
 118                 }
 119
 120                 if (!S_ISREG(st.st_mode))
 121                         continue;
 122
 123                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 124         }
 125
 126         return 0;
 127 }
 128
 129 static void cache_space_invalidate(JournalStorageSpace *space) {
 130         zero(*space);
 131 }
 132
 133 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 134         JournalStorageSpace *space;
 135         JournalMetrics *metrics;
 136         uint64_t vfs_used, vfs_avail, avail;
 137         usec_t ts;
 138         int r;
 139
 140         assert(s);
 141
 142         metrics = &storage->metrics;
 143         space = &storage->space;
 144
 145         ts = now(CLOCK_MONOTONIC);
 146
 147         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 148                 return 0;
 149
 150         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 151         if (r < 0)
 152                 return r;
 153
 154         space->vfs_used = vfs_used;
 155         space->vfs_available = vfs_avail;
 156
 157         avail = LESS_BY(vfs_avail, metrics->keep_free);
 158
 159         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 160         space->available = LESS_BY(space->limit, vfs_used);
 161         space->timestamp = ts;
 162         return 1;
 163 }
 164
 165 static void patch_min_use(JournalStorage *storage) {
 166         assert(storage);
 167
 168         /* Let's bump the min_use limit to the current usage on disk. We do
 169          * this when starting up and first opening the journal files. This way
 170          * sudden spikes in disk usage will not cause journald to vacuum files
 171          * without bounds. Note that this means that only a restart of journald
 172          * will make it reset this value. */
 173
 174         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 175 }
 176
 177 static JournalStorage* server_current_storage(Server *s) {
 178         assert(s);
 179
 180         return s->system_journal ? &s->system_storage : &s->runtime_storage;
 181 }
 182
 183 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 184         JournalStorage *js;
 185         int r;
 186
 187         assert(s);
 188
 189         js = server_current_storage(s);
 190
 191         r = cache_space_refresh(s, js);
 192         if (r >= 0) {
 193                 if (available)
 194                         *available = js->space.available;
 195                 if (limit)
 196                         *limit = js->space.limit;
 197         }
 198         return r;
 199 }
 200
 201 void server_space_usage_message(Server *s, JournalStorage *storage) {
 202         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 203              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 204         JournalMetrics *metrics;
 205
 206         assert(s);
 207
 208         if (!storage)
 209                 storage = server_current_storage(s);
 210
 211         if (cache_space_refresh(s, storage) < 0)
 212                 return;
 213
 214         metrics = &storage->metrics;
 215         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 216         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 217         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 218         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 219         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 220         format_bytes(fb6, sizeof(fb6), storage->space.available);
 221
 222         server_driver_message(s, 0,
 223                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 224                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 225                                           storage->name, storage->path, fb1, fb5, fb6),
 226                               "JOURNAL_NAME=%s", storage->name,
 227                               "JOURNAL_PATH=%s", storage->path,
 228                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 229                               "CURRENT_USE_PRETTY=%s", fb1,
 230                               "MAX_USE=%"PRIu64, metrics->max_use,
 231                               "MAX_USE_PRETTY=%s", fb2,
 232                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 233                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 234                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 235                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 236                               "LIMIT=%"PRIu64, storage->space.limit,
 237                               "LIMIT_PRETTY=%s", fb5,
 238                               "AVAILABLE=%"PRIu64, storage->space.available,
 239                               "AVAILABLE_PRETTY=%s", fb6,
 240                               NULL);
 241 }
 242
 243 static bool uid_for_system_journal(uid_t uid) {
 244
 245         /* Returns true if the specified UID shall get its data stored in the system journal*/
 246
 247         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 248 }
 249
 250 static void server_add_acls(JournalFile *f, uid_t uid) {
 251         assert(f);
 252
 253 #if HAVE_ACL
 254         int r;
 255
 256         if (uid_for_system_journal(uid))
 257                 return;
 258
 259         r = fd_add_uid_acl_permission(f->fd, uid, ACL_READ);
 260         if (r < 0)
 261                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 262 #endif
 263 }
 264
 265 static int open_journal(
 266                 Server *s,
 267                 bool reliably,
 268                 const char *fname,
 269                 int flags,
 270                 bool seal,
 271                 JournalMetrics *metrics,
 272                 JournalFile **ret) {
 273
 274         _cleanup_(journal_file_closep) JournalFile *f = NULL;
 275         int r;
 276
 277         assert(s);
 278         assert(fname);
 279         assert(ret);
 280
 281         if (reliably)
 282                 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
 283                                                seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 284         else
 285                 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
 286                                       metrics, s->mmap, s->deferred_closes, NULL, &f);
 287
 288         if (r < 0)
 289                 return r;
 290
 291         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 292         if (r < 0)
 293                 return r;
 294
 295         *ret = TAKE_PTR(f);
 296         return r;
 297 }
 298
 299 static bool flushed_flag_is_set(Server *s) {
 300         const char *fn;
 301
 302         assert(s);
 303
 304         /* We don't support the "flushing" concept for namespace instances, we assume them to always have
 305          * access to /var */
 306         if (s->namespace)
 307                 return true;
 308
 309         fn = strjoina(s->runtime_directory, "/flushed");
 310         return access(fn, F_OK) >= 0;
 311 }
 312
/* Opens the system and/or runtime journal, as far as they are not open yet.
 *
 * The system journal (below s->system_storage.path) is opened only if storage is configured as
 * persistent or auto, a flush was requested or the "flushed" flag is set, and no relinquish was
 * requested. Failing to open it is not fatal: a warning is logged (except for ENOENT/EROFS) and
 * operation continues.
 *
 * The runtime journal (below s->runtime_storage.path) is handled unless storage is disabled: if the
 * system journal is open and no relinquish was requested, it is only opened if it exists already;
 * otherwise it is created as needed, and failure to do so is returned to the caller.
 *
 * Returns a negative error only if the runtime journal was needed but could not be opened, otherwise
 * the non-negative result of the last open attempt (or 0). */
static int system_journal_open(Server *s, bool flush_requested, bool relinquish_requested) {
        const char *fn;
        int r = 0;

        if (!s->system_journal &&
            IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
            (flush_requested || flushed_flag_is_set(s)) &&
            !relinquish_requested) {

                /* If in auto mode: first try to create the machine path, but not the prefix.
                 *
                 * If in persistent mode: create /var/log/journal and the machine path */

                if (s->storage == STORAGE_PERSISTENT)
                        (void) mkdir_parents(s->system_storage.path, 0755);

                (void) mkdir(s->system_storage.path, 0755);

                fn = strjoina(s->system_storage.path, "/system.journal");
                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
                if (r >= 0) {
                        /* Freshly opened: set up ACLs and base min_use on what is on disk right now */
                        server_add_acls(s->system_journal, 0);
                        (void) cache_space_refresh(s, &s->system_storage);
                        patch_min_use(&s->system_storage);
                } else {
                        /* Not fatal, we can still log to the runtime journal. ENOENT and EROFS are not
                         * even worth a warning. */
                        if (!IN_SET(r, -ENOENT, -EROFS))
                                log_warning_errno(r, "Failed to open system journal: %m");

                        r = 0;
                }

                /* If the runtime journal is open, and we're post-flush, we're recovering from a failed
                 * system journal rotate (ENOSPC) for which the runtime journal was reopened.
                 *
                 * Perform an implicit flush to var, leaving the runtime journal closed, now that the system
                 * journal is back.
                 */
                if (!flush_requested)
                        (void) server_flush_to_var(s, true);
        }

        if (!s->runtime_journal &&
            (s->storage != STORAGE_NONE)) {

                fn = strjoina(s->runtime_storage.path, "/system.journal");

                if (s->system_journal && !relinquish_requested) {

                        /* Try to open the runtime journal, but only
                         * if it already exists, so that we can flush
                         * it into the system journal */

                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0) {
                                /* A missing runtime journal is the expected case here, stay quiet then */
                                if (r != -ENOENT)
                                        log_warning_errno(r, "Failed to open runtime journal: %m");

                                r = 0;
                        }

                } else {

                        /* OK, we really need the runtime journal, so create it if necessary. */

                        (void) mkdir_parents(s->runtime_storage.path, 0755);
                        (void) mkdir(s->runtime_storage.path, 0750);

                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0)
                                return log_error_errno(r, "Failed to open runtime journal: %m");
                }

                if (s->runtime_journal) {
                        /* Same post-open setup as for the system journal above */
                        server_add_acls(s->runtime_journal, 0);
                        (void) cache_space_refresh(s, &s->runtime_storage);
                        patch_min_use(&s->runtime_storage);
                }
        }

        return r;
}
 394
 395 static JournalFile* find_journal(Server *s, uid_t uid) {
 396         _cleanup_free_ char *p = NULL;
 397         JournalFile *f;
 398         int r;
 399
 400         assert(s);
 401
 402         /* A rotate that fails to create the new journal (ENOSPC) leaves the rotated journal as NULL.  Unless
 403          * we revisit opening, even after space is made available we'll continue to return NULL indefinitely.
 404          *
 405          * system_journal_open() is a noop if the journals are already open, so we can just call it here to
 406          * recover from failed rotates (or anything else that's left the journals as NULL).
 407          *
 408          * Fixes https://github.com/systemd/systemd/issues/3968 */
 409         (void) system_journal_open(s, false, false);
 410
 411         /* We split up user logs only on /var, not on /run. If the runtime file is open, we write to it
 412          * exclusively, in order to guarantee proper order as soon as we flush /run to /var and close the
 413          * runtime file. */
 414
 415         if (s->runtime_journal)
 416                 return s->runtime_journal;
 417
 418         if (uid_for_system_journal(uid))
 419                 return s->system_journal;
 420
 421         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 422         if (f)
 423                 return f;
 424
 425         if (asprintf(&p, "%s/user-" UID_FMT ".journal", s->system_storage.path, uid) < 0) {
 426                 log_oom();
 427                 return s->system_journal;
 428         }
 429
 430         /* Too many open? Then let's close one (or more) */
 431         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 432                 assert_se(f = ordered_hashmap_steal_first(s->user_journals));
 433                 (void) journal_file_close(f);
 434         }
 435
 436         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 437         if (r < 0)
 438                 return s->system_journal;
 439
 440         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 441         if (r < 0) {
 442                 (void) journal_file_close(f);
 443                 return s->system_journal;
 444         }
 445
 446         server_add_acls(f, uid);
 447         return f;
 448 }
 449
 450 static int do_rotate(
 451                 Server *s,
 452                 JournalFile **f,
 453                 const char* name,
 454                 bool seal,
 455                 uint32_t uid) {
 456
 457         int r;
 458         assert(s);
 459
 460         if (!*f)
 461                 return -EINVAL;
 462
 463         r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
 464         if (r < 0) {
 465                 if (*f)
 466                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 467                 else
 468                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 469         }
 470
 471         server_add_acls(*f, uid);
 472         return r;
 473 }
 474
 475 static void server_process_deferred_closes(Server *s) {
 476         JournalFile *f;
 477
 478         /* Perform any deferred closes which aren't still offlining. */
 479         SET_FOREACH(f, s->deferred_closes) {
 480                 if (journal_file_is_offlining(f))
 481                         continue;
 482
 483                 (void) set_remove(s->deferred_closes, f);
 484                 (void) journal_file_close(f);
 485         }
 486 }
 487
 488 static void server_vacuum_deferred_closes(Server *s) {
 489         assert(s);
 490
 491         /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
 492         if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
 493                 return;
 494
 495         /* Let's first remove all journal files that might already have completed closing */
 496         server_process_deferred_closes(s);
 497
 498         /* And now, let's close some more until we reach the limit again. */
 499         while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
 500                 JournalFile *f;
 501
 502                 assert_se(f = set_steal_first(s->deferred_closes));
 503                 journal_file_close(f);
 504         }
 505 }
 506
/* Archives all per-user journal files ("user-<UID>.journal") in the system storage directory that the
 * server does not currently have open. Each such file is opened briefly, archived and then queued for
 * closing. Files that cannot be read as journal files are moved out of the way.
 *
 * Returns 0 if the directory does not exist or after enumeration ended (problems with individual files
 * are logged and skipped), and an error if the directory cannot be opened or memory is exhausted. */
static int vacuum_offline_user_journals(Server *s) {
        _cleanup_closedir_ DIR *d = NULL;
        int r;

        assert(s);

        d = opendir(s->system_storage.path);
        if (!d) {
                if (errno == ENOENT)
                        return 0;

                return log_error_errno(errno, "Failed to open %s: %m", s->system_storage.path);
        }

        for (;;) {
                /* Declared per iteration, so that the cleanup handlers run on every "continue" */
                _cleanup_free_ char *u = NULL, *full = NULL;
                _cleanup_close_ int fd = -1;
                const char *a, *b;
                struct dirent *de;
                JournalFile *f;
                uid_t uid;

                /* readdir reports both end-of-directory and failure via NULL, errno tells them apart */
                errno = 0;
                de = readdir_no_dot(d);
                if (!de) {
                        if (errno != 0)
                                log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", s->system_storage.path);

                        break;
                }

                /* Only look at files named user-<something>.journal; 'a' points right after the prefix,
                 * 'b' at the beginning of the suffix */
                a = startswith(de->d_name, "user-");
                if (!a)
                        continue;
                b = endswith(de->d_name, ".journal");
                if (!b)
                        continue;

                u = strndup(a, b-a);
                if (!u)
                        return log_oom();

                r = parse_uid(u, &uid);
                if (r < 0) {
                        log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
                        continue;
                }

                /* Is it an open user journal? Those are rotated by the caller, see server_rotate(). */
                if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
                        continue;

                full = path_join(s->system_storage.path, de->d_name);
                if (!full)
                        return log_oom();

                fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
                if (fd < 0) {
                        log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
                                       "Failed to open journal file '%s' for rotation: %m", full);
                        continue;
                }

                /* Make some room in the set of deferred close()s */
                server_vacuum_deferred_closes(s);

                /* Open the file briefly, so that we can archive it */
                r = journal_file_open(fd,
                                      full,
                                      O_RDWR,
                                      0640,
                                      s->compress.enabled,
                                      s->compress.threshold_bytes,
                                      s->seal,
                                      &s->system_storage.metrics,
                                      s->mmap,
                                      s->deferred_closes,
                                      NULL,
                                      &f);
                if (r < 0) {
                        log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);

                        r = journal_file_dispose(dirfd(d), de->d_name);
                        if (r < 0)
                                log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
                        else
                                log_debug("Successfully moved %s out of the way.", full);

                        /* fd is still ours here and gets closed by its cleanup handler */
                        continue;
                }

                TAKE_FD(fd); /* Donated to journal_file_open() */

                r = journal_file_archive(f);
                if (r < 0)
                        log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);

                /* Hand the file over for closing, possibly deferred via the deferred closes set */
                f = journal_initiate_close(f, s->deferred_closes);
        }

        return 0;
}
 609
/* Rotates all journal files of the server: the runtime and system journal, every open per-user journal
 * and - unless the runtime journal is in use - also the per-user journals on disk that are not open.
 * Failures of individual rotations are not propagated. Finally, deferred closes that have completed are
 * processed. */
void server_rotate(Server *s) {
        JournalFile *f;
        void *k;
        int r;

        log_debug("Rotating...");

        /* First, rotate the system journal (either in its runtime flavour or in its system flavour) */
        (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
        (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);

        /* Then, rotate all user journals we have open (keeping them open) */
        ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals) {
                /* do_rotate() updates our local pointer 'f'; propagate the outcome to the hashmap */
                r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
                if (r >= 0)
                        ordered_hashmap_replace(s->user_journals, k, f);
                else if (!f)
                        /* Old file has been closed and deallocated */
                        ordered_hashmap_remove(s->user_journals, k);
        }

        /* Finally, also rotate all user journals we currently do not have open. (But do so only if we
         * actually have access to /var, i.e. are not in the log-to-runtime-journal mode). */
        if (!s->runtime_journal)
                (void) vacuum_offline_user_journals(s);

        server_process_deferred_closes(s);
}
 638
 639 void server_sync(Server *s) {
 640         JournalFile *f;
 641         int r;
 642
 643         if (s->system_journal) {
 644                 r = journal_file_set_offline(s->system_journal, false);
 645                 if (r < 0)
 646                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 647         }
 648
 649         ORDERED_HASHMAP_FOREACH(f, s->user_journals) {
 650                 r = journal_file_set_offline(f, false);
 651                 if (r < 0)
 652                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 653         }
 654
 655         if (s->sync_event_source) {
 656                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 657                 if (r < 0)
 658                         log_error_errno(r, "Failed to disable sync timer source: %m");
 659         }
 660
 661         s->sync_scheduled = false;
 662 }
 663
 664 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 665
 666         int r;
 667
 668         assert(s);
 669         assert(storage);
 670
 671         (void) cache_space_refresh(s, storage);
 672
 673         if (verbose)
 674                 server_space_usage_message(s, storage);
 675
 676         r = journal_directory_vacuum(storage->path, storage->space.limit,
 677                                      storage->metrics.n_max_files, s->max_retention_usec,
 678                                      &s->oldest_file_usec, verbose);
 679         if (r < 0 && r != -ENOENT)
 680                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 681
 682         cache_space_invalidate(&storage->space);
 683 }
 684
 685 int server_vacuum(Server *s, bool verbose) {
 686         assert(s);
 687
 688         log_debug("Vacuuming...");
 689
 690         s->oldest_file_usec = 0;
 691
 692         if (s->system_journal)
 693                 do_vacuum(s, &s->system_storage, verbose);
 694         if (s->runtime_journal)
 695                 do_vacuum(s, &s->runtime_storage, verbose);
 696
 697         return 0;
 698 }
 699
 700 static void server_cache_machine_id(Server *s) {
 701         sd_id128_t id;
 702         int r;
 703
 704         assert(s);
 705
 706         r = sd_id128_get_machine(&id);
 707         if (r < 0)
 708                 return;
 709
 710         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 711 }
 712
 713 static void server_cache_boot_id(Server *s) {
 714         sd_id128_t id;
 715         int r;
 716
 717         assert(s);
 718
 719         r = sd_id128_get_boot(&id);
 720         if (r < 0)
 721                 return;
 722
 723         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 724 }
 725
 726 static void server_cache_hostname(Server *s) {
 727         _cleanup_free_ char *t = NULL;
 728         char *x;
 729
 730         assert(s);
 731
 732         t = gethostname_malloc();
 733         if (!t)
 734                 return;
 735
 736         x = strjoin("_HOSTNAME=", t);
 737         if (!x)
 738                 return;
 739
 740         free_and_replace(s->hostname_field, x);
 741 }
 742
 743 static bool shall_try_append_again(JournalFile *f, int r) {
 744         switch(r) {
 745
 746         case -E2BIG:           /* Hit configured limit          */
 747         case -EFBIG:           /* Hit fs limit                  */
 748         case -EDQUOT:          /* Quota limit hit               */
 749         case -ENOSPC:          /* Disk full                     */
 750                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 751                 return true;
 752
 753         case -EIO:             /* I/O error of some kind (mmap) */
 754                 log_warning("%s: IO error, rotating.", f->path);
 755                 return true;
 756
 757         case -EHOSTDOWN:       /* Other machine                 */
 758                 log_info("%s: Journal file from other machine, rotating.", f->path);
 759                 return true;
 760
 761         case -EBUSY:           /* Unclean shutdown              */
 762                 log_info("%s: Unclean shutdown, rotating.", f->path);
 763                 return true;
 764
 765         case -EPROTONOSUPPORT: /* Unsupported feature           */
 766                 log_info("%s: Unsupported feature, rotating.", f->path);
 767                 return true;
 768
 769         case -EBADMSG:         /* Corrupted                     */
 770         case -ENODATA:         /* Truncated                     */
 771         case -ESHUTDOWN:       /* Already archived              */
 772                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 773                 return true;
 774
 775         case -EIDRM:           /* Journal file has been deleted */
 776                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 777                 return true;
 778
 779         case -ETXTBSY:         /* Journal file is from the future */
 780                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 781                 return true;
 782
 783         case -EAFNOSUPPORT:
 784                 log_warning("%s: underlying file system does not support memory mapping or another required file system feature.", f->path);
 785                 return false;
 786
 787         default:
 788                 return false;
 789         }
 790 }
 791
 792 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
 793         bool vacuumed = false, rotate = false;
 794         struct dual_timestamp ts;
 795         JournalFile *f;
 796         int r;
 797
 798         assert(s);
 799         assert(iovec);
 800         assert(n > 0);
 801
 802         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 803          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 804          * processing it, as we want strictly linear ordering in what we write out.) */
 805         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 806         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 807
 808         if (ts.realtime < s->last_realtime_clock) {
 809                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 810                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 811                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 812                  * bisection works correctly. */
 813
 814                 log_debug("Time jumped backwards, rotating.");
 815                 rotate = true;
 816         } else {
 817
 818                 f = find_journal(s, uid);
 819                 if (!f)
 820                         return;
 821
 822                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 823                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 824                         rotate = true;
 825                 }
 826         }
 827
 828         if (rotate) {
 829                 server_rotate(s);
 830                 server_vacuum(s, false);
 831                 vacuumed = true;
 832
 833                 f = find_journal(s, uid);
 834                 if (!f)
 835                         return;
 836         }
 837
 838         s->last_realtime_clock = ts.realtime;
 839
 840         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 841         if (r >= 0) {
 842                 server_schedule_sync(s, priority);
 843                 return;
 844         }
 845
 846         if (vacuumed || !shall_try_append_again(f, r)) {
 847                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 848                 return;
 849         }
 850
 851         server_rotate(s);
 852         server_vacuum(s, false);
 853
 854         f = find_journal(s, uid);
 855         if (!f)
 856                 return;
 857
 858         log_debug("Retrying write.");
 859         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 860         if (r < 0)
 861                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 862         else
 863                 server_schedule_sync(s, priority);
 864 }
 865
 866 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
 867         if (isset(value)) {                                             \
 868                 char *k;                                                \
 869                 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
 870                 sprintf(k, field "=" format, value);                    \
 871                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 872         }
 873
 874 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
 875         if (!isempty(value)) {                                          \
 876                 char *k;                                                \
 877                 k = strjoina(field "=", value);                         \
 878                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 879         }
 880
 881 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
 882         if (!sd_id128_is_null(value)) {                                 \
 883                 char *k;                                                \
 884                 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
 885                 sd_id128_to_string(value, stpcpy(k, field "="));        \
 886                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 887         }
 888
 889 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
 890         if (value_size > 0) {                                           \
 891                 char *k;                                                \
 892                 k = newa(char, STRLEN(field "=") + value_size + 1);     \
 893                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
 894                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 895         }                                                               \
 896
 897 static void dispatch_message_real(
 898                 Server *s,
 899                 struct iovec *iovec, size_t n, size_t m,
 900                 const ClientContext *c,
 901                 const struct timeval *tv,
 902                 int priority,
 903                 pid_t object_pid) {
 904
 905         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 906         _cleanup_free_ char *cmdline1 = NULL, *cmdline2 = NULL;
 907         uid_t journal_uid;
 908         ClientContext *o;
 909
 910         assert(s);
 911         assert(iovec);
 912         assert(n > 0);
 913         assert(n +
 914                N_IOVEC_META_FIELDS +
 915                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 916                client_context_extra_fields_n_iovec(c) <= m);
 917
 918         if (c) {
 919                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 920                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 921                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 922
 923                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */
 924                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */
 925
 926                 if (c->cmdline)
 927                         /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack.
 928                          * Let's use a heap allocation for this one. */
 929                         cmdline1 = set_iovec_string_field(iovec, &n, "_CMDLINE=", c->cmdline);
 930
 931                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */
 932                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 933                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 934                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 935
 936                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP"); /* A path */
 937                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 938                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 939                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */
 940                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 941                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 942                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 943
 944                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 945
 946                 if (c->extra_fields_n_iovec > 0) {
 947                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 948                         n += c->extra_fields_n_iovec;
 949                 }
 950         }
 951
 952         assert(n <= m);
 953
 954         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 955
 956                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 957                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 958                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 959
 960                 /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */
 961                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 962                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 963                 if (o->cmdline)
 964                         cmdline2 = set_iovec_string_field(iovec, &n, "OBJECT_CMDLINE=", o->cmdline);
 965
 966                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 967                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 968                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 969                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 970
 971                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 972                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 973                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 974                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 975                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 976                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 977                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 978
 979                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 980         }
 981
 982         assert(n <= m);
 983
 984         if (tv) {
 985                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 986                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 987         }
 988
 989         /* Note that strictly speaking storing the boot id here is
 990          * redundant since the entry includes this in-line
 991          * anyway. However, we need this indexed, too. */
 992         if (!isempty(s->boot_id_field))
 993                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 994
 995         if (!isempty(s->machine_id_field))
 996                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 997
 998         if (!isempty(s->hostname_field))
 999                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
1000
1001         if (!isempty(s->namespace_field))
1002                 iovec[n++] = IOVEC_MAKE_STRING(s->namespace_field);
1003
1004         assert(n <= m);
1005
1006         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
1007                 /* Split up strictly by (non-root) UID */
1008                 journal_uid = c->uid;
1009         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
1010                 /* Split up by login UIDs.  We do this only if the
1011                  * realuid is not root, in order not to accidentally
1012                  * leak privileged information to the user that is
1013                  * logged by a privileged process that is part of an
1014                  * unprivileged session. */
1015                 journal_uid = c->owner_uid;
1016         else
1017                 journal_uid = 0;
1018
1019         write_to_journal(s, journal_uid, iovec, n, priority);
1020 }
1021
1022 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
1023
1024         struct iovec *iovec;
1025         size_t n = 0, k, m;
1026         va_list ap;
1027         int r;
1028
1029         assert(s);
1030         assert(format);
1031
1032         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
1033         iovec = newa(struct iovec, m);
1034
1035         assert_cc(3 == LOG_FAC(LOG_DAEMON));
1036         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1037         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1038
1039         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1040         assert_cc(6 == LOG_INFO);
1041         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
1042
1043         if (message_id)
1044                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
1045         k = n;
1046
1047         va_start(ap, format);
1048         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
1049         /* Error handling below */
1050         va_end(ap);
1051
1052         if (r >= 0)
1053                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1054
1055         while (k < n)
1056                 free(iovec[k++].iov_base);
1057
1058         if (r < 0) {
1059                 /* We failed to format the message. Emit a warning instead. */
1060                 char buf[LINE_MAX];
1061
1062                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror_safe(r));
1063
1064                 n = 3;
1065                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
1066                 iovec[n++] = IOVEC_MAKE_STRING(buf);
1067                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1068         }
1069 }
1070
1071 void server_dispatch_message(
1072                 Server *s,
1073                 struct iovec *iovec, size_t n, size_t m,
1074                 ClientContext *c,
1075                 const struct timeval *tv,
1076                 int priority,
1077                 pid_t object_pid) {
1078
1079         uint64_t available = 0;
1080         int rl;
1081
1082         assert(s);
1083         assert(iovec || n == 0);
1084
1085         if (n == 0)
1086                 return;
1087
1088         if (LOG_PRI(priority) > s->max_level_store)
1089                 return;
1090
1091         /* Stop early in case the information will not be stored
1092          * in a journal. */
1093         if (s->storage == STORAGE_NONE)
1094                 return;
1095
1096         if (c && c->unit) {
1097                 (void) determine_space(s, &available, NULL);
1098
1099                 rl = journal_ratelimit_test(s->ratelimit, c->unit, c->log_ratelimit_interval, c->log_ratelimit_burst, priority & LOG_PRIMASK, available);
1100                 if (rl == 0)
1101                         return;
1102
1103                 /* Write a suppression message if we suppressed something */
1104                 if (rl > 1)
1105                         server_driver_message(s, c->pid,
1106                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1107                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
1108                                               "N_DROPPED=%i", rl - 1,
1109                                               NULL);
1110         }
1111
1112         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
1113 }
1114
1115 int server_flush_to_var(Server *s, bool require_flag_file) {
1116         char ts[FORMAT_TIMESPAN_MAX];
1117         sd_journal *j = NULL;
1118         const char *fn;
1119         unsigned n = 0;
1120         usec_t start;
1121         int r, k;
1122
1123         assert(s);
1124
1125         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1126                 return 0;
1127
1128         if (s->namespace) /* Flushing concept does not exist for namespace instances */
1129                 return 0;
1130
1131         if (!s->runtime_journal) /* Nothing to flush? */
1132                 return 0;
1133
1134         if (require_flag_file && !flushed_flag_is_set(s))
1135                 return 0;
1136
1137         (void) system_journal_open(s, true, false);
1138
1139         if (!s->system_journal)
1140                 return 0;
1141
1142         log_debug("Flushing to %s...", s->system_storage.path);
1143
1144         start = now(CLOCK_MONOTONIC);
1145
1146         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1147         if (r < 0)
1148                 return log_error_errno(r, "Failed to read runtime journal: %m");
1149
1150         sd_journal_set_data_threshold(j, 0);
1151
1152         SD_JOURNAL_FOREACH(j) {
1153                 Object *o = NULL;
1154                 JournalFile *f;
1155
1156                 f = j->current_file;
1157                 assert(f && f->current_offset > 0);
1158
1159                 n++;
1160
1161                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1162                 if (r < 0) {
1163                         log_error_errno(r, "Can't read entry: %m");
1164                         goto finish;
1165                 }
1166
1167                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1168                 if (r >= 0)
1169                         continue;
1170
1171                 if (!shall_try_append_again(s->system_journal, r)) {
1172                         log_error_errno(r, "Can't write entry: %m");
1173                         goto finish;
1174                 }
1175
1176                 server_rotate(s);
1177                 server_vacuum(s, false);
1178
1179                 if (!s->system_journal) {
1180                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1181                         r = -EIO;
1182                         goto finish;
1183                 }
1184
1185                 log_debug("Retrying write.");
1186                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1187                 if (r < 0) {
1188                         log_error_errno(r, "Can't write entry: %m");
1189                         goto finish;
1190                 }
1191         }
1192
1193         r = 0;
1194
1195 finish:
1196         if (s->system_journal)
1197                 journal_file_post_change(s->system_journal);
1198
1199         s->runtime_journal = journal_file_close(s->runtime_journal);
1200
1201         if (r >= 0)
1202                 (void) rm_rf(s->runtime_storage.path, REMOVE_ROOT);
1203
1204         sd_journal_close(j);
1205
1206         server_driver_message(s, 0, NULL,
1207                               LOG_MESSAGE("Time spent on flushing to %s is %s for %u entries.",
1208                                           s->system_storage.path,
1209                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1210                                           n),
1211                               NULL);
1212
1213         fn = strjoina(s->runtime_directory, "/flushed");
1214         k = touch(fn);
1215         if (k < 0)
1216                 log_warning_errno(k, "Failed to touch %s, ignoring: %m", fn);
1217
1218         server_refresh_idle_timer(s);
1219         return r;
1220 }
1221
1222 static int server_relinquish_var(Server *s) {
1223         const char *fn;
1224         assert(s);
1225
1226         if (s->storage == STORAGE_NONE)
1227                 return 0;
1228
1229         if (s->namespace) /* Concept does not exist for namespaced instances */
1230                 return -EOPNOTSUPP;
1231
1232         if (s->runtime_journal && !s->system_journal)
1233                 return 0;
1234
1235         log_debug("Relinquishing %s...", s->system_storage.path);
1236
1237         (void) system_journal_open(s, false, true);
1238
1239         s->system_journal = journal_file_close(s->system_journal);
1240         ordered_hashmap_clear_with_destructor(s->user_journals, journal_file_close);
1241         set_clear_with_destructor(s->deferred_closes, journal_file_close);
1242
1243         fn = strjoina(s->runtime_directory, "/flushed");
1244         if (unlink(fn) < 0 && errno != ENOENT)
1245                 log_warning_errno(errno, "Failed to unlink %s, ignoring: %m", fn);
1246
1247         server_refresh_idle_timer(s);
1248         return 0;
1249 }
1250
1251 int server_process_datagram(
1252                 sd_event_source *es,
1253                 int fd,
1254                 uint32_t revents,
1255                 void *userdata) {
1256
1257         Server *s = userdata;
1258         struct ucred *ucred = NULL;
1259         struct timeval *tv = NULL;
1260         struct cmsghdr *cmsg;
1261         char *label = NULL;
1262         size_t label_len = 0, m;
1263         struct iovec iovec;
1264         ssize_t n;
1265         int *fds = NULL, v = 0;
1266         size_t n_fds = 0;
1267
1268         /* We use NAME_MAX space for the SELinux label here. The kernel currently enforces no limit, but
1269          * according to suggestions from the SELinux people this will change and it will probably be
1270          * identical to NAME_MAX. For now we use that, but this should be updated one day when the final
1271          * limit is known. */
1272         CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) +
1273                          CMSG_SPACE(sizeof(struct timeval)) +
1274                          CMSG_SPACE(sizeof(int)) + /* fd */
1275                          CMSG_SPACE(NAME_MAX) /* selinux label */) control;
1276
1277         union sockaddr_union sa = {};
1278
1279         struct msghdr msghdr = {
1280                 .msg_iov = &iovec,
1281                 .msg_iovlen = 1,
1282                 .msg_control = &control,
1283                 .msg_controllen = sizeof(control),
1284                 .msg_name = &sa,
1285                 .msg_namelen = sizeof(sa),
1286         };
1287
1288         assert(s);
1289         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1290
1291         if (revents != EPOLLIN)
1292                 return log_error_errno(SYNTHETIC_ERRNO(EIO),
1293                                        "Got invalid event from epoll for datagram fd: %" PRIx32,
1294                                        revents);
1295
1296         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1297          * it.) */
1298         (void) ioctl(fd, SIOCINQ, &v);
1299
1300         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1301         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1302                             (size_t) LINE_MAX,
1303                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1304
1305         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1306                 return log_oom();
1307
1308         iovec = IOVEC_MAKE(s->buffer, s->buffer_size - 1); /* Leave room for trailing NUL we add later */
1309
1310         n = recvmsg_safe(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1311         if (IN_SET(n, -EINTR, -EAGAIN))
1312                 return 0;
1313         if (n == -EXFULL) {
1314                 log_warning("Got message with truncated control data (too many fds sent?), ignoring.");
1315                 return 0;
1316         }
1317         if (n < 0)
1318                 return log_error_errno(n, "recvmsg() failed: %m");
1319
1320         CMSG_FOREACH(cmsg, &msghdr)
1321                 if (cmsg->cmsg_level == SOL_SOCKET &&
1322                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1323                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
1324                         assert(!ucred);
1325                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1326                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1327                          cmsg->cmsg_type == SCM_SECURITY) {
1328                         assert(!label);
1329                         label = (char*) CMSG_DATA(cmsg);
1330                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1331                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1332                            cmsg->cmsg_type == SO_TIMESTAMP &&
1333                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval))) {
1334                         assert(!tv);
1335                         tv = (struct timeval*) CMSG_DATA(cmsg);
1336                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1337                          cmsg->cmsg_type == SCM_RIGHTS) {
1338                         assert(!fds);
1339                         fds = (int*) CMSG_DATA(cmsg);
1340                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1341                 }
1342
1343         /* And a trailing NUL, just in case */
1344         s->buffer[n] = 0;
1345
1346         if (fd == s->syslog_fd) {
1347                 if (n > 0 && n_fds == 0)
1348                         server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1349                 else if (n_fds > 0)
1350                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1351
1352         } else if (fd == s->native_fd) {
1353                 if (n > 0 && n_fds == 0)
1354                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1355                 else if (n == 0 && n_fds == 1)
1356                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1357                 else if (n_fds > 0)
1358                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1359
1360         } else {
1361                 assert(fd == s->audit_fd);
1362
1363                 if (n > 0 && n_fds == 0)
1364                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1365                 else if (n_fds > 0)
1366                         log_warning("Got file descriptors via audit socket. Ignoring.");
1367         }
1368
1369         close_many(fds, n_fds);
1370
1371         server_refresh_idle_timer(s);
1372         return 0;
1373 }
1374
1375 static void server_full_flush(Server *s) {
1376         assert(s);
1377
1378         (void) server_flush_to_var(s, false);
1379         server_sync(s);
1380         server_vacuum(s, false);
1381
1382         server_space_usage_message(s, NULL);
1383
1384         server_refresh_idle_timer(s);
1385 }
1386
1387 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1388         Server *s = userdata;
1389
1390         assert(s);
1391
1392         if (s->namespace) {
1393                 log_error("Received SIGUSR1 signal from PID " PID_FMT ", but flushing runtime journals not supported for namespaced instances.", si->ssi_pid);
1394                 return 0;
1395         }
1396
1397         log_info("Received SIGUSR1 signal from PID " PID_FMT ", as request to flush runtime journal.", si->ssi_pid);
1398         server_full_flush(s);
1399
1400         return 0;
1401 }
1402
1403 static void server_full_rotate(Server *s) {
1404         const char *fn;
1405         int r;
1406
1407         assert(s);
1408
1409         server_rotate(s);
1410         server_vacuum(s, true);
1411
1412         if (s->system_journal)
1413                 patch_min_use(&s->system_storage);
1414         if (s->runtime_journal)
1415                 patch_min_use(&s->runtime_storage);
1416
1417         /* Let clients know when the most recent rotation happened. */
1418         fn = strjoina(s->runtime_directory, "/rotated");
1419         r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC));
1420         if (r < 0)
1421                 log_warning_errno(r, "Failed to write %s, ignoring: %m", fn);
1422 }
1423
1424 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1425         Server *s = userdata;
1426
1427         assert(s);
1428
1429         log_info("Received SIGUSR2 signal from PID " PID_FMT ", as request to rotate journal.", si->ssi_pid);
1430         server_full_rotate(s);
1431
1432         return 0;
1433 }
1434
1435 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1436         Server *s = userdata;
1437
1438         assert(s);
1439
1440         log_received_signal(LOG_INFO, si);
1441
1442         sd_event_exit(s->event, 0);
1443         return 0;
1444 }
1445
1446 static void server_full_sync(Server *s) {
1447         const char *fn;
1448         int r;
1449
1450         assert(s);
1451
1452         server_sync(s);
1453
1454         /* Let clients know when the most recent sync happened. */
1455         fn = strjoina(s->runtime_directory, "/synced");
1456         r = write_timestamp_file_atomic(fn, now(CLOCK_MONOTONIC));
1457         if (r < 0)
1458                 log_warning_errno(r, "Failed to write %s, ignoring: %m", fn);
1459
1460         return;
1461 }
1462
1463 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1464         Server *s = userdata;
1465
1466         assert(s);
1467
1468         log_debug("Received SIGRTMIN1 signal from PID " PID_FMT ", as request to sync.", si->ssi_pid );
1469         server_full_sync(s);
1470
1471         return 0;
1472 }
1473
1474 static int setup_signals(Server *s) {
1475         int r;
1476
1477         assert(s);
1478
1479         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1480
1481         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1482         if (r < 0)
1483                 return r;
1484
1485         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1486         if (r < 0)
1487                 return r;
1488
1489         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1490         if (r < 0)
1491                 return r;
1492
1493         /* Let's process SIGTERM late, so that we flush all queued messages to disk before we exit */
1494         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1495         if (r < 0)
1496                 return r;
1497
1498         /* When journald is invoked on the terminal (when debugging), it's useful if C-c is handled
1499          * equivalent to SIGTERM. */
1500         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1501         if (r < 0)
1502                 return r;
1503
1504         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1505         if (r < 0)
1506                 return r;
1507
1508         /* SIGRTMIN+1 causes an immediate sync. We process this very late, so that everything else queued at
1509          * this point is really written to disk. Clients can watch /run/systemd/journal/synced with inotify
1510          * until its mtime changes to see when a sync happened. */
1511         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1512         if (r < 0)
1513                 return r;
1514
1515         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1516         if (r < 0)
1517                 return r;
1518
1519         return 0;
1520 }
1521
1522 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1523         Server *s = data;
1524         int r;
1525
1526         assert(s);
1527
1528         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1529
1530                 r = value ? parse_boolean(value) : true;
1531                 if (r < 0)
1532                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1533                 else
1534                         s->forward_to_syslog = r;
1535
1536         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1537
1538                 r = value ? parse_boolean(value) : true;
1539                 if (r < 0)
1540                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1541                 else
1542                         s->forward_to_kmsg = r;
1543
1544         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1545
1546                 r = value ? parse_boolean(value) : true;
1547                 if (r < 0)
1548                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1549                 else
1550                         s->forward_to_console = r;
1551
1552         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1553
1554                 r = value ? parse_boolean(value) : true;
1555                 if (r < 0)
1556                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1557                 else
1558                         s->forward_to_wall = r;
1559
1560         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1561
1562                 if (proc_cmdline_value_missing(key, value))
1563                         return 0;
1564
1565                 r = log_level_from_string(value);
1566                 if (r < 0)
1567                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1568                 else
1569                         s->max_level_console = r;
1570
1571         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1572
1573                 if (proc_cmdline_value_missing(key, value))
1574                         return 0;
1575
1576                 r = log_level_from_string(value);
1577                 if (r < 0)
1578                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1579                 else
1580                         s->max_level_store = r;
1581
1582         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1583
1584                 if (proc_cmdline_value_missing(key, value))
1585                         return 0;
1586
1587                 r = log_level_from_string(value);
1588                 if (r < 0)
1589                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1590                 else
1591                         s->max_level_syslog = r;
1592
1593         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1594
1595                 if (proc_cmdline_value_missing(key, value))
1596                         return 0;
1597
1598                 r = log_level_from_string(value);
1599                 if (r < 0)
1600                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1601                 else
1602                         s->max_level_kmsg = r;
1603
1604         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1605
1606                 if (proc_cmdline_value_missing(key, value))
1607                         return 0;
1608
1609                 r = log_level_from_string(value);
1610                 if (r < 0)
1611                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1612                 else
1613                         s->max_level_wall = r;
1614
1615         } else if (startswith(key, "systemd.journald"))
1616                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1617
1618         /* do not warn about state here, since probably systemd already did */
1619         return 0;
1620 }
1621
1622 static int server_parse_config_file(Server *s) {
1623         int r;
1624
1625         assert(s);
1626
1627         if (s->namespace) {
1628                 const char *namespaced;
1629
1630                 /* If we are running in namespace mode, load the namespace specific configuration file, and nothing else */
1631                 namespaced = strjoina(PKGSYSCONFDIR "/journald@", s->namespace, ".conf");
1632
1633                 r = config_parse(NULL,
1634                                  namespaced, NULL,
1635                                  "Journal\0",
1636                                  config_item_perf_lookup, journald_gperf_lookup,
1637                                  CONFIG_PARSE_WARN, s,
1638                                  NULL);
1639                 if (r < 0)
1640                         return r;
1641
1642                 return 0;
1643         }
1644
1645         return config_parse_many_nulstr(
1646                         PKGSYSCONFDIR "/journald.conf",
1647                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1648                         "Journal\0",
1649                         config_item_perf_lookup, journald_gperf_lookup,
1650                         CONFIG_PARSE_WARN, s, NULL);
1651 }
1652
1653 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1654         Server *s = userdata;
1655
1656         assert(s);
1657
1658         server_sync(s);
1659         return 0;
1660 }
1661
1662 int server_schedule_sync(Server *s, int priority) {
1663         int r;
1664
1665         assert(s);
1666
1667         if (priority <= LOG_CRIT) {
1668                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1669                 server_sync(s);
1670                 return 0;
1671         }
1672
1673         if (s->sync_scheduled)
1674                 return 0;
1675
1676         if (s->sync_interval_usec > 0) {
1677
1678                 if (!s->sync_event_source) {
1679                         r = sd_event_add_time_relative(
1680                                         s->event,
1681                                         &s->sync_event_source,
1682                                         CLOCK_MONOTONIC,
1683                                         s->sync_interval_usec, 0,
1684                                         server_dispatch_sync, s);
1685                         if (r < 0)
1686                                 return r;
1687
1688                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1689                 } else {
1690                         r = sd_event_source_set_time_relative(s->sync_event_source, s->sync_interval_usec);
1691                         if (r < 0)
1692                                 return r;
1693
1694                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1695                 }
1696                 if (r < 0)
1697                         return r;
1698
1699                 s->sync_scheduled = true;
1700         }
1701
1702         return 0;
1703 }
1704
1705 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1706         Server *s = userdata;
1707
1708         assert(s);
1709
1710         server_cache_hostname(s);
1711         return 0;
1712 }
1713
1714 static int server_open_hostname(Server *s) {
1715         int r;
1716
1717         assert(s);
1718
1719         s->hostname_fd = open("/proc/sys/kernel/hostname",
1720                               O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1721         if (s->hostname_fd < 0)
1722                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1723
1724         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1725         if (r < 0) {
1726                 /* kernels prior to 3.2 don't support polling this file. Ignore
1727                  * the failure. */
1728                 if (r == -EPERM) {
1729                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1730                         s->hostname_fd = safe_close(s->hostname_fd);
1731                         return 0;
1732                 }
1733
1734                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1735         }
1736
1737         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1738         if (r < 0)
1739                 return log_error_errno(r, "Failed to adjust priority of hostname event source: %m");
1740
1741         return 0;
1742 }
1743
1744 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1745         Server *s = userdata;
1746         int r;
1747
1748         assert(s);
1749         assert(s->notify_event_source == es);
1750         assert(s->notify_fd == fd);
1751
1752         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1753          * message on it. Either it's the watchdog event, the initial
1754          * READY=1 event or an stdout stream event. If there's nothing
1755          * to write anymore, turn our event source off. The next time
1756          * there's something to send it will be turned on again. */
1757
1758         if (!s->sent_notify_ready) {
1759                 static const char p[] =
1760                         "READY=1\n"
1761                         "STATUS=Processing requests...";
1762                 ssize_t l;
1763
1764                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1765                 if (l < 0) {
1766                         if (errno == EAGAIN)
1767                                 return 0;
1768
1769                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1770                 }
1771
1772                 s->sent_notify_ready = true;
1773                 log_debug("Sent READY=1 notification.");
1774
1775         } else if (s->send_watchdog) {
1776
1777                 static const char p[] =
1778                         "WATCHDOG=1";
1779
1780                 ssize_t l;
1781
1782                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1783                 if (l < 0) {
1784                         if (errno == EAGAIN)
1785                                 return 0;
1786
1787                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1788                 }
1789
1790                 s->send_watchdog = false;
1791                 log_debug("Sent WATCHDOG=1 notification.");
1792
1793         } else if (s->stdout_streams_notify_queue)
1794                 /* Dispatch one stream notification event */
1795                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1796
1797         /* Leave us enabled if there's still more to do. */
1798         if (s->send_watchdog || s->stdout_streams_notify_queue)
1799                 return 0;
1800
1801         /* There was nothing to do anymore, let's turn ourselves off. */
1802         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1803         if (r < 0)
1804                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1805
1806         return 0;
1807 }
1808
1809 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1810         Server *s = userdata;
1811         int r;
1812
1813         assert(s);
1814
1815         s->send_watchdog = true;
1816
1817         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1818         if (r < 0)
1819                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1820
1821         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1822         if (r < 0)
1823                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1824
1825         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1826         if (r < 0)
1827                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1828
1829         return 0;
1830 }
1831
1832 static int server_connect_notify(Server *s) {
1833         union sockaddr_union sa;
1834         socklen_t sa_len;
1835         const char *e;
1836         int r;
1837
1838         assert(s);
1839         assert(s->notify_fd < 0);
1840         assert(!s->notify_event_source);
1841
1842         /*
1843          * So here's the problem: we'd like to send notification messages to PID 1, but we cannot do that via
1844          * sd_notify(), since that's synchronous, and we might end up blocking on it. Specifically: given
1845          * that PID 1 might block on dbus-daemon during IPC, and dbus-daemon is logging to us, and might
1846          * hence block on us, we might end up in a deadlock if we block on sending PID 1 notification
1847          * messages — by generating a full blocking circle. To avoid this, let's create a non-blocking
1848          * socket, and connect it to the notification socket, and then wait for POLLOUT before we send
1849          * anything. This should efficiently avoid any deadlocks, as we'll never block on PID 1, hence PID 1
1850          * can safely block on dbus-daemon which can safely block on us again.
1851          *
1852          * Don't think that this issue is real? It is, see: https://github.com/systemd/systemd/issues/1505
1853          */
1854
1855         e = getenv("NOTIFY_SOCKET");
1856         if (!e)
1857                 return 0;
1858
1859         r = sockaddr_un_set_path(&sa.un, e);
1860         if (r < 0)
1861                 return log_error_errno(r, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
1862         sa_len = r;
1863
1864         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1865         if (s->notify_fd < 0)
1866                 return log_error_errno(errno, "Failed to create notify socket: %m");
1867
1868         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1869
1870         r = connect(s->notify_fd, &sa.sa, sa_len);
1871         if (r < 0)
1872                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1873
1874         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1875         if (r < 0)
1876                 return log_error_errno(r, "Failed to watch notification socket: %m");
1877
1878         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1879                 s->send_watchdog = true;
1880
1881                 r = sd_event_add_time_relative(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1882                 if (r < 0)
1883                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1884         }
1885
1886         /* This should fire pretty soon, which we'll use to send the READY=1 event. */
1887
1888         return 0;
1889 }
1890
1891 static int synchronize_second_half(sd_event_source *event_source, void *userdata) {
1892         Varlink *link = userdata;
1893         Server *s;
1894         int r;
1895
1896         assert(link);
1897         assert_se(s = varlink_get_userdata(link));
1898
1899         /* This is the "second half" of the Synchronize() varlink method. This function is called as deferred
1900          * event source at a low priority to ensure the synchronization completes after all queued log
1901          * messages are processed. */
1902         server_full_sync(s);
1903
1904         /* Let's get rid of the event source now, by marking it as non-floating again. It then has no ref
1905          * anymore and is immediately destroyed after we return from this function, i.e. from this event
1906          * source handler at the end. */
1907         r = sd_event_source_set_floating(event_source, false);
1908         if (r < 0)
1909                 return log_error_errno(r, "Failed to mark event source as non-floating: %m");
1910
1911         return varlink_reply(link, NULL);
1912 }
1913
1914 static void synchronize_destroy(void *userdata) {
1915         varlink_unref(userdata);
1916 }
1917
1918 static int vl_method_synchronize(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
1919         _cleanup_(sd_event_source_unrefp) sd_event_source *event_source = NULL;
1920         Server *s = userdata;
1921         int r;
1922
1923         assert(link);
1924         assert(s);
1925
1926         if (json_variant_elements(parameters) > 0)
1927                 return varlink_error_invalid_parameter(link, parameters);
1928
1929         log_info("Received client request to rotate journal.");
1930
1931         /* We don't do the main work now, but instead enqueue a deferred event loop job which will do
1932          * it. That job is scheduled at low priority, so that we return from this method call only after all
1933          * queued but not processed log messages are written to disk, so that this method call returning can
1934          * be used as nice synchronization point. */
1935         r = sd_event_add_defer(s->event, &event_source, synchronize_second_half, link);
1936         if (r < 0)
1937                 return log_error_errno(r, "Failed to allocate defer event source: %m");
1938
1939         r = sd_event_source_set_destroy_callback(event_source, synchronize_destroy);
1940         if (r < 0)
1941                 return log_error_errno(r, "Failed to set event source destroy callback: %m");
1942
1943         varlink_ref(link); /* The varlink object is now left to the destroy callback to unref */
1944
1945         r = sd_event_source_set_priority(event_source, SD_EVENT_PRIORITY_NORMAL+15);
1946         if (r < 0)
1947                 return log_error_errno(r, "Failed to set defer event source priority: %m");
1948
1949         /* Give up ownership of this event source. It will now be destroyed along with event loop itself,
1950          * unless it destroys itself earlier. */
1951         r = sd_event_source_set_floating(event_source, true);
1952         if (r < 0)
1953                 return log_error_errno(r, "Failed to mark event source as floating: %m");
1954
1955         (void) sd_event_source_set_description(event_source, "deferred-sync");
1956
1957         return 0;
1958 }
1959
1960 static int vl_method_rotate(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
1961         Server *s = userdata;
1962
1963         assert(link);
1964         assert(s);
1965
1966         if (json_variant_elements(parameters) > 0)
1967                 return varlink_error_invalid_parameter(link, parameters);
1968
1969         log_info("Received client request to rotate journal.");
1970         server_full_rotate(s);
1971
1972         return varlink_reply(link, NULL);
1973 }
1974
1975 static int vl_method_flush_to_var(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
1976         Server *s = userdata;
1977
1978         assert(link);
1979         assert(s);
1980
1981         if (json_variant_elements(parameters) > 0)
1982                 return varlink_error_invalid_parameter(link, parameters);
1983         if (s->namespace)
1984                 return varlink_error(link, "io.systemd.Journal.NotSupportedByNamespaces", NULL);
1985
1986         log_info("Received client request to flush runtime journal.");
1987         server_full_flush(s);
1988
1989         return varlink_reply(link, NULL);
1990 }
1991
1992 static int vl_method_relinquish_var(Varlink *link, JsonVariant *parameters, VarlinkMethodFlags flags, void *userdata) {
1993         Server *s = userdata;
1994
1995         assert(link);
1996         assert(s);
1997
1998         if (json_variant_elements(parameters) > 0)
1999                 return varlink_error_invalid_parameter(link, parameters);
2000         if (s->namespace)
2001                 return varlink_error(link, "io.systemd.Journal.NotSupportedByNamespaces", NULL);
2002
2003         log_info("Received client request to relinquish %s access.", s->system_storage.path);
2004         server_relinquish_var(s);
2005
2006         return varlink_reply(link, NULL);
2007 }
2008
2009 static int vl_connect(VarlinkServer *server, Varlink *link, void *userdata) {
2010         Server *s = userdata;
2011
2012         assert(server);
2013         assert(link);
2014         assert(s);
2015
2016         (void) server_start_or_stop_idle_timer(s); /* maybe we are no longer idle */
2017
2018         return 0;
2019 }
2020
2021 static void vl_disconnect(VarlinkServer *server, Varlink *link, void *userdata) {
2022         Server *s = userdata;
2023
2024         assert(server);
2025         assert(link);
2026         assert(s);
2027
2028         (void) server_start_or_stop_idle_timer(s); /* maybe we are idle now */
2029 }
2030
2031 static int server_open_varlink(Server *s, const char *socket, int fd) {
2032         int r;
2033
2034         assert(s);
2035
2036         r = varlink_server_new(&s->varlink_server, VARLINK_SERVER_ROOT_ONLY);
2037         if (r < 0)
2038                 return r;
2039
2040         varlink_server_set_userdata(s->varlink_server, s);
2041
2042         r = varlink_server_bind_method_many(
2043                         s->varlink_server,
2044                         "io.systemd.Journal.Synchronize",   vl_method_synchronize,
2045                         "io.systemd.Journal.Rotate",        vl_method_rotate,
2046                         "io.systemd.Journal.FlushToVar",    vl_method_flush_to_var,
2047                         "io.systemd.Journal.RelinquishVar", vl_method_relinquish_var);
2048         if (r < 0)
2049                 return r;
2050
2051         r = varlink_server_bind_connect(s->varlink_server, vl_connect);
2052         if (r < 0)
2053                 return r;
2054
2055         r = varlink_server_bind_disconnect(s->varlink_server, vl_disconnect);
2056         if (r < 0)
2057                 return r;
2058
2059         if (fd < 0)
2060                 r = varlink_server_listen_address(s->varlink_server, socket, 0600);
2061         else
2062                 r = varlink_server_listen_fd(s->varlink_server, fd);
2063         if (r < 0)
2064                 return r;
2065
2066         r = varlink_server_attach_event(s->varlink_server, s->event, SD_EVENT_PRIORITY_NORMAL);
2067         if (r < 0)
2068                 return r;
2069
2070         return 0;
2071 }
2072
2073 static bool server_is_idle(Server *s) {
2074         assert(s);
2075
2076         /* The server for the main namespace is never idle */
2077         if (!s->namespace)
2078                 return false;
2079
2080         /* If a retention maximum is set larger than the idle time we need to be running to enforce it, hence
2081          * turn off the idle logic. */
2082         if (s->max_retention_usec > IDLE_TIMEOUT_USEC)
2083                 return false;
2084
2085         /* We aren't idle if we have a varlink client */
2086         if (varlink_server_current_connections(s->varlink_server) > 0)
2087                 return false;
2088
2089         /* If we have stdout streams we aren't idle */
2090         if (s->n_stdout_streams > 0)
2091                 return false;
2092
2093         return true;
2094 }
2095
2096 static int server_idle_handler(sd_event_source *source, uint64_t usec, void *userdata) {
2097         Server *s = userdata;
2098
2099         assert(source);
2100         assert(s);
2101
2102         log_debug("Server is idle, exiting.");
2103         sd_event_exit(s->event, 0);
2104         return 0;
2105 }
2106
2107 int server_start_or_stop_idle_timer(Server *s) {
2108         _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
2109         int r;
2110
2111         assert(s);
2112
2113         if (!server_is_idle(s)) {
2114                 s->idle_event_source = sd_event_source_disable_unref(s->idle_event_source);
2115                 return 0;
2116         }
2117
2118         if (s->idle_event_source)
2119                 return 1;
2120
2121         r = sd_event_add_time_relative(s->event, &source, CLOCK_MONOTONIC, IDLE_TIMEOUT_USEC, 0, server_idle_handler, s);
2122         if (r < 0)
2123                 return log_error_errno(r, "Failed to allocate idle timer: %m");
2124
2125         r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IDLE);
2126         if (r < 0)
2127                 return log_error_errno(r, "Failed to set idle timer priority: %m");
2128
2129         (void) sd_event_source_set_description(source, "idle-timer");
2130
2131         s->idle_event_source = TAKE_PTR(source);
2132         return 1;
2133 }
2134
2135 int server_refresh_idle_timer(Server *s) {
2136         int r;
2137
2138         assert(s);
2139
2140         if (!s->idle_event_source)
2141                 return 0;
2142
2143         r = sd_event_source_set_time_relative(s->idle_event_source, IDLE_TIMEOUT_USEC);
2144         if (r < 0)
2145                 return log_error_errno(r, "Failed to refresh idle timer: %m");
2146
2147         return 1;
2148 }
2149
2150 static int set_namespace(Server *s, const char *namespace) {
2151         assert(s);
2152
2153         if (!namespace)
2154                 return 0;
2155
2156         if (!log_namespace_name_valid(namespace))
2157                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Specified namespace name not valid, refusing: %s", namespace);
2158
2159         s->namespace = strdup(namespace);
2160         if (!s->namespace)
2161                 return log_oom();
2162
2163         s->namespace_field = strjoin("_NAMESPACE=", namespace);
2164         if (!s->namespace_field)
2165                 return log_oom();
2166
2167         return 1;
2168 }
2169
2170 int server_init(Server *s, const char *namespace) {
2171         const char *native_socket, *syslog_socket, *stdout_socket, *varlink_socket, *e;
2172         _cleanup_fdset_free_ FDSet *fds = NULL;
2173         int n, r, fd, varlink_fd = -1;
2174         bool no_sockets;
2175
2176         assert(s);
2177
2178         *s = (Server) {
2179                 .syslog_fd = -1,
2180                 .native_fd = -1,
2181                 .stdout_fd = -1,
2182                 .dev_kmsg_fd = -1,
2183                 .audit_fd = -1,
2184                 .hostname_fd = -1,
2185                 .notify_fd = -1,
2186
2187                 .compress.enabled = true,
2188                 .compress.threshold_bytes = (uint64_t) -1,
2189                 .seal = true,
2190
2191                 .set_audit = true,
2192
2193                 .watchdog_usec = USEC_INFINITY,
2194
2195                 .sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC,
2196                 .sync_scheduled = false,
2197
2198                 .ratelimit_interval = DEFAULT_RATE_LIMIT_INTERVAL,
2199                 .ratelimit_burst = DEFAULT_RATE_LIMIT_BURST,
2200
2201                 .forward_to_wall = true,
2202
2203                 .max_file_usec = DEFAULT_MAX_FILE_USEC,
2204
2205                 .max_level_store = LOG_DEBUG,
2206                 .max_level_syslog = LOG_DEBUG,
2207                 .max_level_kmsg = LOG_NOTICE,
2208                 .max_level_console = LOG_INFO,
2209                 .max_level_wall = LOG_EMERG,
2210
2211                 .line_max = DEFAULT_LINE_MAX,
2212
2213                 .runtime_storage.name = "Runtime Journal",
2214                 .system_storage.name = "System Journal",
2215         };
2216
2217         r = set_namespace(s, namespace);
2218         if (r < 0)
2219                 return r;
2220
2221         /* By default, only read from /dev/kmsg if are the main namespace */
2222         s->read_kmsg = !s->namespace;
2223         s->storage = s->namespace ? STORAGE_PERSISTENT : STORAGE_AUTO;
2224
2225         journal_reset_metrics(&s->system_storage.metrics);
2226         journal_reset_metrics(&s->runtime_storage.metrics);
2227
2228         server_parse_config_file(s);
2229
2230         if (!s->namespace) {
2231                 /* Parse kernel command line, but only if we are not a namespace instance */
2232                 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
2233                 if (r < 0)
2234                         log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
2235         }
2236
2237         if (!!s->ratelimit_interval != !!s->ratelimit_burst) { /* One set to 0 and the other not? */
2238                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
2239                           s->ratelimit_interval, s->ratelimit_burst);
2240                 s->ratelimit_interval = s->ratelimit_burst = 0;
2241         }
2242
2243         e = getenv("RUNTIME_DIRECTORY");
2244         if (e)
2245                 s->runtime_directory = strdup(e);
2246         else if (s->namespace)
2247                 s->runtime_directory = strjoin("/run/systemd/journal.", s->namespace);
2248         else
2249                 s->runtime_directory = strdup("/run/systemd/journal");
2250         if (!s->runtime_directory)
2251                 return log_oom();
2252
2253         (void) mkdir_p(s->runtime_directory, 0755);
2254
2255         s->user_journals = ordered_hashmap_new(NULL);
2256         if (!s->user_journals)
2257                 return log_oom();
2258
2259         s->mmap = mmap_cache_new();
2260         if (!s->mmap)
2261                 return log_oom();
2262
2263         s->deferred_closes = set_new(NULL);
2264         if (!s->deferred_closes)
2265                 return log_oom();
2266
2267         r = sd_event_default(&s->event);
2268         if (r < 0)
2269                 return log_error_errno(r, "Failed to create event loop: %m");
2270
2271         n = sd_listen_fds(true);
2272         if (n < 0)
2273                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
2274
2275         native_socket = strjoina(s->runtime_directory, "/socket");
2276         stdout_socket = strjoina(s->runtime_directory, "/stdout");
2277         syslog_socket = strjoina(s->runtime_directory, "/dev-log");
2278         varlink_socket = strjoina(s->runtime_directory, "/io.systemd.journal");
2279
2280         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
2281
2282                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, native_socket, 0) > 0) {
2283
2284                         if (s->native_fd >= 0)
2285                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
2286                                                        "Too many native sockets passed.");
2287
2288                         s->native_fd = fd;
2289
2290                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, stdout_socket, 0) > 0) {
2291
2292                         if (s->stdout_fd >= 0)
2293                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
2294                                                        "Too many stdout sockets passed.");
2295
2296                         s->stdout_fd = fd;
2297
2298                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, syslog_socket, 0) > 0) {
2299
2300                         if (s->syslog_fd >= 0)
2301                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
2302                                                        "Too many /dev/log sockets passed.");
2303
2304                         s->syslog_fd = fd;
2305
2306                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, varlink_socket, 0) > 0) {
2307
2308                         if (varlink_fd >= 0)
2309                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
2310                                                        "Too many varlink sockets passed.");
2311
2312                         varlink_fd = fd;
2313                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
2314
2315                         if (s->audit_fd >= 0)
2316                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
2317                                                        "Too many audit sockets passed.");
2318
2319                         s->audit_fd = fd;
2320
2321                 } else {
2322
2323                         if (!fds) {
2324                                 fds = fdset_new();
2325                                 if (!fds)
2326                                         return log_oom();
2327                         }
2328
2329                         r = fdset_put(fds, fd);
2330                         if (r < 0)
2331                                 return log_oom();
2332                 }
2333         }
2334
2335         /* Try to restore streams, but don't bother if this fails */
2336         (void) server_restore_streams(s, fds);
2337
2338         if (fdset_size(fds) > 0) {
2339                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
2340                 fds = fdset_free(fds);
2341         }
2342
2343         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0 && varlink_fd < 0;
2344
2345         /* always open stdout, syslog, native, and kmsg sockets */
2346
2347         /* systemd-journald.socket: /run/systemd/journal/stdout */
2348         r = server_open_stdout_socket(s, stdout_socket);
2349         if (r < 0)
2350                 return r;
2351
2352         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2353         r = server_open_syslog_socket(s, syslog_socket);
2354         if (r < 0)
2355                 return r;
2356
2357         /* systemd-journald.socket: /run/systemd/journal/socket */
2358         r = server_open_native_socket(s, native_socket);
2359         if (r < 0)
2360                 return r;
2361
2362         /* /dev/kmsg */
2363         r = server_open_dev_kmsg(s);
2364         if (r < 0)
2365                 return r;
2366
2367         /* Unless we got *some* sockets and not audit, open audit socket */
2368         if (s->audit_fd >= 0 || no_sockets) {
2369                 r = server_open_audit(s);
2370                 if (r < 0)
2371                         return r;
2372         }
2373
2374         r = server_open_varlink(s, varlink_socket, varlink_fd);
2375         if (r < 0)
2376                 return r;
2377
2378         r = server_open_kernel_seqnum(s);
2379         if (r < 0)
2380                 return r;
2381
2382         r = server_open_hostname(s);
2383         if (r < 0)
2384                 return r;
2385
2386         r = setup_signals(s);
2387         if (r < 0)
2388                 return r;
2389
2390         s->ratelimit = journal_ratelimit_new();
2391         if (!s->ratelimit)
2392                 return log_oom();
2393
2394         r = cg_get_root_path(&s->cgroup_root);
2395         if (r < 0)
2396                 return log_error_errno(r, "Failed to acquire cgroup root path: %m");
2397
2398         server_cache_hostname(s);
2399         server_cache_boot_id(s);
2400         server_cache_machine_id(s);
2401
2402         if (s->namespace)
2403                 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s), ".", s->namespace);
2404         else
2405                 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2406         if (!s->runtime_storage.path)
2407                 return log_oom();
2408
2409         e = getenv("LOGS_DIRECTORY");
2410         if (e)
2411                 s->system_storage.path = strdup(e);
2412         else if (s->namespace)
2413                 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s), ".", s->namespace);
2414         else
2415                 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2416         if (!s->system_storage.path)
2417                 return log_oom();
2418
2419         (void) server_connect_notify(s);
2420
2421         (void) client_context_acquire_default(s);
2422
2423         r = system_journal_open(s, false, false);
2424         if (r < 0)
2425                 return r;
2426
2427         server_start_or_stop_idle_timer(s);
2428         return 0;
2429 }
2430
2431 void server_maybe_append_tags(Server *s) {
2432 #if HAVE_GCRYPT
2433         JournalFile *f;
2434         usec_t n;
2435
2436         n = now(CLOCK_REALTIME);
2437
2438         if (s->system_journal)
2439                 journal_file_maybe_append_tag(s->system_journal, n);
2440
2441         ORDERED_HASHMAP_FOREACH(f, s->user_journals)
2442                 journal_file_maybe_append_tag(f, n);
2443 #endif
2444 }
2445
2446 void server_done(Server *s) {
2447         assert(s);
2448
2449         free(s->namespace);
2450         free(s->namespace_field);
2451
2452         set_free_with_destructor(s->deferred_closes, journal_file_close);
2453
2454         while (s->stdout_streams)
2455                 stdout_stream_free(s->stdout_streams);
2456
2457         client_context_flush_all(s);
2458
2459         (void) journal_file_close(s->system_journal);
2460         (void) journal_file_close(s->runtime_journal);
2461
2462         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
2463
2464         varlink_server_unref(s->varlink_server);
2465
2466         sd_event_source_unref(s->syslog_event_source);
2467         sd_event_source_unref(s->native_event_source);
2468         sd_event_source_unref(s->stdout_event_source);
2469         sd_event_source_unref(s->dev_kmsg_event_source);
2470         sd_event_source_unref(s->audit_event_source);
2471         sd_event_source_unref(s->sync_event_source);
2472         sd_event_source_unref(s->sigusr1_event_source);
2473         sd_event_source_unref(s->sigusr2_event_source);
2474         sd_event_source_unref(s->sigterm_event_source);
2475         sd_event_source_unref(s->sigint_event_source);
2476         sd_event_source_unref(s->sigrtmin1_event_source);
2477         sd_event_source_unref(s->hostname_event_source);
2478         sd_event_source_unref(s->notify_event_source);
2479         sd_event_source_unref(s->watchdog_event_source);
2480         sd_event_source_unref(s->idle_event_source);
2481         sd_event_unref(s->event);
2482
2483         safe_close(s->syslog_fd);
2484         safe_close(s->native_fd);
2485         safe_close(s->stdout_fd);
2486         safe_close(s->dev_kmsg_fd);
2487         safe_close(s->audit_fd);
2488         safe_close(s->hostname_fd);
2489         safe_close(s->notify_fd);
2490
2491         if (s->ratelimit)
2492                 journal_ratelimit_free(s->ratelimit);
2493
2494         if (s->kernel_seqnum)
2495                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2496
2497         free(s->buffer);
2498         free(s->tty_path);
2499         free(s->cgroup_root);
2500         free(s->hostname_field);
2501         free(s->runtime_storage.path);
2502         free(s->system_storage.path);
2503         free(s->runtime_directory);
2504
2505         mmap_cache_unref(s->mmap);
2506 }
2507
2508 static const char* const storage_table[_STORAGE_MAX] = {
2509         [STORAGE_AUTO] = "auto",
2510         [STORAGE_VOLATILE] = "volatile",
2511         [STORAGE_PERSISTENT] = "persistent",
2512         [STORAGE_NONE] = "none"
2513 };
2514
2515 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2516 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2517
2518 static const char* const split_mode_table[_SPLIT_MAX] = {
2519         [SPLIT_LOGIN] = "login",
2520         [SPLIT_UID] = "uid",
2521         [SPLIT_NONE] = "none",
2522 };
2523
2524 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2525 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
2526
2527 int config_parse_line_max(
2528                 const char* unit,
2529                 const char *filename,
2530                 unsigned line,
2531                 const char *section,
2532                 unsigned section_line,
2533                 const char *lvalue,
2534                 int ltype,
2535                 const char *rvalue,
2536                 void *data,
2537                 void *userdata) {
2538
2539         size_t *sz = data;
2540         int r;
2541
2542         assert(filename);
2543         assert(lvalue);
2544         assert(rvalue);
2545         assert(data);
2546
2547         if (isempty(rvalue))
2548                 /* Empty assignment means default */
2549                 *sz = DEFAULT_LINE_MAX;
2550         else {
2551                 uint64_t v;
2552
2553                 r = parse_size(rvalue, 1024, &v);
2554                 if (r < 0) {
2555                         log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2556                         return 0;
2557                 }
2558
2559                 if (v < 79) {
2560                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2561                          * terminal size is 80ch, and it might make sense to break one character before the natural
2562                          * line break would occur on that. */
2563                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2564                         *sz = 79;
2565                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2566                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2567                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2568                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2569                          * fail much earlier anyway. */
2570                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2571                         *sz = SSIZE_MAX-1;
2572                 } else
2573                         *sz = (size_t) v;
2574         }
2575
2576         return 0;
2577 }
2578
2579 int config_parse_compress(
2580                 const char* unit,
2581                 const char *filename,
2582                 unsigned line,
2583                 const char *section,
2584                 unsigned section_line,
2585                 const char *lvalue,
2586                 int ltype,
2587                 const char *rvalue,
2588                 void *data,
2589                 void *userdata) {
2590
2591         JournalCompressOptions* compress = data;
2592         int r;
2593
2594         if (isempty(rvalue)) {
2595                 compress->enabled = true;
2596                 compress->threshold_bytes = (uint64_t) -1;
2597         } else if (streq(rvalue, "1")) {
2598                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2599                            "Compress= ambiguously specified as 1, enabling compression with default threshold");
2600                 compress->enabled = true;
2601         } else if (streq(rvalue, "0")) {
2602                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2603                            "Compress= ambiguously specified as 0, disabling compression");
2604                 compress->enabled = false;
2605         } else {
2606                 r = parse_boolean(rvalue);
2607                 if (r < 0) {
2608                         r = parse_size(rvalue, 1024, &compress->threshold_bytes);
2609                         if (r < 0)
2610                                 log_syntax(unit, LOG_WARNING, filename, line, r,
2611                                            "Failed to parse Compress= value, ignoring: %s", rvalue);
2612                         else
2613                                 compress->enabled = true;
2614                 } else
2615                         compress->enabled = r;
2616         }
2617
2618         return 0;
2619 }