src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #if HAVE_SELINUX
   4 #include <selinux/selinux.h>
   5 #endif
   6 #include <sys/ioctl.h>
   7 #include <sys/mman.h>
   8 #include <sys/signalfd.h>
   9 #include <sys/statvfs.h>
  10 #include <linux/sockios.h>
  11
  12 #include "sd-daemon.h"
  13 #include "sd-journal.h"
  14 #include "sd-messages.h"
  15
  16 #include "acl-util.h"
  17 #include "alloc-util.h"
  18 #include "audit-util.h"
  19 #include "cgroup-util.h"
  20 #include "conf-parser.h"
  21 #include "dirent-util.h"
  22 #include "extract-word.h"
  23 #include "fd-util.h"
  24 #include "fileio.h"
  25 #include "format-util.h"
  26 #include "fs-util.h"
  27 #include "hashmap.h"
  28 #include "hostname-util.h"
  29 #include "id128-util.h"
  30 #include "io-util.h"
  31 #include "journal-authenticate.h"
  32 #include "journal-file.h"
  33 #include "journal-internal.h"
  34 #include "journal-vacuum.h"
  35 #include "journald-audit.h"
  36 #include "journald-context.h"
  37 #include "journald-kmsg.h"
  38 #include "journald-native.h"
  39 #include "journald-rate-limit.h"
  40 #include "journald-server.h"
  41 #include "journald-stream.h"
  42 #include "journald-syslog.h"
  43 #include "log.h"
  44 #include "missing.h"
  45 #include "mkdir.h"
  46 #include "parse-util.h"
  47 #include "proc-cmdline.h"
  48 #include "process-util.h"
  49 #include "rm-rf.h"
  50 #include "selinux-util.h"
  51 #include "signal-util.h"
  52 #include "socket-util.h"
  53 #include "stdio-util.h"
  54 #include "string-table.h"
  55 #include "string-util.h"
  56 #include "syslog-util.h"
  57 #include "user-util.h"
  58
/* Upper bound on the number of per-user journal files kept open at the same time: find_journal() closes
 * already open user journals until fewer than this many are open before it opens another one. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults; they are not referenced in this part of the file.
 * NOTE(review): presumably applied when the server configuration is initialized — confirm at the use sites. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long the disk space figures cached by cache_space_refresh() are reused before they are measured again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* Not referenced in this part of the file.
 * NOTE(review): judging by the name, a send buffer size (8 MiB) for a notification socket — confirm. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)

/* Once the set of deferred closes holds this many journal files, server_vacuum_deferred_closes() closes
 * entries until the set is below the limit again. */
#define DEFERRED_CLOSES_MAX (4096)
  78
  79 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  80         _cleanup_closedir_ DIR *d = NULL;
  81         struct dirent *de;
  82         struct statvfs ss;
  83
  84         assert(ret_used);
  85         assert(ret_free);
  86
  87         d = opendir(path);
  88         if (!d)
  89                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
  90                                       errno, "Failed to open %s: %m", path);
  91
  92         if (fstatvfs(dirfd(d), &ss) < 0)
  93                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
  94
  95         *ret_free = ss.f_bsize * ss.f_bavail;
  96         *ret_used = 0;
  97         FOREACH_DIRENT_ALL(de, d, break) {
  98                 struct stat st;
  99
 100                 if (!endswith(de->d_name, ".journal") &&
 101                     !endswith(de->d_name, ".journal~"))
 102                         continue;
 103
 104                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 105                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 106                         continue;
 107                 }
 108
 109                 if (!S_ISREG(st.st_mode))
 110                         continue;
 111
 112                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 113         }
 114
 115         return 0;
 116 }
 117
 118 static void cache_space_invalidate(JournalStorageSpace *space) {
 119         zero(*space);
 120 }
 121
 122 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 123         JournalStorageSpace *space;
 124         JournalMetrics *metrics;
 125         uint64_t vfs_used, vfs_avail, avail;
 126         usec_t ts;
 127         int r;
 128
 129         assert(s);
 130
 131         metrics = &storage->metrics;
 132         space = &storage->space;
 133
 134         ts = now(CLOCK_MONOTONIC);
 135
 136         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 137                 return 0;
 138
 139         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 140         if (r < 0)
 141                 return r;
 142
 143         space->vfs_used = vfs_used;
 144         space->vfs_available = vfs_avail;
 145
 146         avail = LESS_BY(vfs_avail, metrics->keep_free);
 147
 148         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 149         space->available = LESS_BY(space->limit, vfs_used);
 150         space->timestamp = ts;
 151         return 1;
 152 }
 153
 154 static void patch_min_use(JournalStorage *storage) {
 155         assert(storage);
 156
 157         /* Let's bump the min_use limit to the current usage on disk. We do
 158          * this when starting up and first opening the journal files. This way
 159          * sudden spikes in disk usage will not cause journald to vacuum files
 160          * without bounds. Note that this means that only a restart of journald
 161          * will make it reset this value. */
 162
 163         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 164 }
 165
 166 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 167         JournalStorage *js;
 168         int r;
 169
 170         assert(s);
 171
 172         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 173
 174         r = cache_space_refresh(s, js);
 175         if (r >= 0) {
 176                 if (available)
 177                         *available = js->space.available;
 178                 if (limit)
 179                         *limit = js->space.limit;
 180         }
 181         return r;
 182 }
 183
 184 void server_space_usage_message(Server *s, JournalStorage *storage) {
 185         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 186              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 187         JournalMetrics *metrics;
 188
 189         assert(s);
 190
 191         if (!storage)
 192                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 193
 194         if (cache_space_refresh(s, storage) < 0)
 195                 return;
 196
 197         metrics = &storage->metrics;
 198         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 199         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 200         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 201         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 202         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 203         format_bytes(fb6, sizeof(fb6), storage->space.available);
 204
 205         server_driver_message(s, 0,
 206                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 207                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 208                                           storage->name, storage->path, fb1, fb5, fb6),
 209                               "JOURNAL_NAME=%s", storage->name,
 210                               "JOURNAL_PATH=%s", storage->path,
 211                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 212                               "CURRENT_USE_PRETTY=%s", fb1,
 213                               "MAX_USE=%"PRIu64, metrics->max_use,
 214                               "MAX_USE_PRETTY=%s", fb2,
 215                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 216                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 217                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 218                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 219                               "LIMIT=%"PRIu64, storage->space.limit,
 220                               "LIMIT_PRETTY=%s", fb5,
 221                               "AVAILABLE=%"PRIu64, storage->space.available,
 222                               "AVAILABLE_PRETTY=%s", fb6,
 223                               NULL);
 224 }
 225
 226 static bool uid_for_system_journal(uid_t uid) {
 227
 228         /* Returns true if the specified UID shall get its data stored in the system journal*/
 229
 230         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 231 }
 232
 233 static void server_add_acls(JournalFile *f, uid_t uid) {
 234 #if HAVE_ACL
 235         int r;
 236 #endif
 237         assert(f);
 238
 239 #if HAVE_ACL
 240         if (uid_for_system_journal(uid))
 241                 return;
 242
 243         r = add_acls_for_user(f->fd, uid);
 244         if (r < 0)
 245                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 246 #endif
 247 }
 248
 249 static int open_journal(
 250                 Server *s,
 251                 bool reliably,
 252                 const char *fname,
 253                 int flags,
 254                 bool seal,
 255                 JournalMetrics *metrics,
 256                 JournalFile **ret) {
 257
 258         JournalFile *f;
 259         int r;
 260
 261         assert(s);
 262         assert(fname);
 263         assert(ret);
 264
 265         if (reliably)
 266                 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
 267                                                seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 268         else
 269                 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
 270                                       metrics, s->mmap, s->deferred_closes, NULL, &f);
 271
 272         if (r < 0)
 273                 return r;
 274
 275         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 276         if (r < 0) {
 277                 (void) journal_file_close(f);
 278                 return r;
 279         }
 280
 281         *ret = f;
 282         return r;
 283 }
 284
 285 static bool flushed_flag_is_set(void) {
 286         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 287 }
 288
/* Opens the system journal (below s->system_storage.path) and/or the runtime journal (below
 * s->runtime_storage.path), as far as they are not open yet and the configured storage mode allows it.
 *
 * The system journal is only opened in persistent or auto storage mode, and only if 'flush_requested'
 * is true or the flag file checked by flushed_flag_is_set() exists.
 *
 * Returns a negative value only if the runtime journal is required (no system journal open) but could
 * not be opened. Failure to open the system journal, or to open an already existing runtime journal
 * while the system journal is open, is logged (unless the error is an expected one) and then ignored. */
static int system_journal_open(Server *s, bool flush_requested) {
        const char *fn;
        int r = 0;

        if (!s->system_journal &&
            IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
            (flush_requested || flushed_flag_is_set())) {

                /* If in auto mode: first try to create the machine
                 * path, but not the prefix.
                 *
                 * If in persistent mode: create /var/log/journal and
                 * the machine path */

                if (s->storage == STORAGE_PERSISTENT)
                        (void) mkdir_p("/var/log/journal/", 0755);

                /* Errors are ignored here; if the directory is really missing opening the file below fails */
                (void) mkdir(s->system_storage.path, 0755);

                fn = strjoina(s->system_storage.path, "/system.journal");
                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
                if (r >= 0) {
                        server_add_acls(s->system_journal, 0);
                        (void) cache_space_refresh(s, &s->system_storage);
                        patch_min_use(&s->system_storage);
                } else {
                        /* A missing directory or a read-only file system is not worth a warning */
                        if (!IN_SET(r, -ENOENT, -EROFS))
                                log_warning_errno(r, "Failed to open system journal: %m");

                        /* Not fatal: we fall back to the runtime journal below */
                        r = 0;
                }

                /* If the runtime journal is open, and we're post-flush, we're
                 * recovering from a failed system journal rotate (ENOSPC)
                 * for which the runtime journal was reopened.
                 *
                 * Perform an implicit flush to var, leaving the runtime
                 * journal closed, now that the system journal is back.
                 */
                if (!flush_requested)
                        (void) server_flush_to_var(s, true);
        }

        if (!s->runtime_journal &&
            (s->storage != STORAGE_NONE)) {

                fn = strjoina(s->runtime_storage.path, "/system.journal");

                if (s->system_journal) {

                        /* Try to open the runtime journal, but only
                         * if it already exists, so that we can flush
                         * it into the system journal */

                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0) {
                                /* No runtime journal around is the normal case here, don't complain about it */
                                if (r != -ENOENT)
                                        log_warning_errno(r, "Failed to open runtime journal: %m");

                                r = 0;
                        }

                } else {

                        /* OK, we really need the runtime journal, so create
                         * it if necessary. */

                        (void) mkdir("/run/log", 0755);
                        (void) mkdir("/run/log/journal", 0755);
                        (void) mkdir_parents(fn, 0750);

                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0)
                                return log_error_errno(r, "Failed to open runtime journal: %m");
                }

                /* In the "only if it exists" branch above the runtime journal may still be unset */
                if (s->runtime_journal) {
                        server_add_acls(s->runtime_journal, 0);
                        (void) cache_space_refresh(s, &s->runtime_storage);
                        patch_min_use(&s->runtime_storage);
                }
        }

        return r;
}
 374
/* Picks the journal file that entries of the specified UID shall be written to.
 *
 * The runtime journal is returned if it is open. Otherwise the system journal is returned for UIDs
 * accepted by uid_for_system_journal(), and the per-user journal for all others. Per-user journals are
 * opened (and created) on demand and kept in s->user_journals; whenever the per-user journal cannot be
 * provided, the system journal is returned instead.
 *
 * The result may be NULL if the selected journal is not open. */
static JournalFile* find_journal(Server *s, uid_t uid) {
        _cleanup_free_ char *p = NULL;
        int r;
        JournalFile *f;
        sd_id128_t machine;

        assert(s);

        /* A rotate that fails to create the new journal (ENOSPC) leaves the
         * rotated journal as NULL.  Unless we revisit opening, even after
         * space is made available we'll continue to return NULL indefinitely.
         *
         * system_journal_open() is a noop if the journals are already open, so
         * we can just call it here to recover from failed rotates (or anything
         * else that's left the journals as NULL).
         *
         * Fixes https://github.com/systemd/systemd/issues/3968 */
        (void) system_journal_open(s, false);

        /* We split up user logs only on /var, not on /run. If the
         * runtime file is open, we write to it exclusively, in order
         * to guarantee proper order as soon as we flush /run to
         * /var and close the runtime file. */

        if (s->runtime_journal)
                return s->runtime_journal;

        if (uid_for_system_journal(uid))
                return s->system_journal;

        /* Is the journal of this user already open? */
        f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
        if (f)
                return f;

        r = sd_id128_get_machine(&machine);
        if (r < 0) {
                log_debug_errno(r, "Failed to determine machine ID, using system log: %m");
                return s->system_journal;
        }

        if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
                     SD_ID128_FORMAT_VAL(machine), uid) < 0) {
                log_oom();
                return s->system_journal;
        }

        while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
                /* Too many open? Then let's close one */
                f = ordered_hashmap_steal_first(s->user_journals);
                assert(f);
                (void) journal_file_close(f);
        }

        /* User journals live next to the system journal, hence they share its metrics */
        r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
        if (r < 0)
                return s->system_journal;

        server_add_acls(f, uid);

        r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
        if (r < 0) {
                /* We cannot keep track of the file, hence don't keep it open either */
                (void) journal_file_close(f);
                return s->system_journal;
        }

        return f;
}
 442
 443 static int do_rotate(
 444                 Server *s,
 445                 JournalFile **f,
 446                 const char* name,
 447                 bool seal,
 448                 uint32_t uid) {
 449
 450         int r;
 451         assert(s);
 452
 453         if (!*f)
 454                 return -EINVAL;
 455
 456         r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
 457         if (r < 0) {
 458                 if (*f)
 459                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 460                 else
 461                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 462         }
 463
 464         server_add_acls(*f, uid);
 465
 466         return r;
 467 }
 468
 469 static void server_process_deferred_closes(Server *s) {
 470         JournalFile *f;
 471         Iterator i;
 472
 473         /* Perform any deferred closes which aren't still offlining. */
 474         SET_FOREACH(f, s->deferred_closes, i) {
 475                 if (journal_file_is_offlining(f))
 476                         continue;
 477
 478                 (void) set_remove(s->deferred_closes, f);
 479                 (void) journal_file_close(f);
 480         }
 481 }
 482
 483 static void server_vacuum_deferred_closes(Server *s) {
 484         assert(s);
 485
 486         /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
 487         if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
 488                 return;
 489
 490         /* Let's first remove all journal files that might already have completed closing */
 491         server_process_deferred_closes(s);
 492
 493         /* And now, let's close some more until we reach the limit again. */
 494         while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
 495                 JournalFile *f;
 496
 497                 assert_se(f = set_steal_first(s->deferred_closes));
 498                 journal_file_close(f);
 499         }
 500 }
 501
 502 static int open_user_journal_directory(Server *s, DIR **ret_dir, char **ret_path) {
 503         _cleanup_closedir_ DIR *dir = NULL;
 504         _cleanup_free_ char *path = NULL;
 505         sd_id128_t machine;
 506         int r;
 507
 508         assert(s);
 509
 510         r = sd_id128_get_machine(&machine);
 511         if (r < 0)
 512                 return log_error_errno(r, "Failed to determine machine ID, ignoring: %m");
 513
 514         if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/", SD_ID128_FORMAT_VAL(machine)) < 0)
 515                 return log_oom();
 516
 517         dir = opendir(path);
 518         if (!dir)
 519                 return log_error_errno(errno, "Failed to open user journal directory '%s': %m", path);
 520
 521         if (ret_dir)
 522                 *ret_dir = TAKE_PTR(dir);
 523         if (ret_path)
 524                 *ret_path = TAKE_PTR(path);
 525
 526         return 0;
 527 }
 528
/* Rotates all journal files of the server.
 *
 * This covers the runtime and the system journal (as far as they are open), all user journals that are
 * currently open, and — unless a runtime journal is open — the user journal files in the user journal
 * directory that are not currently open; those are opened briefly and archived. Individual failures
 * are logged and skipped. Finally, the deferred closes that have completed are processed. */
void server_rotate(Server *s) {
        _cleanup_free_ char *path = NULL;
        _cleanup_closedir_ DIR *d = NULL;
        JournalFile *f;
        Iterator i;
        void *k;
        int r;

        log_debug("Rotating...");

        /* First, rotate the system journal (both in its runtime flavour and in its persistent flavour, as far as
         * they are open; do_rotate() refuses journals that are not open) */
        (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
        (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);

        /* Then, rotate all user journals we have open (keeping them open) */
        ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
                r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
                if (r >= 0)
                        /* Rotation replaced the file object, make the map point to the new one */
                        ordered_hashmap_replace(s->user_journals, k, f);
                else if (!f)
                        /* Old file has been closed and deallocated */
                        ordered_hashmap_remove(s->user_journals, k);
        }

        /* Finally, also rotate all user journals we currently do not have open. (But do so only if we actually have
         * access to /var, i.e. are not in the log-to-runtime-journal mode). */
        if (!s->runtime_journal &&
            open_user_journal_directory(s, &d, &path) >= 0) {

                struct dirent *de;

                FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", path)) {
                        _cleanup_free_ char *u = NULL, *full = NULL;
                        _cleanup_close_ int fd = -1;
                        const char *a, *b;
                        uid_t uid;

                        /* We are looking for files named "user-<UID>.journal" */
                        a = startswith(de->d_name, "user-");
                        if (!a)
                                continue;
                        b = endswith(de->d_name, ".journal");
                        if (!b)
                                continue;

                        /* Extract what is between prefix and suffix (presumably 'a' points right behind the
                         * prefix and 'b' to the beginning of the suffix — see startswith()/endswith()) */
                        u = strndup(a, b-a);
                        if (!u) {
                                log_oom();
                                break;
                        }

                        r = parse_uid(u, &uid);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
                                continue;
                        }

                        /* Already rotated in the above loop? i.e. is it an open user journal? */
                        if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
                                continue;

                        /* 'path' ends in a slash already, hence plain concatenation yields the full path */
                        full = strjoin(path, de->d_name);
                        if (!full) {
                                log_oom();
                                break;
                        }

                        fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
                        if (fd < 0) {
                                /* A symlink (ELOOP, due to O_NOFOLLOW) or a file that vanished in the meantime
                                 * is only worth a debug message */
                                log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
                                               "Failed to open journal file '%s' for rotation: %m", full);
                                continue;
                        }

                        /* Make some room in the set of deferred close()s */
                        server_vacuum_deferred_closes(s);

                        /* Open the file briefly, so that we can archive it */
                        r = journal_file_open(fd,
                                              full,
                                              O_RDWR,
                                              0640,
                                              s->compress.enabled,
                                              s->compress.threshold_bytes,
                                              s->seal,
                                              &s->system_storage.metrics,
                                              s->mmap,
                                              s->deferred_closes,
                                              NULL,
                                              &f);
                        if (r < 0) {
                                log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);

                                r = journal_file_dispose(dirfd(d), de->d_name);
                                if (r < 0)
                                        log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
                                else
                                        log_debug("Successfully moved %s out of the way.", full);

                                /* The fd is still ours in this case and is closed by the cleanup handler */
                                continue;
                        }

                        TAKE_FD(fd); /* Donated to journal_file_open() */

                        r = journal_file_archive(f);
                        if (r < 0)
                                log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);

                        /* We are done with the file, close it again (possibly deferred via the deferred closes set) */
                        f = journal_initiate_close(f, s->deferred_closes);
                }
        }

        server_process_deferred_closes(s);
}
 642
 643 void server_sync(Server *s) {
 644         JournalFile *f;
 645         Iterator i;
 646         int r;
 647
 648         if (s->system_journal) {
 649                 r = journal_file_set_offline(s->system_journal, false);
 650                 if (r < 0)
 651                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 652         }
 653
 654         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 655                 r = journal_file_set_offline(f, false);
 656                 if (r < 0)
 657                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 658         }
 659
 660         if (s->sync_event_source) {
 661                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 662                 if (r < 0)
 663                         log_error_errno(r, "Failed to disable sync timer source: %m");
 664         }
 665
 666         s->sync_scheduled = false;
 667 }
 668
 669 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 670
 671         int r;
 672
 673         assert(s);
 674         assert(storage);
 675
 676         (void) cache_space_refresh(s, storage);
 677
 678         if (verbose)
 679                 server_space_usage_message(s, storage);
 680
 681         r = journal_directory_vacuum(storage->path, storage->space.limit,
 682                                      storage->metrics.n_max_files, s->max_retention_usec,
 683                                      &s->oldest_file_usec, verbose);
 684         if (r < 0 && r != -ENOENT)
 685                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 686
 687         cache_space_invalidate(&storage->space);
 688 }
 689
 690 int server_vacuum(Server *s, bool verbose) {
 691         assert(s);
 692
 693         log_debug("Vacuuming...");
 694
 695         s->oldest_file_usec = 0;
 696
 697         if (s->system_journal)
 698                 do_vacuum(s, &s->system_storage, verbose);
 699         if (s->runtime_journal)
 700                 do_vacuum(s, &s->runtime_storage, verbose);
 701
 702         return 0;
 703 }
 704
 705 static void server_cache_machine_id(Server *s) {
 706         sd_id128_t id;
 707         int r;
 708
 709         assert(s);
 710
 711         r = sd_id128_get_machine(&id);
 712         if (r < 0)
 713                 return;
 714
 715         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 716 }
 717
 718 static void server_cache_boot_id(Server *s) {
 719         sd_id128_t id;
 720         int r;
 721
 722         assert(s);
 723
 724         r = sd_id128_get_boot(&id);
 725         if (r < 0)
 726                 return;
 727
 728         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 729 }
 730
 731 static void server_cache_hostname(Server *s) {
 732         _cleanup_free_ char *t = NULL;
 733         char *x;
 734
 735         assert(s);
 736
 737         t = gethostname_malloc();
 738         if (!t)
 739                 return;
 740
 741         x = strappend("_HOSTNAME=", t);
 742         if (!x)
 743                 return;
 744
 745         free(s->hostname_field);
 746         s->hostname_field = x;
 747 }
 748
 749 static bool shall_try_append_again(JournalFile *f, int r) {
 750         switch(r) {
 751
 752         case -E2BIG:           /* Hit configured limit          */
 753         case -EFBIG:           /* Hit fs limit                  */
 754         case -EDQUOT:          /* Quota limit hit               */
 755         case -ENOSPC:          /* Disk full                     */
 756                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 757                 return true;
 758
 759         case -EIO:             /* I/O error of some kind (mmap) */
 760                 log_warning("%s: IO error, rotating.", f->path);
 761                 return true;
 762
 763         case -EHOSTDOWN:       /* Other machine                 */
 764                 log_info("%s: Journal file from other machine, rotating.", f->path);
 765                 return true;
 766
 767         case -EBUSY:           /* Unclean shutdown              */
 768                 log_info("%s: Unclean shutdown, rotating.", f->path);
 769                 return true;
 770
 771         case -EPROTONOSUPPORT: /* Unsupported feature           */
 772                 log_info("%s: Unsupported feature, rotating.", f->path);
 773                 return true;
 774
 775         case -EBADMSG:         /* Corrupted                     */
 776         case -ENODATA:         /* Truncated                     */
 777         case -ESHUTDOWN:       /* Already archived              */
 778                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 779                 return true;
 780
 781         case -EIDRM:           /* Journal file has been deleted */
 782                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 783                 return true;
 784
 785         case -ETXTBSY:         /* Journal file is from the future */
 786                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 787                 return true;
 788
 789         default:
 790                 return false;
 791         }
 792 }
 793
 794 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
 795         bool vacuumed = false, rotate = false;
 796         struct dual_timestamp ts;
 797         JournalFile *f;
 798         int r;
 799
 800         assert(s);
 801         assert(iovec);
 802         assert(n > 0);
 803
 804         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 805          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 806          * processing it, as we want strictly linear ordering in what we write out.) */
 807         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 808         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 809
 810         if (ts.realtime < s->last_realtime_clock) {
 811                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 812                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 813                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 814                  * bisection works correctly. */
 815
 816                 log_debug("Time jumped backwards, rotating.");
 817                 rotate = true;
 818         } else {
 819
 820                 f = find_journal(s, uid);
 821                 if (!f)
 822                         return;
 823
 824                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 825                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 826                         rotate = true;
 827                 }
 828         }
 829
 830         if (rotate) {
 831                 server_rotate(s);
 832                 server_vacuum(s, false);
 833                 vacuumed = true;
 834
 835                 f = find_journal(s, uid);
 836                 if (!f)
 837                         return;
 838         }
 839
 840         s->last_realtime_clock = ts.realtime;
 841
 842         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 843         if (r >= 0) {
 844                 server_schedule_sync(s, priority);
 845                 return;
 846         }
 847
 848         if (vacuumed || !shall_try_append_again(f, r)) {
 849                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 850                 return;
 851         }
 852
 853         server_rotate(s);
 854         server_vacuum(s, false);
 855
 856         f = find_journal(s, uid);
 857         if (!f)
 858                 return;
 859
 860         log_debug("Retrying write.");
 861         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 862         if (r < 0)
 863                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 864         else
 865                 server_schedule_sync(s, priority);
 866 }
 867
 868 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
 869         if (isset(value)) {                                             \
 870                 char *k;                                                \
 871                 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
 872                 sprintf(k, field "=" format, value);                    \
 873                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 874         }
 875
 876 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
 877         if (!isempty(value)) {                                          \
 878                 char *k;                                                \
 879                 k = strjoina(field "=", value);                         \
 880                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 881         }
 882
 883 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
 884         if (!sd_id128_is_null(value)) {                                 \
 885                 char *k;                                                \
 886                 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
 887                 sd_id128_to_string(value, stpcpy(k, field "="));        \
 888                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 889         }
 890
 891 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
 892         if (value_size > 0) {                                           \
 893                 char *k;                                                \
 894                 k = newa(char, STRLEN(field "=") + value_size + 1);     \
 895                 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
 896                 iovec[n++] = IOVEC_MAKE_STRING(k);                      \
 897         }                                                               \
 898
 899 static void dispatch_message_real(
 900                 Server *s,
 901                 struct iovec *iovec, size_t n, size_t m,
 902                 const ClientContext *c,
 903                 const struct timeval *tv,
 904                 int priority,
 905                 pid_t object_pid) {
 906
 907         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 908         uid_t journal_uid;
 909         ClientContext *o;
 910
 911         assert(s);
 912         assert(iovec);
 913         assert(n > 0);
 914         assert(n +
 915                N_IOVEC_META_FIELDS +
 916                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 917                client_context_extra_fields_n_iovec(c) <= m);
 918
 919         if (c) {
 920                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 921                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 922                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 923
 924                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 925                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 926                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 927                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 928
 929                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 930
 931                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 932                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 933
 934                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 935                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 936                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 937                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 938                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 939                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 940                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 941
 942                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 943
 944                 if (c->extra_fields_n_iovec > 0) {
 945                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 946                         n += c->extra_fields_n_iovec;
 947                 }
 948         }
 949
 950         assert(n <= m);
 951
 952         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 953
 954                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 955                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 956                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 957
 958                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 959                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 960                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 961                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 962
 963                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 964
 965                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 966                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 967
 968                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 969                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 970                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 971                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 972                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 973                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 974                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 975
 976                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 977         }
 978
 979         assert(n <= m);
 980
 981         if (tv) {
 982                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 983                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 984         }
 985
 986         /* Note that strictly speaking storing the boot id here is
 987          * redundant since the entry includes this in-line
 988          * anyway. However, we need this indexed, too. */
 989         if (!isempty(s->boot_id_field))
 990                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 991
 992         if (!isempty(s->machine_id_field))
 993                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 994
 995         if (!isempty(s->hostname_field))
 996                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 997
 998         assert(n <= m);
 999
1000         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
1001                 /* Split up strictly by (non-root) UID */
1002                 journal_uid = c->uid;
1003         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
1004                 /* Split up by login UIDs.  We do this only if the
1005                  * realuid is not root, in order not to accidentally
1006                  * leak privileged information to the user that is
1007                  * logged by a privileged process that is part of an
1008                  * unprivileged session. */
1009                 journal_uid = c->owner_uid;
1010         else
1011                 journal_uid = 0;
1012
1013         write_to_journal(s, journal_uid, iovec, n, priority);
1014 }
1015
1016 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
1017
1018         struct iovec *iovec;
1019         size_t n = 0, k, m;
1020         va_list ap;
1021         int r;
1022
1023         assert(s);
1024         assert(format);
1025
1026         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
1027         iovec = newa(struct iovec, m);
1028
1029         assert_cc(3 == LOG_FAC(LOG_DAEMON));
1030         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1031         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1032
1033         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1034         assert_cc(6 == LOG_INFO);
1035         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
1036
1037         if (message_id)
1038                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
1039         k = n;
1040
1041         va_start(ap, format);
1042         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
1043         /* Error handling below */
1044         va_end(ap);
1045
1046         if (r >= 0)
1047                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1048
1049         while (k < n)
1050                 free(iovec[k++].iov_base);
1051
1052         if (r < 0) {
1053                 /* We failed to format the message. Emit a warning instead. */
1054                 char buf[LINE_MAX];
1055
1056                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1057
1058                 n = 3;
1059                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
1060                 iovec[n++] = IOVEC_MAKE_STRING(buf);
1061                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1062         }
1063 }
1064
1065 void server_dispatch_message(
1066                 Server *s,
1067                 struct iovec *iovec, size_t n, size_t m,
1068                 ClientContext *c,
1069                 const struct timeval *tv,
1070                 int priority,
1071                 pid_t object_pid) {
1072
1073         uint64_t available = 0;
1074         int rl;
1075
1076         assert(s);
1077         assert(iovec || n == 0);
1078
1079         if (n == 0)
1080                 return;
1081
1082         if (LOG_PRI(priority) > s->max_level_store)
1083                 return;
1084
1085         /* Stop early in case the information will not be stored
1086          * in a journal. */
1087         if (s->storage == STORAGE_NONE)
1088                 return;
1089
1090         if (c && c->unit) {
1091                 (void) determine_space(s, &available, NULL);
1092
1093                 rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
1094                 if (rl == 0)
1095                         return;
1096
1097                 /* Write a suppression message if we suppressed something */
1098                 if (rl > 1)
1099                         server_driver_message(s, c->pid,
1100                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1101                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
1102                                               "N_DROPPED=%i", rl - 1,
1103                                               NULL);
1104         }
1105
1106         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
1107 }
1108
1109 int server_flush_to_var(Server *s, bool require_flag_file) {
1110         sd_id128_t machine;
1111         sd_journal *j = NULL;
1112         char ts[FORMAT_TIMESPAN_MAX];
1113         usec_t start;
1114         unsigned n = 0;
1115         int r;
1116
1117         assert(s);
1118
1119         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1120                 return 0;
1121
1122         if (!s->runtime_journal)
1123                 return 0;
1124
1125         if (require_flag_file && !flushed_flag_is_set())
1126                 return 0;
1127
1128         (void) system_journal_open(s, true);
1129
1130         if (!s->system_journal)
1131                 return 0;
1132
1133         log_debug("Flushing to /var...");
1134
1135         start = now(CLOCK_MONOTONIC);
1136
1137         r = sd_id128_get_machine(&machine);
1138         if (r < 0)
1139                 return r;
1140
1141         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1142         if (r < 0)
1143                 return log_error_errno(r, "Failed to read runtime journal: %m");
1144
1145         sd_journal_set_data_threshold(j, 0);
1146
1147         SD_JOURNAL_FOREACH(j) {
1148                 Object *o = NULL;
1149                 JournalFile *f;
1150
1151                 f = j->current_file;
1152                 assert(f && f->current_offset > 0);
1153
1154                 n++;
1155
1156                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1157                 if (r < 0) {
1158                         log_error_errno(r, "Can't read entry: %m");
1159                         goto finish;
1160                 }
1161
1162                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1163                 if (r >= 0)
1164                         continue;
1165
1166                 if (!shall_try_append_again(s->system_journal, r)) {
1167                         log_error_errno(r, "Can't write entry: %m");
1168                         goto finish;
1169                 }
1170
1171                 server_rotate(s);
1172                 server_vacuum(s, false);
1173
1174                 if (!s->system_journal) {
1175                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1176                         r = -EIO;
1177                         goto finish;
1178                 }
1179
1180                 log_debug("Retrying write.");
1181                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1182                 if (r < 0) {
1183                         log_error_errno(r, "Can't write entry: %m");
1184                         goto finish;
1185                 }
1186         }
1187
1188         r = 0;
1189
1190 finish:
1191         if (s->system_journal)
1192                 journal_file_post_change(s->system_journal);
1193
1194         s->runtime_journal = journal_file_close(s->runtime_journal);
1195
1196         if (r >= 0)
1197                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1198
1199         sd_journal_close(j);
1200
1201         server_driver_message(s, 0, NULL,
1202                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1203                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1204                                           n),
1205                               NULL);
1206
1207         return r;
1208 }
1209
1210 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1211         Server *s = userdata;
1212         struct ucred *ucred = NULL;
1213         struct timeval *tv = NULL;
1214         struct cmsghdr *cmsg;
1215         char *label = NULL;
1216         size_t label_len = 0, m;
1217         struct iovec iovec;
1218         ssize_t n;
1219         int *fds = NULL, v = 0;
1220         size_t n_fds = 0;
1221
1222         union {
1223                 struct cmsghdr cmsghdr;
1224
1225                 /* We use NAME_MAX space for the SELinux label
1226                  * here. The kernel currently enforces no
1227                  * limit, but according to suggestions from
1228                  * the SELinux people this will change and it
1229                  * will probably be identical to NAME_MAX. For
1230                  * now we use that, but this should be updated
1231                  * one day when the final limit is known. */
1232                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1233                             CMSG_SPACE(sizeof(struct timeval)) +
1234                             CMSG_SPACE(sizeof(int)) + /* fd */
1235                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1236         } control = {};
1237
1238         union sockaddr_union sa = {};
1239
1240         struct msghdr msghdr = {
1241                 .msg_iov = &iovec,
1242                 .msg_iovlen = 1,
1243                 .msg_control = &control,
1244                 .msg_controllen = sizeof(control),
1245                 .msg_name = &sa,
1246                 .msg_namelen = sizeof(sa),
1247         };
1248
1249         assert(s);
1250         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1251
1252         if (revents != EPOLLIN)
1253                 return log_error_errno(SYNTHETIC_ERRNO(EIO),
1254                                        "Got invalid event from epoll for datagram fd: %" PRIx32,
1255                                        revents);
1256
1257         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1258          * it.) */
1259         (void) ioctl(fd, SIOCINQ, &v);
1260
1261         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1262         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1263                             (size_t) LINE_MAX,
1264                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1265
1266         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1267                 return log_oom();
1268
1269         iovec = IOVEC_MAKE(s->buffer, s->buffer_size - 1); /* Leave room for trailing NUL we add later */
1270
1271         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1272         if (n < 0) {
1273                 if (IN_SET(errno, EINTR, EAGAIN))
1274                         return 0;
1275
1276                 return log_error_errno(errno, "recvmsg() failed: %m");
1277         }
1278
1279         CMSG_FOREACH(cmsg, &msghdr)
1280                 if (cmsg->cmsg_level == SOL_SOCKET &&
1281                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1282                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1283                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1284                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1285                          cmsg->cmsg_type == SCM_SECURITY) {
1286                         label = (char*) CMSG_DATA(cmsg);
1287                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1288                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1289                            cmsg->cmsg_type == SO_TIMESTAMP &&
1290                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1291                         tv = (struct timeval*) CMSG_DATA(cmsg);
1292                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1293                          cmsg->cmsg_type == SCM_RIGHTS) {
1294                         fds = (int*) CMSG_DATA(cmsg);
1295                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1296                 }
1297
1298         /* And a trailing NUL, just in case */
1299         s->buffer[n] = 0;
1300
1301         if (fd == s->syslog_fd) {
1302                 if (n > 0 && n_fds == 0)
1303                         server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1304                 else if (n_fds > 0)
1305                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1306
1307         } else if (fd == s->native_fd) {
1308                 if (n > 0 && n_fds == 0)
1309                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1310                 else if (n == 0 && n_fds == 1)
1311                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1312                 else if (n_fds > 0)
1313                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1314
1315         } else {
1316                 assert(fd == s->audit_fd);
1317
1318                 if (n > 0 && n_fds == 0)
1319                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1320                 else if (n_fds > 0)
1321                         log_warning("Got file descriptors via audit socket. Ignoring.");
1322         }
1323
1324         close_many(fds, n_fds);
1325         return 0;
1326 }
1327
1328 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1329         Server *s = userdata;
1330         int r;
1331
1332         assert(s);
1333
1334         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1335
1336         (void) server_flush_to_var(s, false);
1337         server_sync(s);
1338         server_vacuum(s, false);
1339
1340         r = touch("/run/systemd/journal/flushed");
1341         if (r < 0)
1342                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1343
1344         server_space_usage_message(s, NULL);
1345         return 0;
1346 }
1347
1348 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1349         Server *s = userdata;
1350         int r;
1351
1352         assert(s);
1353
1354         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1355         server_rotate(s);
1356         server_vacuum(s, true);
1357
1358         if (s->system_journal)
1359                 patch_min_use(&s->system_storage);
1360         if (s->runtime_journal)
1361                 patch_min_use(&s->runtime_storage);
1362
1363         /* Let clients know when the most recent rotation happened. */
1364         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1365         if (r < 0)
1366                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1367
1368         return 0;
1369 }
1370
1371 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1372         Server *s = userdata;
1373
1374         assert(s);
1375
1376         log_received_signal(LOG_INFO, si);
1377
1378         sd_event_exit(s->event, 0);
1379         return 0;
1380 }
1381
1382 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1383         Server *s = userdata;
1384         int r;
1385
1386         assert(s);
1387
1388         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1389
1390         server_sync(s);
1391
1392         /* Let clients know when the most recent sync happened. */
1393         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1394         if (r < 0)
1395                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1396
1397         return 0;
1398 }
1399
1400 static int setup_signals(Server *s) {
1401         int r;
1402
1403         assert(s);
1404
1405         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1406
1407         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1408         if (r < 0)
1409                 return r;
1410
1411         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1412         if (r < 0)
1413                 return r;
1414
1415         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1416         if (r < 0)
1417                 return r;
1418
1419         /* Let's process SIGTERM late, so that we flush all queued
1420          * messages to disk before we exit */
1421         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1422         if (r < 0)
1423                 return r;
1424
1425         /* When journald is invoked on the terminal (when debugging),
1426          * it's useful if C-c is handled equivalent to SIGTERM. */
1427         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1428         if (r < 0)
1429                 return r;
1430
1431         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1432         if (r < 0)
1433                 return r;
1434
1435         /* SIGRTMIN+1 causes an immediate sync. We process this very
1436          * late, so that everything else queued at this point is
1437          * really written to disk. Clients can watch
1438          * /run/systemd/journal/synced with inotify until its mtime
1439          * changes to see when a sync happened. */
1440         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1441         if (r < 0)
1442                 return r;
1443
1444         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1445         if (r < 0)
1446                 return r;
1447
1448         return 0;
1449 }
1450
1451 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1452         Server *s = data;
1453         int r;
1454
1455         assert(s);
1456
1457         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1458
1459                 r = value ? parse_boolean(value) : true;
1460                 if (r < 0)
1461                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1462                 else
1463                         s->forward_to_syslog = r;
1464
1465         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1466
1467                 r = value ? parse_boolean(value) : true;
1468                 if (r < 0)
1469                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1470                 else
1471                         s->forward_to_kmsg = r;
1472
1473         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1474
1475                 r = value ? parse_boolean(value) : true;
1476                 if (r < 0)
1477                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1478                 else
1479                         s->forward_to_console = r;
1480
1481         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1482
1483                 r = value ? parse_boolean(value) : true;
1484                 if (r < 0)
1485                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1486                 else
1487                         s->forward_to_wall = r;
1488
1489         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1490
1491                 if (proc_cmdline_value_missing(key, value))
1492                         return 0;
1493
1494                 r = log_level_from_string(value);
1495                 if (r < 0)
1496                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1497                 else
1498                         s->max_level_console = r;
1499
1500         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1501
1502                 if (proc_cmdline_value_missing(key, value))
1503                         return 0;
1504
1505                 r = log_level_from_string(value);
1506                 if (r < 0)
1507                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1508                 else
1509                         s->max_level_store = r;
1510
1511         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1512
1513                 if (proc_cmdline_value_missing(key, value))
1514                         return 0;
1515
1516                 r = log_level_from_string(value);
1517                 if (r < 0)
1518                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1519                 else
1520                         s->max_level_syslog = r;
1521
1522         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1523
1524                 if (proc_cmdline_value_missing(key, value))
1525                         return 0;
1526
1527                 r = log_level_from_string(value);
1528                 if (r < 0)
1529                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1530                 else
1531                         s->max_level_kmsg = r;
1532
1533         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1534
1535                 if (proc_cmdline_value_missing(key, value))
1536                         return 0;
1537
1538                 r = log_level_from_string(value);
1539                 if (r < 0)
1540                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1541                 else
1542                         s->max_level_wall = r;
1543
1544         } else if (startswith(key, "systemd.journald"))
1545                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1546
1547         /* do not warn about state here, since probably systemd already did */
1548         return 0;
1549 }
1550
1551 static int server_parse_config_file(Server *s) {
1552         assert(s);
1553
1554         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1555                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1556                                         "Journal\0",
1557                                         config_item_perf_lookup, journald_gperf_lookup,
1558                                         CONFIG_PARSE_WARN, s);
1559 }
1560
1561 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1562         Server *s = userdata;
1563
1564         assert(s);
1565
1566         server_sync(s);
1567         return 0;
1568 }
1569
1570 int server_schedule_sync(Server *s, int priority) {
1571         int r;
1572
1573         assert(s);
1574
1575         if (priority <= LOG_CRIT) {
1576                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1577                 server_sync(s);
1578                 return 0;
1579         }
1580
1581         if (s->sync_scheduled)
1582                 return 0;
1583
1584         if (s->sync_interval_usec > 0) {
1585                 usec_t when;
1586
1587                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1588                 if (r < 0)
1589                         return r;
1590
1591                 when += s->sync_interval_usec;
1592
1593                 if (!s->sync_event_source) {
1594                         r = sd_event_add_time(
1595                                         s->event,
1596                                         &s->sync_event_source,
1597                                         CLOCK_MONOTONIC,
1598                                         when, 0,
1599                                         server_dispatch_sync, s);
1600                         if (r < 0)
1601                                 return r;
1602
1603                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1604                 } else {
1605                         r = sd_event_source_set_time(s->sync_event_source, when);
1606                         if (r < 0)
1607                                 return r;
1608
1609                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1610                 }
1611                 if (r < 0)
1612                         return r;
1613
1614                 s->sync_scheduled = true;
1615         }
1616
1617         return 0;
1618 }
1619
1620 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1621         Server *s = userdata;
1622
1623         assert(s);
1624
1625         server_cache_hostname(s);
1626         return 0;
1627 }
1628
1629 static int server_open_hostname(Server *s) {
1630         int r;
1631
1632         assert(s);
1633
1634         s->hostname_fd = open("/proc/sys/kernel/hostname",
1635                               O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1636         if (s->hostname_fd < 0)
1637                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1638
1639         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1640         if (r < 0) {
1641                 /* kernels prior to 3.2 don't support polling this file. Ignore
1642                  * the failure. */
1643                 if (r == -EPERM) {
1644                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1645                         s->hostname_fd = safe_close(s->hostname_fd);
1646                         return 0;
1647                 }
1648
1649                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1650         }
1651
1652         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1653         if (r < 0)
1654                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1655
1656         return 0;
1657 }
1658
1659 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1660         Server *s = userdata;
1661         int r;
1662
1663         assert(s);
1664         assert(s->notify_event_source == es);
1665         assert(s->notify_fd == fd);
1666
1667         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1668          * message on it. Either it's the watchdog event, the initial
1669          * READY=1 event or an stdout stream event. If there's nothing
1670          * to write anymore, turn our event source off. The next time
1671          * there's something to send it will be turned on again. */
1672
1673         if (!s->sent_notify_ready) {
1674                 static const char p[] =
1675                         "READY=1\n"
1676                         "STATUS=Processing requests...";
1677                 ssize_t l;
1678
1679                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1680                 if (l < 0) {
1681                         if (errno == EAGAIN)
1682                                 return 0;
1683
1684                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1685                 }
1686
1687                 s->sent_notify_ready = true;
1688                 log_debug("Sent READY=1 notification.");
1689
1690         } else if (s->send_watchdog) {
1691
1692                 static const char p[] =
1693                         "WATCHDOG=1";
1694
1695                 ssize_t l;
1696
1697                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1698                 if (l < 0) {
1699                         if (errno == EAGAIN)
1700                                 return 0;
1701
1702                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1703                 }
1704
1705                 s->send_watchdog = false;
1706                 log_debug("Sent WATCHDOG=1 notification.");
1707
1708         } else if (s->stdout_streams_notify_queue)
1709                 /* Dispatch one stream notification event */
1710                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1711
1712         /* Leave us enabled if there's still more to do. */
1713         if (s->send_watchdog || s->stdout_streams_notify_queue)
1714                 return 0;
1715
1716         /* There was nothing to do anymore, let's turn ourselves off. */
1717         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1718         if (r < 0)
1719                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1720
1721         return 0;
1722 }
1723
1724 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1725         Server *s = userdata;
1726         int r;
1727
1728         assert(s);
1729
1730         s->send_watchdog = true;
1731
1732         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1733         if (r < 0)
1734                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1735
1736         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1737         if (r < 0)
1738                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1739
1740         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1741         if (r < 0)
1742                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1743
1744         return 0;
1745 }
1746
1747 static int server_connect_notify(Server *s) {
1748         union sockaddr_union sa = {};
1749         const char *e;
1750         int r, salen;
1751
1752         assert(s);
1753         assert(s->notify_fd < 0);
1754         assert(!s->notify_event_source);
1755
1756         /*
1757           So here's the problem: we'd like to send notification
1758           messages to PID 1, but we cannot do that via sd_notify(),
1759           since that's synchronous, and we might end up blocking on
1760           it. Specifically: given that PID 1 might block on
1761           dbus-daemon during IPC, and dbus-daemon is logging to us,
1762           and might hence block on us, we might end up in a deadlock
1763           if we block on sending PID 1 notification messages — by
1764           generating a full blocking circle. To avoid this, let's
1765           create a non-blocking socket, and connect it to the
1766           notification socket, and then wait for POLLOUT before we
1767           send anything. This should efficiently avoid any deadlocks,
1768           as we'll never block on PID 1, hence PID 1 can safely block
1769           on dbus-daemon which can safely block on us again.
1770
1771           Don't think that this issue is real? It is, see:
1772           https://github.com/systemd/systemd/issues/1505
1773         */
1774
1775         e = getenv("NOTIFY_SOCKET");
1776         if (!e)
1777                 return 0;
1778
1779         salen = sockaddr_un_set_path(&sa.un, e);
1780         if (salen < 0)
1781                 return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
1782
1783         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1784         if (s->notify_fd < 0)
1785                 return log_error_errno(errno, "Failed to create notify socket: %m");
1786
1787         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1788
1789         r = connect(s->notify_fd, &sa.sa, salen);
1790         if (r < 0)
1791                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1792
1793         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1794         if (r < 0)
1795                 return log_error_errno(r, "Failed to watch notification socket: %m");
1796
1797         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1798                 s->send_watchdog = true;
1799
1800                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1801                 if (r < 0)
1802                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1803         }
1804
1805         /* This should fire pretty soon, which we'll use to send the
1806          * READY=1 event. */
1807
1808         return 0;
1809 }
1810
1811 int server_init(Server *s) {
1812         _cleanup_fdset_free_ FDSet *fds = NULL;
1813         int n, r, fd;
1814         bool no_sockets;
1815
1816         assert(s);
1817
1818         zero(*s);
1819         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1820         s->compress.enabled = true;
1821         s->compress.threshold_bytes = (uint64_t) -1;
1822         s->seal = true;
1823         s->read_kmsg = true;
1824
1825         s->watchdog_usec = USEC_INFINITY;
1826
1827         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1828         s->sync_scheduled = false;
1829
1830         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1831         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1832
1833         s->forward_to_wall = true;
1834
1835         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1836
1837         s->max_level_store = LOG_DEBUG;
1838         s->max_level_syslog = LOG_DEBUG;
1839         s->max_level_kmsg = LOG_NOTICE;
1840         s->max_level_console = LOG_INFO;
1841         s->max_level_wall = LOG_EMERG;
1842
1843         s->line_max = DEFAULT_LINE_MAX;
1844
1845         journal_reset_metrics(&s->system_storage.metrics);
1846         journal_reset_metrics(&s->runtime_storage.metrics);
1847
1848         server_parse_config_file(s);
1849
1850         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1851         if (r < 0)
1852                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1853
1854         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1855                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1856                           s->rate_limit_interval, s->rate_limit_burst);
1857                 s->rate_limit_interval = s->rate_limit_burst = 0;
1858         }
1859
1860         (void) mkdir_p("/run/systemd/journal", 0755);
1861
1862         s->user_journals = ordered_hashmap_new(NULL);
1863         if (!s->user_journals)
1864                 return log_oom();
1865
1866         s->mmap = mmap_cache_new();
1867         if (!s->mmap)
1868                 return log_oom();
1869
1870         s->deferred_closes = set_new(NULL);
1871         if (!s->deferred_closes)
1872                 return log_oom();
1873
1874         r = sd_event_default(&s->event);
1875         if (r < 0)
1876                 return log_error_errno(r, "Failed to create event loop: %m");
1877
1878         n = sd_listen_fds(true);
1879         if (n < 0)
1880                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1881
1882         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1883
1884                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1885
1886                         if (s->native_fd >= 0)
1887                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1888                                                        "Too many native sockets passed.");
1889
1890                         s->native_fd = fd;
1891
1892                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1893
1894                         if (s->stdout_fd >= 0)
1895                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1896                                                        "Too many stdout sockets passed.");
1897
1898                         s->stdout_fd = fd;
1899
1900                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1901                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1902
1903                         if (s->syslog_fd >= 0)
1904                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1905                                                        "Too many /dev/log sockets passed.");
1906
1907                         s->syslog_fd = fd;
1908
1909                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1910
1911                         if (s->audit_fd >= 0)
1912                                 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1913                                                        "Too many audit sockets passed.");
1914
1915                         s->audit_fd = fd;
1916
1917                 } else {
1918
1919                         if (!fds) {
1920                                 fds = fdset_new();
1921                                 if (!fds)
1922                                         return log_oom();
1923                         }
1924
1925                         r = fdset_put(fds, fd);
1926                         if (r < 0)
1927                                 return log_oom();
1928                 }
1929         }
1930
1931         /* Try to restore streams, but don't bother if this fails */
1932         (void) server_restore_streams(s, fds);
1933
1934         if (fdset_size(fds) > 0) {
1935                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1936                 fds = fdset_free(fds);
1937         }
1938
1939         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1940
1941         /* always open stdout, syslog, native, and kmsg sockets */
1942
1943         /* systemd-journald.socket: /run/systemd/journal/stdout */
1944         r = server_open_stdout_socket(s);
1945         if (r < 0)
1946                 return r;
1947
1948         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1949         r = server_open_syslog_socket(s);
1950         if (r < 0)
1951                 return r;
1952
1953         /* systemd-journald.socket: /run/systemd/journal/socket */
1954         r = server_open_native_socket(s);
1955         if (r < 0)
1956                 return r;
1957
1958         /* /dev/kmsg */
1959         r = server_open_dev_kmsg(s);
1960         if (r < 0)
1961                 return r;
1962
1963         /* Unless we got *some* sockets and not audit, open audit socket */
1964         if (s->audit_fd >= 0 || no_sockets) {
1965                 r = server_open_audit(s);
1966                 if (r < 0)
1967                         return r;
1968         }
1969
1970         r = server_open_kernel_seqnum(s);
1971         if (r < 0)
1972                 return r;
1973
1974         r = server_open_hostname(s);
1975         if (r < 0)
1976                 return r;
1977
1978         r = setup_signals(s);
1979         if (r < 0)
1980                 return r;
1981
1982         s->rate_limit = journal_rate_limit_new();
1983         if (!s->rate_limit)
1984                 return -ENOMEM;
1985
1986         r = cg_get_root_path(&s->cgroup_root);
1987         if (r < 0)
1988                 return r;
1989
1990         server_cache_hostname(s);
1991         server_cache_boot_id(s);
1992         server_cache_machine_id(s);
1993
1994         s->runtime_storage.name = "Runtime journal";
1995         s->system_storage.name = "System journal";
1996
1997         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1998         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1999         if (!s->runtime_storage.path || !s->system_storage.path)
2000                 return -ENOMEM;
2001
2002         (void) server_connect_notify(s);
2003
2004         (void) client_context_acquire_default(s);
2005
2006         return system_journal_open(s, false);
2007 }
2008
2009 void server_maybe_append_tags(Server *s) {
2010 #if HAVE_GCRYPT
2011         JournalFile *f;
2012         Iterator i;
2013         usec_t n;
2014
2015         n = now(CLOCK_REALTIME);
2016
2017         if (s->system_journal)
2018                 journal_file_maybe_append_tag(s->system_journal, n);
2019
2020         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2021                 journal_file_maybe_append_tag(f, n);
2022 #endif
2023 }
2024
2025 void server_done(Server *s) {
2026         assert(s);
2027
2028         set_free_with_destructor(s->deferred_closes, journal_file_close);
2029
2030         while (s->stdout_streams)
2031                 stdout_stream_free(s->stdout_streams);
2032
2033         client_context_flush_all(s);
2034
2035         if (s->system_journal)
2036                 (void) journal_file_close(s->system_journal);
2037
2038         if (s->runtime_journal)
2039                 (void) journal_file_close(s->runtime_journal);
2040
2041         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
2042
2043         sd_event_source_unref(s->syslog_event_source);
2044         sd_event_source_unref(s->native_event_source);
2045         sd_event_source_unref(s->stdout_event_source);
2046         sd_event_source_unref(s->dev_kmsg_event_source);
2047         sd_event_source_unref(s->audit_event_source);
2048         sd_event_source_unref(s->sync_event_source);
2049         sd_event_source_unref(s->sigusr1_event_source);
2050         sd_event_source_unref(s->sigusr2_event_source);
2051         sd_event_source_unref(s->sigterm_event_source);
2052         sd_event_source_unref(s->sigint_event_source);
2053         sd_event_source_unref(s->sigrtmin1_event_source);
2054         sd_event_source_unref(s->hostname_event_source);
2055         sd_event_source_unref(s->notify_event_source);
2056         sd_event_source_unref(s->watchdog_event_source);
2057         sd_event_unref(s->event);
2058
2059         safe_close(s->syslog_fd);
2060         safe_close(s->native_fd);
2061         safe_close(s->stdout_fd);
2062         safe_close(s->dev_kmsg_fd);
2063         safe_close(s->audit_fd);
2064         safe_close(s->hostname_fd);
2065         safe_close(s->notify_fd);
2066
2067         if (s->rate_limit)
2068                 journal_rate_limit_free(s->rate_limit);
2069
2070         if (s->kernel_seqnum)
2071                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2072
2073         free(s->buffer);
2074         free(s->tty_path);
2075         free(s->cgroup_root);
2076         free(s->hostname_field);
2077         free(s->runtime_storage.path);
2078         free(s->system_storage.path);
2079
2080         if (s->mmap)
2081                 mmap_cache_unref(s->mmap);
2082 }
2083
/* String names of the Storage enum values, indexed by the enum value itself. */
static const char* const storage_table[_STORAGE_MAX] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Instantiate the string lookup helpers and the config_parse_storage() parser
 * callback for Storage.
 * NOTE(review): the generated function names and their use of storage_table
 * come from the macro definitions, which live outside this file — presumably
 * storage_to_string()/storage_from_string(); confirm in the macro headers. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2093
/* String names of the SplitMode enum values, indexed by the enum value itself. */
static const char* const split_mode_table[_SPLIT_MAX] = {
        [SPLIT_LOGIN] = "login",
        [SPLIT_UID] = "uid",
        [SPLIT_NONE] = "none",
};

/* Instantiate the string lookup helpers and the config_parse_split_mode()
 * parser callback for SplitMode.
 * NOTE(review): the generated function names and their use of split_mode_table
 * come from the macro definitions, which live outside this file — presumably
 * split_mode_to_string()/split_mode_from_string(); confirm in the macro headers. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
2102
2103 int config_parse_line_max(
2104                 const char* unit,
2105                 const char *filename,
2106                 unsigned line,
2107                 const char *section,
2108                 unsigned section_line,
2109                 const char *lvalue,
2110                 int ltype,
2111                 const char *rvalue,
2112                 void *data,
2113                 void *userdata) {
2114
2115         size_t *sz = data;
2116         int r;
2117
2118         assert(filename);
2119         assert(lvalue);
2120         assert(rvalue);
2121         assert(data);
2122
2123         if (isempty(rvalue))
2124                 /* Empty assignment means default */
2125                 *sz = DEFAULT_LINE_MAX;
2126         else {
2127                 uint64_t v;
2128
2129                 r = parse_size(rvalue, 1024, &v);
2130                 if (r < 0) {
2131                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2132                         return 0;
2133                 }
2134
2135                 if (v < 79) {
2136                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2137                          * terminal size is 80ch, and it might make sense to break one character before the natural
2138                          * line break would occur on that. */
2139                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2140                         *sz = 79;
2141                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2142                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2143                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2144                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2145                          * fail much earlier anyway. */
2146                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2147                         *sz = SSIZE_MAX-1;
2148                 } else
2149                         *sz = (size_t) v;
2150         }
2151
2152         return 0;
2153 }
2154
2155 int config_parse_compress(const char* unit,
2156                           const char *filename,
2157                           unsigned line,
2158                           const char *section,
2159                           unsigned section_line,
2160                           const char *lvalue,
2161                           int ltype,
2162                           const char *rvalue,
2163                           void *data,
2164                           void *userdata) {
2165         JournalCompressOptions* compress = data;
2166         int r;
2167
2168         if (streq(rvalue, "1")) {
2169                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2170                            "Compress= ambiguously specified as 1, enabling compression with default threshold");
2171                 compress->enabled = true;
2172         } else if (streq(rvalue, "0")) {
2173                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2174                            "Compress= ambiguously specified as 0, disabling compression");
2175                 compress->enabled = false;
2176         } else if ((r = parse_boolean(rvalue)) >= 0)
2177                 compress->enabled = r;
2178         else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2179                 compress->enabled = true;
2180         else if (isempty(rvalue)) {
2181                 compress->enabled = true;
2182                 compress->threshold_bytes = (uint64_t) -1;
2183         } else
2184                 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2185
2186         return 0;
2187 }