src/journal/journald-server.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #if HAVE_SELINUX
   4 #include <selinux/selinux.h>
   5 #endif
   6 #include <sys/ioctl.h>
   7 #include <sys/mman.h>
   8 #include <sys/signalfd.h>
   9 #include <sys/statvfs.h>
  10 #include <linux/sockios.h>
  11
  12 #include "sd-daemon.h"
  13 #include "sd-journal.h"
  14 #include "sd-messages.h"
  15
  16 #include "acl-util.h"
  17 #include "alloc-util.h"
  18 #include "audit-util.h"
  19 #include "cgroup-util.h"
  20 #include "conf-parser.h"
  21 #include "dirent-util.h"
  22 #include "extract-word.h"
  23 #include "fd-util.h"
  24 #include "fileio.h"
  25 #include "format-util.h"
  26 #include "fs-util.h"
  27 #include "hashmap.h"
  28 #include "hostname-util.h"
  29 #include "id128-util.h"
  30 #include "io-util.h"
  31 #include "journal-authenticate.h"
  32 #include "journal-file.h"
  33 #include "journal-internal.h"
  34 #include "journal-vacuum.h"
  35 #include "journald-audit.h"
  36 #include "journald-context.h"
  37 #include "journald-kmsg.h"
  38 #include "journald-native.h"
  39 #include "journald-rate-limit.h"
  40 #include "journald-server.h"
  41 #include "journald-stream.h"
  42 #include "journald-syslog.h"
  43 #include "log.h"
  44 #include "missing.h"
  45 #include "mkdir.h"
  46 #include "parse-util.h"
  47 #include "proc-cmdline.h"
  48 #include "process-util.h"
  49 #include "rm-rf.h"
  50 #include "selinux-util.h"
  51 #include "signal-util.h"
  52 #include "socket-util.h"
  53 #include "stdio-util.h"
  54 #include "string-table.h"
  55 #include "string-util.h"
  56 #include "syslog-util.h"
  57 #include "user-util.h"
  58
/* Upper bound on the number of per-user journal files kept open at the same time. find_journal() closes
 * already-open user journals until fewer than this many remain before it opens another one. */
#define USER_JOURNALS_MAX 1024

/* Built-in default values. NOTE(review): the code applying these defaults is not part of the section of the
 * file reviewed here; presumably they initialize the corresponding Server settings — confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long the cached disk space figures of a journal storage stay valid before cache_space_refresh()
 * recomputes them. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): judging by the name, a socket send buffer size (8 MiB) for notification messages; its use is
 * not visible in the section of the file reviewed here — confirm. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)

/* Once the set of deferred journal file closes reaches this many entries, server_vacuum_deferred_closes()
 * closes entries until the set is below the limit again. */
#define DEFERRED_CLOSES_MAX (4096)
  78
  79 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
  80         _cleanup_closedir_ DIR *d = NULL;
  81         struct dirent *de;
  82         struct statvfs ss;
  83
  84         assert(ret_used);
  85         assert(ret_free);
  86
  87         d = opendir(path);
  88         if (!d)
  89                 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
  90                                       errno, "Failed to open %s: %m", path);
  91
  92         if (fstatvfs(dirfd(d), &ss) < 0)
  93                 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
  94
  95         *ret_free = ss.f_bsize * ss.f_bavail;
  96         *ret_used = 0;
  97         FOREACH_DIRENT_ALL(de, d, break) {
  98                 struct stat st;
  99
 100                 if (!endswith(de->d_name, ".journal") &&
 101                     !endswith(de->d_name, ".journal~"))
 102                         continue;
 103
 104                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
 105                         log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
 106                         continue;
 107                 }
 108
 109                 if (!S_ISREG(st.st_mode))
 110                         continue;
 111
 112                 *ret_used += (uint64_t) st.st_blocks * 512UL;
 113         }
 114
 115         return 0;
 116 }
 117
 118 static void cache_space_invalidate(JournalStorageSpace *space) {
 119         zero(*space);
 120 }
 121
 122 static int cache_space_refresh(Server *s, JournalStorage *storage) {
 123         JournalStorageSpace *space;
 124         JournalMetrics *metrics;
 125         uint64_t vfs_used, vfs_avail, avail;
 126         usec_t ts;
 127         int r;
 128
 129         assert(s);
 130
 131         metrics = &storage->metrics;
 132         space = &storage->space;
 133
 134         ts = now(CLOCK_MONOTONIC);
 135
 136         if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
 137                 return 0;
 138
 139         r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
 140         if (r < 0)
 141                 return r;
 142
 143         space->vfs_used = vfs_used;
 144         space->vfs_available = vfs_avail;
 145
 146         avail = LESS_BY(vfs_avail, metrics->keep_free);
 147
 148         space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
 149         space->available = LESS_BY(space->limit, vfs_used);
 150         space->timestamp = ts;
 151         return 1;
 152 }
 153
 154 static void patch_min_use(JournalStorage *storage) {
 155         assert(storage);
 156
 157         /* Let's bump the min_use limit to the current usage on disk. We do
 158          * this when starting up and first opening the journal files. This way
 159          * sudden spikes in disk usage will not cause journald to vacuum files
 160          * without bounds. Note that this means that only a restart of journald
 161          * will make it reset this value. */
 162
 163         storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
 164 }
 165
 166 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
 167         JournalStorage *js;
 168         int r;
 169
 170         assert(s);
 171
 172         js = s->system_journal ? &s->system_storage : &s->runtime_storage;
 173
 174         r = cache_space_refresh(s, js);
 175         if (r >= 0) {
 176                 if (available)
 177                         *available = js->space.available;
 178                 if (limit)
 179                         *limit = js->space.limit;
 180         }
 181         return r;
 182 }
 183
 184 void server_space_usage_message(Server *s, JournalStorage *storage) {
 185         char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
 186              fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
 187         JournalMetrics *metrics;
 188
 189         assert(s);
 190
 191         if (!storage)
 192                 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
 193
 194         if (cache_space_refresh(s, storage) < 0)
 195                 return;
 196
 197         metrics = &storage->metrics;
 198         format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
 199         format_bytes(fb2, sizeof(fb2), metrics->max_use);
 200         format_bytes(fb3, sizeof(fb3), metrics->keep_free);
 201         format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
 202         format_bytes(fb5, sizeof(fb5), storage->space.limit);
 203         format_bytes(fb6, sizeof(fb6), storage->space.available);
 204
 205         server_driver_message(s, 0,
 206                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
 207                               LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
 208                                           storage->name, storage->path, fb1, fb5, fb6),
 209                               "JOURNAL_NAME=%s", storage->name,
 210                               "JOURNAL_PATH=%s", storage->path,
 211                               "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
 212                               "CURRENT_USE_PRETTY=%s", fb1,
 213                               "MAX_USE=%"PRIu64, metrics->max_use,
 214                               "MAX_USE_PRETTY=%s", fb2,
 215                               "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
 216                               "DISK_KEEP_FREE_PRETTY=%s", fb3,
 217                               "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
 218                               "DISK_AVAILABLE_PRETTY=%s", fb4,
 219                               "LIMIT=%"PRIu64, storage->space.limit,
 220                               "LIMIT_PRETTY=%s", fb5,
 221                               "AVAILABLE=%"PRIu64, storage->space.available,
 222                               "AVAILABLE_PRETTY=%s", fb6,
 223                               NULL);
 224 }
 225
 226 static bool uid_for_system_journal(uid_t uid) {
 227
 228         /* Returns true if the specified UID shall get its data stored in the system journal*/
 229
 230         return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
 231 }
 232
 233 static void server_add_acls(JournalFile *f, uid_t uid) {
 234 #if HAVE_ACL
 235         int r;
 236 #endif
 237         assert(f);
 238
 239 #if HAVE_ACL
 240         if (uid_for_system_journal(uid))
 241                 return;
 242
 243         r = add_acls_for_user(f->fd, uid);
 244         if (r < 0)
 245                 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
 246 #endif
 247 }
 248
 249 static int open_journal(
 250                 Server *s,
 251                 bool reliably,
 252                 const char *fname,
 253                 int flags,
 254                 bool seal,
 255                 JournalMetrics *metrics,
 256                 JournalFile **ret) {
 257
 258         JournalFile *f;
 259         int r;
 260
 261         assert(s);
 262         assert(fname);
 263         assert(ret);
 264
 265         if (reliably)
 266                 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
 267                                                seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
 268         else
 269                 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
 270                                       metrics, s->mmap, s->deferred_closes, NULL, &f);
 271
 272         if (r < 0)
 273                 return r;
 274
 275         r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
 276         if (r < 0) {
 277                 (void) journal_file_close(f);
 278                 return r;
 279         }
 280
 281         *ret = f;
 282         return r;
 283 }
 284
 285 static bool flushed_flag_is_set(void) {
 286         return access("/run/systemd/journal/flushed", F_OK) >= 0;
 287 }
 288
/* Makes sure the journal files the server writes to are open. Journals that are already open are left alone,
 * hence calling this repeatedly is cheap.
 *
 * Persistent (system) journal: opened, and created if needed, only if it is not open yet, storage is
 * configured as persistent or auto, and either 'flush_requested' is true or the "flushed" flag file exists.
 * Failure to open it is not fatal: a warning is logged (except for ENOENT/EROFS) and we carry on.
 *
 * Runtime journal: considered only if it is not open yet and storage is not disabled. If the system journal is
 * open, the runtime journal is opened only if it already exists (so that it can be flushed), and failure is
 * not fatal. If the system journal is not open the runtime journal is required: it is created if necessary
 * and failure to open it is returned as an error.
 *
 * Returns a negative value only if the required runtime journal cannot be opened, otherwise the non-negative
 * result of the last open attempt (or 0). */
static int system_journal_open(Server *s, bool flush_requested) {
        const char *fn;
        int r = 0;

        if (!s->system_journal &&
            IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
            (flush_requested || flushed_flag_is_set())) {

                /* If in auto mode: first try to create the machine
                 * path, but not the prefix.
                 *
                 * If in persistent mode: create /var/log/journal and
                 * the machine path */

                if (s->storage == STORAGE_PERSISTENT)
                        (void) mkdir_p("/var/log/journal/", 0755);

                (void) mkdir(s->system_storage.path, 0755);

                fn = strjoina(s->system_storage.path, "/system.journal");
                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
                if (r >= 0) {
                        server_add_acls(s->system_journal, 0);
                        /* Measure current usage right away, so that min_use can be bumped to it */
                        (void) cache_space_refresh(s, &s->system_storage);
                        patch_min_use(&s->system_storage);
                } else {
                        /* A missing directory or a read-only file system is expected, stay quiet then */
                        if (!IN_SET(r, -ENOENT, -EROFS))
                                log_warning_errno(r, "Failed to open system journal: %m");

                        r = 0;
                }

                /* If the runtime journal is open, and we're post-flush, we're
                 * recovering from a failed system journal rotate (ENOSPC)
                 * for which the runtime journal was reopened.
                 *
                 * Perform an implicit flush to var, leaving the runtime
                 * journal closed, now that the system journal is back.
                 *
                 * Note that this is only done if we got here because of the flag file, not on an
                 * explicit flush request. */
                if (!flush_requested)
                        (void) server_flush_to_var(s, true);
        }

        if (!s->runtime_journal &&
            (s->storage != STORAGE_NONE)) {

                fn = strjoina(s->runtime_storage.path, "/system.journal");

                if (s->system_journal) {

                        /* Try to open the runtime journal, but only
                         * if it already exists, so that we can flush
                         * it into the system journal */

                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0) {
                                if (r != -ENOENT)
                                        log_warning_errno(r, "Failed to open runtime journal: %m");

                                r = 0;
                        }

                } else {

                        /* OK, we really need the runtime journal, so create
                         * it if necessary. */

                        (void) mkdir("/run/log", 0755);
                        (void) mkdir("/run/log/journal", 0755);
                        (void) mkdir_parents(fn, 0750);

                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0)
                                return log_error_errno(r, "Failed to open runtime journal: %m");
                }

                if (s->runtime_journal) {
                        server_add_acls(s->runtime_journal, 0);
                        (void) cache_space_refresh(s, &s->runtime_storage);
                        patch_min_use(&s->runtime_storage);
                }
        }

        return r;
}
 374
 375 static JournalFile* find_journal(Server *s, uid_t uid) {
 376         _cleanup_free_ char *p = NULL;
 377         int r;
 378         JournalFile *f;
 379         sd_id128_t machine;
 380
 381         assert(s);
 382
 383         /* A rotate that fails to create the new journal (ENOSPC) leaves the
 384          * rotated journal as NULL.  Unless we revisit opening, even after
 385          * space is made available we'll continue to return NULL indefinitely.
 386          *
 387          * system_journal_open() is a noop if the journals are already open, so
 388          * we can just call it here to recover from failed rotates (or anything
 389          * else that's left the journals as NULL).
 390          *
 391          * Fixes https://github.com/systemd/systemd/issues/3968 */
 392         (void) system_journal_open(s, false);
 393
 394         /* We split up user logs only on /var, not on /run. If the
 395          * runtime file is open, we write to it exclusively, in order
 396          * to guarantee proper order as soon as we flush /run to
 397          * /var and close the runtime file. */
 398
 399         if (s->runtime_journal)
 400                 return s->runtime_journal;
 401
 402         if (uid_for_system_journal(uid))
 403                 return s->system_journal;
 404
 405         f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
 406         if (f)
 407                 return f;
 408
 409         r = sd_id128_get_machine(&machine);
 410         if (r < 0) {
 411                 log_debug_errno(r, "Failed to determine machine ID, using system log: %m");
 412                 return s->system_journal;
 413         }
 414
 415         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
 416                      SD_ID128_FORMAT_VAL(machine), uid) < 0) {
 417                 log_oom();
 418                 return s->system_journal;
 419         }
 420
 421         while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
 422                 /* Too many open? Then let's close one */
 423                 f = ordered_hashmap_steal_first(s->user_journals);
 424                 assert(f);
 425                 (void) journal_file_close(f);
 426         }
 427
 428         r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
 429         if (r < 0)
 430                 return s->system_journal;
 431
 432         server_add_acls(f, uid);
 433
 434         r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
 435         if (r < 0) {
 436                 (void) journal_file_close(f);
 437                 return s->system_journal;
 438         }
 439
 440         return f;
 441 }
 442
 443 static int do_rotate(
 444                 Server *s,
 445                 JournalFile **f,
 446                 const char* name,
 447                 bool seal,
 448                 uint32_t uid) {
 449
 450         int r;
 451         assert(s);
 452
 453         if (!*f)
 454                 return -EINVAL;
 455
 456         r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
 457         if (r < 0) {
 458                 if (*f)
 459                         return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
 460                 else
 461                         return log_error_errno(r, "Failed to create new %s journal: %m", name);
 462         }
 463
 464         server_add_acls(*f, uid);
 465
 466         return r;
 467 }
 468
 469 static void server_process_deferred_closes(Server *s) {
 470         JournalFile *f;
 471         Iterator i;
 472
 473         /* Perform any deferred closes which aren't still offlining. */
 474         SET_FOREACH(f, s->deferred_closes, i) {
 475                 if (journal_file_is_offlining(f))
 476                         continue;
 477
 478                 (void) set_remove(s->deferred_closes, f);
 479                 (void) journal_file_close(f);
 480         }
 481 }
 482
 483 static void server_vacuum_deferred_closes(Server *s) {
 484         assert(s);
 485
 486         /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
 487         if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
 488                 return;
 489
 490         /* Let's first remove all journal files that might already have completed closing */
 491         server_process_deferred_closes(s);
 492
 493         /* And now, let's close some more until we reach the limit again. */
 494         while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
 495                 JournalFile *f;
 496
 497                 assert_se(f = set_steal_first(s->deferred_closes));
 498                 journal_file_close(f);
 499         }
 500 }
 501
 502 static int open_user_journal_directory(Server *s, DIR **ret_dir, char **ret_path) {
 503         _cleanup_closedir_ DIR *dir = NULL;
 504         _cleanup_free_ char *path = NULL;
 505         sd_id128_t machine;
 506         int r;
 507
 508         assert(s);
 509
 510         r = sd_id128_get_machine(&machine);
 511         if (r < 0)
 512                 return log_error_errno(r, "Failed to determine machine ID, ignoring: %m");
 513
 514         if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/", SD_ID128_FORMAT_VAL(machine)) < 0)
 515                 return log_oom();
 516
 517         dir = opendir(path);
 518         if (!dir)
 519                 return log_error_errno(errno, "Failed to open user journal directory '%s': %m", path);
 520
 521         if (ret_dir)
 522                 *ret_dir = TAKE_PTR(dir);
 523         if (ret_path)
 524                 *ret_path = TAKE_PTR(path);
 525
 526         return 0;
 527 }
 528
/* Rotates all journal files: the runtime and system journals, every user journal currently open, and every
 * user journal found in the user journal directory that is not currently open. Failures of individual files
 * are logged and otherwise ignored, so that the remaining files are still processed. Finally, deferred
 * closes that have completed are processed. */
void server_rotate(Server *s) {
        _cleanup_free_ char *path = NULL;
        _cleanup_closedir_ DIR *d = NULL;
        JournalFile *f;
        Iterator i;
        void *k;
        int r;

        log_debug("Rotating...");

        /* First, rotate the system journal (either in its runtime flavour or in its system flavour) */
        (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
        (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);

        /* Then, rotate all user journals we have open (keeping them open) */
        ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
                r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
                if (r >= 0)
                        /* do_rotate() updated f to the file that replaces the old one, store it under the same key */
                        ordered_hashmap_replace(s->user_journals, k, f);
                else if (!f)
                        /* Old file has been closed and deallocated */
                        ordered_hashmap_remove(s->user_journals, k);
        }

        /* Finally, also rotate all user journals we currently do not have open. */
        r = open_user_journal_directory(s, &d, &path);
        if (r >= 0) {
                struct dirent *de;

                FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", path)) {
                        _cleanup_free_ char *u = NULL, *full = NULL;
                        _cleanup_close_ int fd = -1;
                        const char *a, *b;
                        uid_t uid;

                        /* We are only interested in files named "user-<UID>.journal" */
                        a = startswith(de->d_name, "user-");
                        if (!a)
                                continue;
                        b = endswith(de->d_name, ".journal");
                        if (!b)
                                continue;

                        /* Extract the part between prefix and suffix, it should be the UID */
                        u = strndup(a, b-a);
                        if (!u) {
                                log_oom();
                                break;
                        }

                        r = parse_uid(u, &uid);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
                                continue;
                        }

                        /* Already rotated in the above loop? i.e. is it an open user journal? */
                        if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
                                continue;

                        /* 'path' ends in a slash, hence plain concatenation yields the full path */
                        full = strjoin(path, de->d_name);
                        if (!full) {
                                log_oom();
                                break;
                        }

                        /* Open relative to the directory fd, and refuse to follow symlinks */
                        fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
                        if (fd < 0) {
                                log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
                                               "Failed to open journal file '%s' for rotation: %m", full);
                                continue;
                        }

                        /* Make some room in the set of deferred close()s */
                        server_vacuum_deferred_closes(s);

                        /* Open the file briefly, so that we can archive it */
                        r = journal_file_open(fd,
                                              full,
                                              O_RDWR,
                                              0640,
                                              s->compress.enabled,
                                              s->compress.threshold_bytes,
                                              s->seal,
                                              &s->system_storage.metrics,
                                              s->mmap,
                                              s->deferred_closes,
                                              NULL,
                                              &f);
                        if (r < 0) {
                                log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);

                                r = journal_file_dispose(dirfd(d), de->d_name);
                                if (r < 0)
                                        log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
                                else
                                        log_debug("Successfully moved %s out of the way.", full);

                                /* fd is still ours here and is closed by the cleanup handler */
                                continue;
                        }

                        TAKE_FD(fd); /* Donated to journal_file_open() */

                        r = journal_file_archive(f);
                        if (r < 0)
                                log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);

                        /* We don't need the file anymore, queue it for closing */
                        f = journal_initiate_close(f, s->deferred_closes);
                }
        }

        server_process_deferred_closes(s);
}
 640
 641 void server_sync(Server *s) {
 642         JournalFile *f;
 643         Iterator i;
 644         int r;
 645
 646         if (s->system_journal) {
 647                 r = journal_file_set_offline(s->system_journal, false);
 648                 if (r < 0)
 649                         log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
 650         }
 651
 652         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
 653                 r = journal_file_set_offline(f, false);
 654                 if (r < 0)
 655                         log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
 656         }
 657
 658         if (s->sync_event_source) {
 659                 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
 660                 if (r < 0)
 661                         log_error_errno(r, "Failed to disable sync timer source: %m");
 662         }
 663
 664         s->sync_scheduled = false;
 665 }
 666
 667 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
 668
 669         int r;
 670
 671         assert(s);
 672         assert(storage);
 673
 674         (void) cache_space_refresh(s, storage);
 675
 676         if (verbose)
 677                 server_space_usage_message(s, storage);
 678
 679         r = journal_directory_vacuum(storage->path, storage->space.limit,
 680                                      storage->metrics.n_max_files, s->max_retention_usec,
 681                                      &s->oldest_file_usec, verbose);
 682         if (r < 0 && r != -ENOENT)
 683                 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
 684
 685         cache_space_invalidate(&storage->space);
 686 }
 687
 688 int server_vacuum(Server *s, bool verbose) {
 689         assert(s);
 690
 691         log_debug("Vacuuming...");
 692
 693         s->oldest_file_usec = 0;
 694
 695         if (s->system_journal)
 696                 do_vacuum(s, &s->system_storage, verbose);
 697         if (s->runtime_journal)
 698                 do_vacuum(s, &s->runtime_storage, verbose);
 699
 700         return 0;
 701 }
 702
 703 static void server_cache_machine_id(Server *s) {
 704         sd_id128_t id;
 705         int r;
 706
 707         assert(s);
 708
 709         r = sd_id128_get_machine(&id);
 710         if (r < 0)
 711                 return;
 712
 713         sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
 714 }
 715
 716 static void server_cache_boot_id(Server *s) {
 717         sd_id128_t id;
 718         int r;
 719
 720         assert(s);
 721
 722         r = sd_id128_get_boot(&id);
 723         if (r < 0)
 724                 return;
 725
 726         sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
 727 }
 728
 729 static void server_cache_hostname(Server *s) {
 730         _cleanup_free_ char *t = NULL;
 731         char *x;
 732
 733         assert(s);
 734
 735         t = gethostname_malloc();
 736         if (!t)
 737                 return;
 738
 739         x = strappend("_HOSTNAME=", t);
 740         if (!x)
 741                 return;
 742
 743         free(s->hostname_field);
 744         s->hostname_field = x;
 745 }
 746
 747 static bool shall_try_append_again(JournalFile *f, int r) {
 748         switch(r) {
 749
 750         case -E2BIG:           /* Hit configured limit          */
 751         case -EFBIG:           /* Hit fs limit                  */
 752         case -EDQUOT:          /* Quota limit hit               */
 753         case -ENOSPC:          /* Disk full                     */
 754                 log_debug("%s: Allocation limit reached, rotating.", f->path);
 755                 return true;
 756
 757         case -EIO:             /* I/O error of some kind (mmap) */
 758                 log_warning("%s: IO error, rotating.", f->path);
 759                 return true;
 760
 761         case -EHOSTDOWN:       /* Other machine                 */
 762                 log_info("%s: Journal file from other machine, rotating.", f->path);
 763                 return true;
 764
 765         case -EBUSY:           /* Unclean shutdown              */
 766                 log_info("%s: Unclean shutdown, rotating.", f->path);
 767                 return true;
 768
 769         case -EPROTONOSUPPORT: /* Unsupported feature           */
 770                 log_info("%s: Unsupported feature, rotating.", f->path);
 771                 return true;
 772
 773         case -EBADMSG:         /* Corrupted                     */
 774         case -ENODATA:         /* Truncated                     */
 775         case -ESHUTDOWN:       /* Already archived              */
 776                 log_warning("%s: Journal file corrupted, rotating.", f->path);
 777                 return true;
 778
 779         case -EIDRM:           /* Journal file has been deleted */
 780                 log_warning("%s: Journal file has been deleted, rotating.", f->path);
 781                 return true;
 782
 783         case -ETXTBSY:         /* Journal file is from the future */
 784                 log_warning("%s: Journal file is from the future, rotating.", f->path);
 785                 return true;
 786
 787         default:
 788                 return false;
 789         }
 790 }
 791
 792 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
 793         bool vacuumed = false, rotate = false;
 794         struct dual_timestamp ts;
 795         JournalFile *f;
 796         int r;
 797
 798         assert(s);
 799         assert(iovec);
 800         assert(n > 0);
 801
 802         /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
 803          * the source time, and not even the time the event was originally seen, but instead simply the time we started
 804          * processing it, as we want strictly linear ordering in what we write out.) */
 805         assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
 806         assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
 807
 808         if (ts.realtime < s->last_realtime_clock) {
 809                 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
 810                  * regular operation. However, when it does happen, then we should make sure that we start fresh files
 811                  * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
 812                  * bisection works correctly. */
 813
 814                 log_debug("Time jumped backwards, rotating.");
 815                 rotate = true;
 816         } else {
 817
 818                 f = find_journal(s, uid);
 819                 if (!f)
 820                         return;
 821
 822                 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
 823                         log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
 824                         rotate = true;
 825                 }
 826         }
 827
 828         if (rotate) {
 829                 server_rotate(s);
 830                 server_vacuum(s, false);
 831                 vacuumed = true;
 832
 833                 f = find_journal(s, uid);
 834                 if (!f)
 835                         return;
 836         }
 837
 838         s->last_realtime_clock = ts.realtime;
 839
 840         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 841         if (r >= 0) {
 842                 server_schedule_sync(s, priority);
 843                 return;
 844         }
 845
 846         if (vacuumed || !shall_try_append_again(f, r)) {
 847                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 848                 return;
 849         }
 850
 851         server_rotate(s);
 852         server_vacuum(s, false);
 853
 854         f = find_journal(s, uid);
 855         if (!f)
 856                 return;
 857
 858         log_debug("Retrying write.");
 859         r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
 860         if (r < 0)
 861                 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
 862         else
 863                 server_schedule_sync(s, priority);
 864 }
 865
 /* If isset(value) is true, formats "<field>=<value>" (using the printf conversion 'format') into a buffer
  * obtained from newa() and appends it to iovec[], post-incrementing n. 'field' must be a string literal
  * WITHOUT the trailing "=", the macro adds it. 'type' is only used to size the buffer.
  *
  * The buffer is referenced from iovec[] after this statement has finished, hence this relies on newa()
  * storage staying valid beyond the enclosing block. Wrapped in do/while so that the macro expands to
  * exactly one statement and cannot capture a following "else". */
 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
         do {                                                            \
                 if (isset(value)) {                                     \
                         char *k;                                        \
                         k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                         sprintf(k, field "=" format, value);            \
                         iovec[n++] = IOVEC_MAKE_STRING(k);              \
                 }                                                       \
         } while (false)
 873
 /* If 'value' is a non-empty string (per isempty()), builds "<field>=<value>" with strjoina() and appends
  * it to iovec[], post-incrementing n. 'field' must be a string literal WITHOUT the trailing "=", the
  * macro adds it.
  *
  * The joined string is referenced from iovec[] after this statement has finished, hence this relies on
  * strjoina() storage staying valid beyond the enclosing block. Wrapped in do/while so that the macro
  * expands to exactly one statement and cannot capture a following "else". */
 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
         do {                                                            \
                 if (!isempty(value)) {                                  \
                         char *k;                                        \
                         k = strjoina(field "=", value);                 \
                         iovec[n++] = IOVEC_MAKE_STRING(k);              \
                 }                                                       \
         } while (false)
 880
 /* If the 128bit ID 'value' is not the null ID, formats "<field>=<id>" into a buffer obtained from newa()
  * and appends it to iovec[], post-incrementing n. 'field' must be a string literal WITHOUT the trailing
  * "=", the macro adds it (passing "FOO=" would result in "FOO==<id>").
  *
  * The buffer is referenced from iovec[] after this statement has finished, hence this relies on newa()
  * storage staying valid beyond the enclosing block. Wrapped in do/while so that the macro expands to
  * exactly one statement and cannot capture a following "else". */
 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
         do {                                                            \
                 if (!sd_id128_is_null(value)) {                         \
                         char *k;                                        \
                         k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                         sd_id128_to_string(value, stpcpy(k, field "=")); \
                         iovec[n++] = IOVEC_MAKE_STRING(k);              \
                 }                                                       \
         } while (false)
 888
 /* If value_size is larger than zero, copies the value_size bytes at 'value' behind "<field>=" into a
  * buffer obtained from newa(), NUL-terminates the result and appends it to iovec[], post-incrementing n.
  * 'field' must be a string literal WITHOUT the trailing "=", the macro adds it.
  *
  * The buffer is referenced from iovec[] after this statement has finished, hence this relies on newa()
  * storage staying valid beyond the enclosing block. Wrapped in do/while so that the macro expands to
  * exactly one statement and cannot capture a following "else"; value_size is parenthesized so that
  * arbitrary expressions may be passed. */
 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
         do {                                                            \
                 if ((value_size) > 0) {                                 \
                         char *k;                                        \
                         k = newa(char, STRLEN(field "=") + (value_size) + 1); \
                         *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0; \
                         iovec[n++] = IOVEC_MAKE_STRING(k);              \
                 }                                                       \
         } while (false)
 896
 897 static void dispatch_message_real(
 898                 Server *s,
 899                 struct iovec *iovec, size_t n, size_t m,
 900                 const ClientContext *c,
 901                 const struct timeval *tv,
 902                 int priority,
 903                 pid_t object_pid) {
 904
 905         char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
 906         uid_t journal_uid;
 907         ClientContext *o;
 908
 909         assert(s);
 910         assert(iovec);
 911         assert(n > 0);
 912         assert(n +
 913                N_IOVEC_META_FIELDS +
 914                (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
 915                client_context_extra_fields_n_iovec(c) <= m);
 916
 917         if (c) {
 918                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
 919                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
 920                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
 921
 922                 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
 923                 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
 924                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
 925                 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
 926
 927                 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
 928
 929                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
 930                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
 931
 932                 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
 933                 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
 934                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
 935                 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
 936                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
 937                 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
 938                 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
 939
 940                 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
 941
 942                 if (c->extra_fields_n_iovec > 0) {
 943                         memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
 944                         n += c->extra_fields_n_iovec;
 945                 }
 946         }
 947
 948         assert(n <= m);
 949
 950         if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
 951
 952                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
 953                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
 954                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
 955
 956                 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
 957                 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
 958                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
 959                 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
 960
 961                 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
 962
 963                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
 964                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
 965
 966                 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
 967                 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
 968                 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
 969                 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
 970                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
 971                 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
 972                 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
 973
 974                 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
 975         }
 976
 977         assert(n <= m);
 978
 979         if (tv) {
 980                 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
 981                 iovec[n++] = IOVEC_MAKE_STRING(source_time);
 982         }
 983
 984         /* Note that strictly speaking storing the boot id here is
 985          * redundant since the entry includes this in-line
 986          * anyway. However, we need this indexed, too. */
 987         if (!isempty(s->boot_id_field))
 988                 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
 989
 990         if (!isempty(s->machine_id_field))
 991                 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
 992
 993         if (!isempty(s->hostname_field))
 994                 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
 995
 996         assert(n <= m);
 997
 998         if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
 999                 /* Split up strictly by (non-root) UID */
1000                 journal_uid = c->uid;
1001         else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
1002                 /* Split up by login UIDs.  We do this only if the
1003                  * realuid is not root, in order not to accidentally
1004                  * leak privileged information to the user that is
1005                  * logged by a privileged process that is part of an
1006                  * unprivileged session. */
1007                 journal_uid = c->owner_uid;
1008         else
1009                 journal_uid = 0;
1010
1011         write_to_journal(s, journal_uid, iovec, n, priority);
1012 }
1013
1014 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
1015
1016         struct iovec *iovec;
1017         size_t n = 0, k, m;
1018         va_list ap;
1019         int r;
1020
1021         assert(s);
1022         assert(format);
1023
1024         m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
1025         iovec = newa(struct iovec, m);
1026
1027         assert_cc(3 == LOG_FAC(LOG_DAEMON));
1028         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1029         iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1030
1031         iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1032         assert_cc(6 == LOG_INFO);
1033         iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
1034
1035         if (message_id)
1036                 iovec[n++] = IOVEC_MAKE_STRING(message_id);
1037         k = n;
1038
1039         va_start(ap, format);
1040         r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
1041         /* Error handling below */
1042         va_end(ap);
1043
1044         if (r >= 0)
1045                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1046
1047         while (k < n)
1048                 free(iovec[k++].iov_base);
1049
1050         if (r < 0) {
1051                 /* We failed to format the message. Emit a warning instead. */
1052                 char buf[LINE_MAX];
1053
1054                 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1055
1056                 n = 3;
1057                 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
1058                 iovec[n++] = IOVEC_MAKE_STRING(buf);
1059                 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1060         }
1061 }
1062
1063 void server_dispatch_message(
1064                 Server *s,
1065                 struct iovec *iovec, size_t n, size_t m,
1066                 ClientContext *c,
1067                 const struct timeval *tv,
1068                 int priority,
1069                 pid_t object_pid) {
1070
1071         uint64_t available = 0;
1072         int rl;
1073
1074         assert(s);
1075         assert(iovec || n == 0);
1076
1077         if (n == 0)
1078                 return;
1079
1080         if (LOG_PRI(priority) > s->max_level_store)
1081                 return;
1082
1083         /* Stop early in case the information will not be stored
1084          * in a journal. */
1085         if (s->storage == STORAGE_NONE)
1086                 return;
1087
1088         if (c && c->unit) {
1089                 (void) determine_space(s, &available, NULL);
1090
1091                 rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
1092                 if (rl == 0)
1093                         return;
1094
1095                 /* Write a suppression message if we suppressed something */
1096                 if (rl > 1)
1097                         server_driver_message(s, c->pid,
1098                                               "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1099                                               LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
1100                                               "N_DROPPED=%i", rl - 1,
1101                                               NULL);
1102         }
1103
1104         dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
1105 }
1106
1107 int server_flush_to_var(Server *s, bool require_flag_file) {
1108         sd_id128_t machine;
1109         sd_journal *j = NULL;
1110         char ts[FORMAT_TIMESPAN_MAX];
1111         usec_t start;
1112         unsigned n = 0;
1113         int r;
1114
1115         assert(s);
1116
1117         if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1118                 return 0;
1119
1120         if (!s->runtime_journal)
1121                 return 0;
1122
1123         if (require_flag_file && !flushed_flag_is_set())
1124                 return 0;
1125
1126         (void) system_journal_open(s, true);
1127
1128         if (!s->system_journal)
1129                 return 0;
1130
1131         log_debug("Flushing to /var...");
1132
1133         start = now(CLOCK_MONOTONIC);
1134
1135         r = sd_id128_get_machine(&machine);
1136         if (r < 0)
1137                 return r;
1138
1139         r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1140         if (r < 0)
1141                 return log_error_errno(r, "Failed to read runtime journal: %m");
1142
1143         sd_journal_set_data_threshold(j, 0);
1144
1145         SD_JOURNAL_FOREACH(j) {
1146                 Object *o = NULL;
1147                 JournalFile *f;
1148
1149                 f = j->current_file;
1150                 assert(f && f->current_offset > 0);
1151
1152                 n++;
1153
1154                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1155                 if (r < 0) {
1156                         log_error_errno(r, "Can't read entry: %m");
1157                         goto finish;
1158                 }
1159
1160                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1161                 if (r >= 0)
1162                         continue;
1163
1164                 if (!shall_try_append_again(s->system_journal, r)) {
1165                         log_error_errno(r, "Can't write entry: %m");
1166                         goto finish;
1167                 }
1168
1169                 server_rotate(s);
1170                 server_vacuum(s, false);
1171
1172                 if (!s->system_journal) {
1173                         log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1174                         r = -EIO;
1175                         goto finish;
1176                 }
1177
1178                 log_debug("Retrying write.");
1179                 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1180                 if (r < 0) {
1181                         log_error_errno(r, "Can't write entry: %m");
1182                         goto finish;
1183                 }
1184         }
1185
1186         r = 0;
1187
1188 finish:
1189         if (s->system_journal)
1190                 journal_file_post_change(s->system_journal);
1191
1192         s->runtime_journal = journal_file_close(s->runtime_journal);
1193
1194         if (r >= 0)
1195                 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1196
1197         sd_journal_close(j);
1198
1199         server_driver_message(s, 0, NULL,
1200                               LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1201                                           format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1202                                           n),
1203                               NULL);
1204
1205         return r;
1206 }
1207
1208 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1209         Server *s = userdata;
1210         struct ucred *ucred = NULL;
1211         struct timeval *tv = NULL;
1212         struct cmsghdr *cmsg;
1213         char *label = NULL;
1214         size_t label_len = 0, m;
1215         struct iovec iovec;
1216         ssize_t n;
1217         int *fds = NULL, v = 0;
1218         size_t n_fds = 0;
1219
1220         union {
1221                 struct cmsghdr cmsghdr;
1222
1223                 /* We use NAME_MAX space for the SELinux label
1224                  * here. The kernel currently enforces no
1225                  * limit, but according to suggestions from
1226                  * the SELinux people this will change and it
1227                  * will probably be identical to NAME_MAX. For
1228                  * now we use that, but this should be updated
1229                  * one day when the final limit is known. */
1230                 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1231                             CMSG_SPACE(sizeof(struct timeval)) +
1232                             CMSG_SPACE(sizeof(int)) + /* fd */
1233                             CMSG_SPACE(NAME_MAX)]; /* selinux label */
1234         } control = {};
1235
1236         union sockaddr_union sa = {};
1237
1238         struct msghdr msghdr = {
1239                 .msg_iov = &iovec,
1240                 .msg_iovlen = 1,
1241                 .msg_control = &control,
1242                 .msg_controllen = sizeof(control),
1243                 .msg_name = &sa,
1244                 .msg_namelen = sizeof(sa),
1245         };
1246
1247         assert(s);
1248         assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1249
1250         if (revents != EPOLLIN) {
1251                 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1252                 return -EIO;
1253         }
1254
1255         /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1256          * it.) */
1257         (void) ioctl(fd, SIOCINQ, &v);
1258
1259         /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1260         m = PAGE_ALIGN(MAX3((size_t) v + 1,
1261                             (size_t) LINE_MAX,
1262                             ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1263
1264         if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1265                 return log_oom();
1266
1267         iovec.iov_base = s->buffer;
1268         iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1269
1270         n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1271         if (n < 0) {
1272                 if (IN_SET(errno, EINTR, EAGAIN))
1273                         return 0;
1274
1275                 return log_error_errno(errno, "recvmsg() failed: %m");
1276         }
1277
1278         CMSG_FOREACH(cmsg, &msghdr) {
1279
1280                 if (cmsg->cmsg_level == SOL_SOCKET &&
1281                     cmsg->cmsg_type == SCM_CREDENTIALS &&
1282                     cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1283                         ucred = (struct ucred*) CMSG_DATA(cmsg);
1284                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1285                          cmsg->cmsg_type == SCM_SECURITY) {
1286                         label = (char*) CMSG_DATA(cmsg);
1287                         label_len = cmsg->cmsg_len - CMSG_LEN(0);
1288                 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1289                            cmsg->cmsg_type == SO_TIMESTAMP &&
1290                            cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1291                         tv = (struct timeval*) CMSG_DATA(cmsg);
1292                 else if (cmsg->cmsg_level == SOL_SOCKET &&
1293                          cmsg->cmsg_type == SCM_RIGHTS) {
1294                         fds = (int*) CMSG_DATA(cmsg);
1295                         n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1296                 }
1297         }
1298
1299         /* And a trailing NUL, just in case */
1300         s->buffer[n] = 0;
1301
1302         if (fd == s->syslog_fd) {
1303                 if (n > 0 && n_fds == 0)
1304                         server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1305                 else if (n_fds > 0)
1306                         log_warning("Got file descriptors via syslog socket. Ignoring.");
1307
1308         } else if (fd == s->native_fd) {
1309                 if (n > 0 && n_fds == 0)
1310                         server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1311                 else if (n == 0 && n_fds == 1)
1312                         server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1313                 else if (n_fds > 0)
1314                         log_warning("Got too many file descriptors via native socket. Ignoring.");
1315
1316         } else {
1317                 assert(fd == s->audit_fd);
1318
1319                 if (n > 0 && n_fds == 0)
1320                         server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1321                 else if (n_fds > 0)
1322                         log_warning("Got file descriptors via audit socket. Ignoring.");
1323         }
1324
1325         close_many(fds, n_fds);
1326         return 0;
1327 }
1328
1329 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1330         Server *s = userdata;
1331         int r;
1332
1333         assert(s);
1334
1335         log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1336
1337         (void) server_flush_to_var(s, false);
1338         server_sync(s);
1339         server_vacuum(s, false);
1340
1341         r = touch("/run/systemd/journal/flushed");
1342         if (r < 0)
1343                 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1344
1345         server_space_usage_message(s, NULL);
1346         return 0;
1347 }
1348
1349 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1350         Server *s = userdata;
1351         int r;
1352
1353         assert(s);
1354
1355         log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1356         server_rotate(s);
1357         server_vacuum(s, true);
1358
1359         if (s->system_journal)
1360                 patch_min_use(&s->system_storage);
1361         if (s->runtime_journal)
1362                 patch_min_use(&s->runtime_storage);
1363
1364         /* Let clients know when the most recent rotation happened. */
1365         r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1366         if (r < 0)
1367                 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1368
1369         return 0;
1370 }
1371
1372 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1373         Server *s = userdata;
1374
1375         assert(s);
1376
1377         log_received_signal(LOG_INFO, si);
1378
1379         sd_event_exit(s->event, 0);
1380         return 0;
1381 }
1382
1383 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1384         Server *s = userdata;
1385         int r;
1386
1387         assert(s);
1388
1389         log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1390
1391         server_sync(s);
1392
1393         /* Let clients know when the most recent sync happened. */
1394         r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1395         if (r < 0)
1396                 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1397
1398         return 0;
1399 }
1400
1401 static int setup_signals(Server *s) {
1402         int r;
1403
1404         assert(s);
1405
1406         assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1407
1408         r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1409         if (r < 0)
1410                 return r;
1411
1412         r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1413         if (r < 0)
1414                 return r;
1415
1416         r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1417         if (r < 0)
1418                 return r;
1419
1420         /* Let's process SIGTERM late, so that we flush all queued
1421          * messages to disk before we exit */
1422         r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1423         if (r < 0)
1424                 return r;
1425
1426         /* When journald is invoked on the terminal (when debugging),
1427          * it's useful if C-c is handled equivalent to SIGTERM. */
1428         r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1429         if (r < 0)
1430                 return r;
1431
1432         r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1433         if (r < 0)
1434                 return r;
1435
1436         /* SIGRTMIN+1 causes an immediate sync. We process this very
1437          * late, so that everything else queued at this point is
1438          * really written to disk. Clients can watch
1439          * /run/systemd/journal/synced with inotify until its mtime
1440          * changes to see when a sync happened. */
1441         r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1442         if (r < 0)
1443                 return r;
1444
1445         r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1446         if (r < 0)
1447                 return r;
1448
1449         return 0;
1450 }
1451
1452 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1453         Server *s = data;
1454         int r;
1455
1456         assert(s);
1457
1458         if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1459
1460                 r = value ? parse_boolean(value) : true;
1461                 if (r < 0)
1462                         log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1463                 else
1464                         s->forward_to_syslog = r;
1465
1466         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1467
1468                 r = value ? parse_boolean(value) : true;
1469                 if (r < 0)
1470                         log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1471                 else
1472                         s->forward_to_kmsg = r;
1473
1474         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1475
1476                 r = value ? parse_boolean(value) : true;
1477                 if (r < 0)
1478                         log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1479                 else
1480                         s->forward_to_console = r;
1481
1482         } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1483
1484                 r = value ? parse_boolean(value) : true;
1485                 if (r < 0)
1486                         log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1487                 else
1488                         s->forward_to_wall = r;
1489
1490         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1491
1492                 if (proc_cmdline_value_missing(key, value))
1493                         return 0;
1494
1495                 r = log_level_from_string(value);
1496                 if (r < 0)
1497                         log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1498                 else
1499                         s->max_level_console = r;
1500
1501         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1502
1503                 if (proc_cmdline_value_missing(key, value))
1504                         return 0;
1505
1506                 r = log_level_from_string(value);
1507                 if (r < 0)
1508                         log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1509                 else
1510                         s->max_level_store = r;
1511
1512         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1513
1514                 if (proc_cmdline_value_missing(key, value))
1515                         return 0;
1516
1517                 r = log_level_from_string(value);
1518                 if (r < 0)
1519                         log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1520                 else
1521                         s->max_level_syslog = r;
1522
1523         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1524
1525                 if (proc_cmdline_value_missing(key, value))
1526                         return 0;
1527
1528                 r = log_level_from_string(value);
1529                 if (r < 0)
1530                         log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1531                 else
1532                         s->max_level_kmsg = r;
1533
1534         } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1535
1536                 if (proc_cmdline_value_missing(key, value))
1537                         return 0;
1538
1539                 r = log_level_from_string(value);
1540                 if (r < 0)
1541                         log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1542                 else
1543                         s->max_level_wall = r;
1544
1545         } else if (startswith(key, "systemd.journald"))
1546                 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1547
1548         /* do not warn about state here, since probably systemd already did */
1549         return 0;
1550 }
1551
1552 static int server_parse_config_file(Server *s) {
1553         assert(s);
1554
1555         return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1556                                         CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1557                                         "Journal\0",
1558                                         config_item_perf_lookup, journald_gperf_lookup,
1559                                         CONFIG_PARSE_WARN, s);
1560 }
1561
1562 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1563         Server *s = userdata;
1564
1565         assert(s);
1566
1567         server_sync(s);
1568         return 0;
1569 }
1570
1571 int server_schedule_sync(Server *s, int priority) {
1572         int r;
1573
1574         assert(s);
1575
1576         if (priority <= LOG_CRIT) {
1577                 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1578                 server_sync(s);
1579                 return 0;
1580         }
1581
1582         if (s->sync_scheduled)
1583                 return 0;
1584
1585         if (s->sync_interval_usec > 0) {
1586                 usec_t when;
1587
1588                 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1589                 if (r < 0)
1590                         return r;
1591
1592                 when += s->sync_interval_usec;
1593
1594                 if (!s->sync_event_source) {
1595                         r = sd_event_add_time(
1596                                         s->event,
1597                                         &s->sync_event_source,
1598                                         CLOCK_MONOTONIC,
1599                                         when, 0,
1600                                         server_dispatch_sync, s);
1601                         if (r < 0)
1602                                 return r;
1603
1604                         r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1605                 } else {
1606                         r = sd_event_source_set_time(s->sync_event_source, when);
1607                         if (r < 0)
1608                                 return r;
1609
1610                         r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1611                 }
1612                 if (r < 0)
1613                         return r;
1614
1615                 s->sync_scheduled = true;
1616         }
1617
1618         return 0;
1619 }
1620
1621 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1622         Server *s = userdata;
1623
1624         assert(s);
1625
1626         server_cache_hostname(s);
1627         return 0;
1628 }
1629
1630 static int server_open_hostname(Server *s) {
1631         int r;
1632
1633         assert(s);
1634
1635         s->hostname_fd = open("/proc/sys/kernel/hostname",
1636                               O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1637         if (s->hostname_fd < 0)
1638                 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1639
1640         r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1641         if (r < 0) {
1642                 /* kernels prior to 3.2 don't support polling this file. Ignore
1643                  * the failure. */
1644                 if (r == -EPERM) {
1645                         log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1646                         s->hostname_fd = safe_close(s->hostname_fd);
1647                         return 0;
1648                 }
1649
1650                 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1651         }
1652
1653         r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1654         if (r < 0)
1655                 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1656
1657         return 0;
1658 }
1659
1660 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1661         Server *s = userdata;
1662         int r;
1663
1664         assert(s);
1665         assert(s->notify_event_source == es);
1666         assert(s->notify_fd == fd);
1667
1668         /* The $NOTIFY_SOCKET is writable again, now send exactly one
1669          * message on it. Either it's the watchdog event, the initial
1670          * READY=1 event or an stdout stream event. If there's nothing
1671          * to write anymore, turn our event source off. The next time
1672          * there's something to send it will be turned on again. */
1673
1674         if (!s->sent_notify_ready) {
1675                 static const char p[] =
1676                         "READY=1\n"
1677                         "STATUS=Processing requests...";
1678                 ssize_t l;
1679
1680                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1681                 if (l < 0) {
1682                         if (errno == EAGAIN)
1683                                 return 0;
1684
1685                         return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1686                 }
1687
1688                 s->sent_notify_ready = true;
1689                 log_debug("Sent READY=1 notification.");
1690
1691         } else if (s->send_watchdog) {
1692
1693                 static const char p[] =
1694                         "WATCHDOG=1";
1695
1696                 ssize_t l;
1697
1698                 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1699                 if (l < 0) {
1700                         if (errno == EAGAIN)
1701                                 return 0;
1702
1703                         return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1704                 }
1705
1706                 s->send_watchdog = false;
1707                 log_debug("Sent WATCHDOG=1 notification.");
1708
1709         } else if (s->stdout_streams_notify_queue)
1710                 /* Dispatch one stream notification event */
1711                 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1712
1713         /* Leave us enabled if there's still more to do. */
1714         if (s->send_watchdog || s->stdout_streams_notify_queue)
1715                 return 0;
1716
1717         /* There was nothing to do anymore, let's turn ourselves off. */
1718         r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1719         if (r < 0)
1720                 return log_error_errno(r, "Failed to turn off notify event source: %m");
1721
1722         return 0;
1723 }
1724
1725 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1726         Server *s = userdata;
1727         int r;
1728
1729         assert(s);
1730
1731         s->send_watchdog = true;
1732
1733         r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1734         if (r < 0)
1735                 log_warning_errno(r, "Failed to turn on notify event source: %m");
1736
1737         r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1738         if (r < 0)
1739                 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1740
1741         r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1742         if (r < 0)
1743                 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1744
1745         return 0;
1746 }
1747
1748 static int server_connect_notify(Server *s) {
1749         union sockaddr_union sa = {};
1750         const char *e;
1751         int r, salen;
1752
1753         assert(s);
1754         assert(s->notify_fd < 0);
1755         assert(!s->notify_event_source);
1756
1757         /*
1758           So here's the problem: we'd like to send notification
1759           messages to PID 1, but we cannot do that via sd_notify(),
1760           since that's synchronous, and we might end up blocking on
1761           it. Specifically: given that PID 1 might block on
1762           dbus-daemon during IPC, and dbus-daemon is logging to us,
1763           and might hence block on us, we might end up in a deadlock
1764           if we block on sending PID 1 notification messages — by
1765           generating a full blocking circle. To avoid this, let's
1766           create a non-blocking socket, and connect it to the
1767           notification socket, and then wait for POLLOUT before we
1768           send anything. This should efficiently avoid any deadlocks,
1769           as we'll never block on PID 1, hence PID 1 can safely block
1770           on dbus-daemon which can safely block on us again.
1771
1772           Don't think that this issue is real? It is, see:
1773           https://github.com/systemd/systemd/issues/1505
1774         */
1775
1776         e = getenv("NOTIFY_SOCKET");
1777         if (!e)
1778                 return 0;
1779
1780         salen = sockaddr_un_set_path(&sa.un, e);
1781         if (salen < 0)
1782                 return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
1783
1784         s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1785         if (s->notify_fd < 0)
1786                 return log_error_errno(errno, "Failed to create notify socket: %m");
1787
1788         (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1789
1790         r = connect(s->notify_fd, &sa.sa, salen);
1791         if (r < 0)
1792                 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1793
1794         r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1795         if (r < 0)
1796                 return log_error_errno(r, "Failed to watch notification socket: %m");
1797
1798         if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1799                 s->send_watchdog = true;
1800
1801                 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1802                 if (r < 0)
1803                         return log_error_errno(r, "Failed to add watchdog time event: %m");
1804         }
1805
1806         /* This should fire pretty soon, which we'll use to send the
1807          * READY=1 event. */
1808
1809         return 0;
1810 }
1811
1812 int server_init(Server *s) {
1813         _cleanup_fdset_free_ FDSet *fds = NULL;
1814         int n, r, fd;
1815         bool no_sockets;
1816
1817         assert(s);
1818
1819         zero(*s);
1820         s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1821         s->compress.enabled = true;
1822         s->compress.threshold_bytes = (uint64_t) -1;
1823         s->seal = true;
1824         s->read_kmsg = true;
1825
1826         s->watchdog_usec = USEC_INFINITY;
1827
1828         s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1829         s->sync_scheduled = false;
1830
1831         s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1832         s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1833
1834         s->forward_to_wall = true;
1835
1836         s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1837
1838         s->max_level_store = LOG_DEBUG;
1839         s->max_level_syslog = LOG_DEBUG;
1840         s->max_level_kmsg = LOG_NOTICE;
1841         s->max_level_console = LOG_INFO;
1842         s->max_level_wall = LOG_EMERG;
1843
1844         s->line_max = DEFAULT_LINE_MAX;
1845
1846         journal_reset_metrics(&s->system_storage.metrics);
1847         journal_reset_metrics(&s->runtime_storage.metrics);
1848
1849         server_parse_config_file(s);
1850
1851         r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1852         if (r < 0)
1853                 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1854
1855         if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1856                 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1857                           s->rate_limit_interval, s->rate_limit_burst);
1858                 s->rate_limit_interval = s->rate_limit_burst = 0;
1859         }
1860
1861         (void) mkdir_p("/run/systemd/journal", 0755);
1862
1863         s->user_journals = ordered_hashmap_new(NULL);
1864         if (!s->user_journals)
1865                 return log_oom();
1866
1867         s->mmap = mmap_cache_new();
1868         if (!s->mmap)
1869                 return log_oom();
1870
1871         s->deferred_closes = set_new(NULL);
1872         if (!s->deferred_closes)
1873                 return log_oom();
1874
1875         r = sd_event_default(&s->event);
1876         if (r < 0)
1877                 return log_error_errno(r, "Failed to create event loop: %m");
1878
1879         n = sd_listen_fds(true);
1880         if (n < 0)
1881                 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1882
1883         for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1884
1885                 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1886
1887                         if (s->native_fd >= 0) {
1888                                 log_error("Too many native sockets passed.");
1889                                 return -EINVAL;
1890                         }
1891
1892                         s->native_fd = fd;
1893
1894                 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1895
1896                         if (s->stdout_fd >= 0) {
1897                                 log_error("Too many stdout sockets passed.");
1898                                 return -EINVAL;
1899                         }
1900
1901                         s->stdout_fd = fd;
1902
1903                 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1904                            sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1905
1906                         if (s->syslog_fd >= 0) {
1907                                 log_error("Too many /dev/log sockets passed.");
1908                                 return -EINVAL;
1909                         }
1910
1911                         s->syslog_fd = fd;
1912
1913                 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1914
1915                         if (s->audit_fd >= 0) {
1916                                 log_error("Too many audit sockets passed.");
1917                                 return -EINVAL;
1918                         }
1919
1920                         s->audit_fd = fd;
1921
1922                 } else {
1923
1924                         if (!fds) {
1925                                 fds = fdset_new();
1926                                 if (!fds)
1927                                         return log_oom();
1928                         }
1929
1930                         r = fdset_put(fds, fd);
1931                         if (r < 0)
1932                                 return log_oom();
1933                 }
1934         }
1935
1936         /* Try to restore streams, but don't bother if this fails */
1937         (void) server_restore_streams(s, fds);
1938
1939         if (fdset_size(fds) > 0) {
1940                 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1941                 fds = fdset_free(fds);
1942         }
1943
1944         no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1945
1946         /* always open stdout, syslog, native, and kmsg sockets */
1947
1948         /* systemd-journald.socket: /run/systemd/journal/stdout */
1949         r = server_open_stdout_socket(s);
1950         if (r < 0)
1951                 return r;
1952
1953         /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1954         r = server_open_syslog_socket(s);
1955         if (r < 0)
1956                 return r;
1957
1958         /* systemd-journald.socket: /run/systemd/journal/socket */
1959         r = server_open_native_socket(s);
1960         if (r < 0)
1961                 return r;
1962
1963         /* /dev/kmsg */
1964         r = server_open_dev_kmsg(s);
1965         if (r < 0)
1966                 return r;
1967
1968         /* Unless we got *some* sockets and not audit, open audit socket */
1969         if (s->audit_fd >= 0 || no_sockets) {
1970                 r = server_open_audit(s);
1971                 if (r < 0)
1972                         return r;
1973         }
1974
1975         r = server_open_kernel_seqnum(s);
1976         if (r < 0)
1977                 return r;
1978
1979         r = server_open_hostname(s);
1980         if (r < 0)
1981                 return r;
1982
1983         r = setup_signals(s);
1984         if (r < 0)
1985                 return r;
1986
1987         s->rate_limit = journal_rate_limit_new();
1988         if (!s->rate_limit)
1989                 return -ENOMEM;
1990
1991         r = cg_get_root_path(&s->cgroup_root);
1992         if (r < 0)
1993                 return r;
1994
1995         server_cache_hostname(s);
1996         server_cache_boot_id(s);
1997         server_cache_machine_id(s);
1998
1999         s->runtime_storage.name = "Runtime journal";
2000         s->system_storage.name = "System journal";
2001
2002         s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2003         s->system_storage.path  = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2004         if (!s->runtime_storage.path || !s->system_storage.path)
2005                 return -ENOMEM;
2006
2007         (void) server_connect_notify(s);
2008
2009         (void) client_context_acquire_default(s);
2010
2011         return system_journal_open(s, false);
2012 }
2013
2014 void server_maybe_append_tags(Server *s) {
2015 #if HAVE_GCRYPT
2016         JournalFile *f;
2017         Iterator i;
2018         usec_t n;
2019
2020         n = now(CLOCK_REALTIME);
2021
2022         if (s->system_journal)
2023                 journal_file_maybe_append_tag(s->system_journal, n);
2024
2025         ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2026                 journal_file_maybe_append_tag(f, n);
2027 #endif
2028 }
2029
2030 void server_done(Server *s) {
2031         assert(s);
2032
2033         set_free_with_destructor(s->deferred_closes, journal_file_close);
2034
2035         while (s->stdout_streams)
2036                 stdout_stream_free(s->stdout_streams);
2037
2038         client_context_flush_all(s);
2039
2040         if (s->system_journal)
2041                 (void) journal_file_close(s->system_journal);
2042
2043         if (s->runtime_journal)
2044                 (void) journal_file_close(s->runtime_journal);
2045
2046         ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
2047
2048         sd_event_source_unref(s->syslog_event_source);
2049         sd_event_source_unref(s->native_event_source);
2050         sd_event_source_unref(s->stdout_event_source);
2051         sd_event_source_unref(s->dev_kmsg_event_source);
2052         sd_event_source_unref(s->audit_event_source);
2053         sd_event_source_unref(s->sync_event_source);
2054         sd_event_source_unref(s->sigusr1_event_source);
2055         sd_event_source_unref(s->sigusr2_event_source);
2056         sd_event_source_unref(s->sigterm_event_source);
2057         sd_event_source_unref(s->sigint_event_source);
2058         sd_event_source_unref(s->sigrtmin1_event_source);
2059         sd_event_source_unref(s->hostname_event_source);
2060         sd_event_source_unref(s->notify_event_source);
2061         sd_event_source_unref(s->watchdog_event_source);
2062         sd_event_unref(s->event);
2063
2064         safe_close(s->syslog_fd);
2065         safe_close(s->native_fd);
2066         safe_close(s->stdout_fd);
2067         safe_close(s->dev_kmsg_fd);
2068         safe_close(s->audit_fd);
2069         safe_close(s->hostname_fd);
2070         safe_close(s->notify_fd);
2071
2072         if (s->rate_limit)
2073                 journal_rate_limit_free(s->rate_limit);
2074
2075         if (s->kernel_seqnum)
2076                 munmap(s->kernel_seqnum, sizeof(uint64_t));
2077
2078         free(s->buffer);
2079         free(s->tty_path);
2080         free(s->cgroup_root);
2081         free(s->hostname_field);
2082         free(s->runtime_storage.path);
2083         free(s->system_storage.path);
2084
2085         if (s->mmap)
2086                 mmap_cache_unref(s->mmap);
2087 }
2088
/* String names of the Storage enum values, indexed by the enum value. */
static const char* const storage_table[_STORAGE_MAX] = {
        [STORAGE_AUTO] = "auto",
        [STORAGE_VOLATILE] = "volatile",
        [STORAGE_PERSISTENT] = "persistent",
        [STORAGE_NONE] = "none"
};

/* Presumably these generate the string<->enum lookup helpers for Storage and
 * the config_parse_storage() callback from the table above; see the macro
 * definitions for the exact functions produced. */
DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2098
/* String names of the SplitMode enum values, indexed by the enum value. */
static const char* const split_mode_table[_SPLIT_MAX] = {
        [SPLIT_LOGIN] = "login",
        [SPLIT_UID] = "uid",
        [SPLIT_NONE] = "none",
};

/* Presumably these generate the string<->enum lookup helpers for SplitMode and
 * the config_parse_split_mode() callback from the table above; see the macro
 * definitions for the exact functions produced. */
DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
2107
2108 int config_parse_line_max(
2109                 const char* unit,
2110                 const char *filename,
2111                 unsigned line,
2112                 const char *section,
2113                 unsigned section_line,
2114                 const char *lvalue,
2115                 int ltype,
2116                 const char *rvalue,
2117                 void *data,
2118                 void *userdata) {
2119
2120         size_t *sz = data;
2121         int r;
2122
2123         assert(filename);
2124         assert(lvalue);
2125         assert(rvalue);
2126         assert(data);
2127
2128         if (isempty(rvalue))
2129                 /* Empty assignment means default */
2130                 *sz = DEFAULT_LINE_MAX;
2131         else {
2132                 uint64_t v;
2133
2134                 r = parse_size(rvalue, 1024, &v);
2135                 if (r < 0) {
2136                         log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2137                         return 0;
2138                 }
2139
2140                 if (v < 79) {
2141                         /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2142                          * terminal size is 80ch, and it might make sense to break one character before the natural
2143                          * line break would occur on that. */
2144                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2145                         *sz = 79;
2146                 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2147                         /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2148                          * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2149                          * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2150                          * fail much earlier anyway. */
2151                         log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2152                         *sz = SSIZE_MAX-1;
2153                 } else
2154                         *sz = (size_t) v;
2155         }
2156
2157         return 0;
2158 }
2159
2160 int config_parse_compress(const char* unit,
2161                           const char *filename,
2162                           unsigned line,
2163                           const char *section,
2164                           unsigned section_line,
2165                           const char *lvalue,
2166                           int ltype,
2167                           const char *rvalue,
2168                           void *data,
2169                           void *userdata) {
2170         JournalCompressOptions* compress = data;
2171         int r;
2172
2173         if (streq(rvalue, "1")) {
2174                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2175                            "Compress= ambiguously specified as 1, enabling compression with default threshold");
2176                 compress->enabled = true;
2177         } else if (streq(rvalue, "0")) {
2178                 log_syntax(unit, LOG_WARNING, filename, line, 0,
2179                            "Compress= ambiguously specified as 0, disabling compression");
2180                 compress->enabled = false;
2181         } else if ((r = parse_boolean(rvalue)) >= 0)
2182                 compress->enabled = r;
2183         else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2184                 compress->enabled = true;
2185         else if (isempty(rvalue)) {
2186                 compress->enabled = true;
2187                 compress->threshold_bytes = (uint64_t) -1;
2188         } else
2189                 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2190
2191         return 0;
2192 }