]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/journald-file.c
Merge pull request #21838 from lnussel/logind-refactor
[thirdparty/systemd.git] / src / journal / journald-file.c
CommitLineData
035b0f8f
DDM
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
764721cc
DDM
3#include <pthread.h>
4#include <unistd.h>
5
035b0f8f 6#include "chattr-util.h"
d71ece3f 7#include "copy.h"
035b0f8f
DDM
8#include "fd-util.h"
9#include "format-util.h"
10#include "journal-authenticate.h"
11#include "journald-file.h"
12#include "path-util.h"
13#include "random-util.h"
14#include "set.h"
d71ece3f 15#include "stat-util.h"
035b0f8f
DDM
16#include "sync-util.h"
17
d951ac55 18#define PAYLOAD_BUFFER_SIZE (16U * 1024U)
a2799cc5
DDM
19#define MINIMUM_HOLE_SIZE (1U * 1024U * 1024U / 2U)
20
764721cc
DDM
21static int journald_file_truncate(JournalFile *f) {
22 uint64_t p;
23 int r;
24
25 /* truncate excess from the end of archives */
26 r = journal_file_tail_end(f, &p);
27 if (r < 0)
28 return log_debug_errno(r, "Failed to determine end of tail object: %m");
29
30 /* arena_size can't exceed the file size, ensure it's updated before truncating */
31 f->header->arena_size = htole64(p - le64toh(f->header->header_size));
32
33 if (ftruncate(f->fd, p) < 0)
3a787b5e 34 return log_debug_errno(errno, "Failed to truncate %s: %m", f->path);
764721cc 35
3a787b5e 36 return journal_file_fstat(f);
764721cc
DDM
37}
38
39static int journald_file_entry_array_punch_hole(JournalFile *f, uint64_t p, uint64_t n_entries) {
40 Object o;
41 uint64_t offset, sz, n_items = 0, n_unused;
42 int r;
43
44 if (n_entries == 0)
45 return 0;
46
47 for (uint64_t q = p; q != 0; q = le64toh(o.entry_array.next_entry_array_offset)) {
48 r = journal_file_read_object(f, OBJECT_ENTRY_ARRAY, q, &o);
49 if (r < 0)
50 return r;
51
52 n_items += journal_file_entry_array_n_items(&o);
53 p = q;
54 }
55
56 if (p == 0)
57 return 0;
58
59 if (n_entries > n_items)
60 return -EBADMSG;
61
62 /* Amount of unused items in the final entry array. */
63 n_unused = n_items - n_entries;
64
65 if (n_unused == 0)
66 return 0;
67
68 offset = p + offsetof(Object, entry_array.items) +
69 (journal_file_entry_array_n_items(&o) - n_unused) * sizeof(le64_t);
70 sz = p + le64toh(o.object.size) - offset;
71
a2799cc5
DDM
72 if (sz < MINIMUM_HOLE_SIZE)
73 return 0;
74
d93abf46
DDM
75 if (p == le64toh(f->header->tail_object_offset) && !f->seal) {
76 o.object.size = htole64(offset - p);
77 if (pwrite(f->fd, &o, sizeof(EntryArrayObject), p) < 0)
78 return log_debug_errno(errno, "Failed to modify entry array object size: %m");
79
80 f->header->arena_size = htole64(ALIGN64(offset) - le64toh(f->header->header_size));
81
82 if (ftruncate(f->fd, ALIGN64(offset)) < 0)
83 return log_debug_errno(errno, "Failed to truncate %s: %m", f->path);
84
85 return 0;
86 }
87
764721cc
DDM
88 if (fallocate(f->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, sz) < 0)
89 return log_debug_errno(errno, "Failed to punch hole in entry array of %s: %m", f->path);
90
91 return 0;
92}
93
94static int journald_file_punch_holes(JournalFile *f) {
d951ac55 95 HashItem items[PAYLOAD_BUFFER_SIZE / sizeof(HashItem)];
764721cc 96 uint64_t p, sz;
94c5a83c 97 ssize_t n = SSIZE_MAX;
764721cc
DDM
98 int r;
99
100 r = journald_file_entry_array_punch_hole(
101 f, le64toh(f->header->entry_array_offset), le64toh(f->header->n_entries));
102 if (r < 0)
103 return r;
104
105 p = le64toh(f->header->data_hash_table_offset);
106 sz = le64toh(f->header->data_hash_table_size);
764721cc 107
94c5a83c 108 for (uint64_t i = p; i < p + sz && n > 0; i += n) {
24040269 109 n = pread(f->fd, items, MIN(sizeof(items), p + sz - i), i);
cdbba448
DDM
110 if (n < 0)
111 return n;
764721cc 112
94c5a83c
DDM
113 /* Let's ignore any partial hash items by rounding down to the nearest multiple of HashItem. */
114 n -= n % sizeof(HashItem);
115
cdbba448 116 for (size_t j = 0; j < (size_t) n / sizeof(HashItem); j++) {
764721cc
DDM
117 Object o;
118
119 for (uint64_t q = le64toh(items[j].head_hash_offset); q != 0;
120 q = le64toh(o.data.next_hash_offset)) {
121
122 r = journal_file_read_object(f, OBJECT_DATA, q, &o);
123 if (r < 0) {
124 log_debug_errno(r, "Invalid data object: %m, ignoring");
125 break;
126 }
127
128 if (le64toh(o.data.n_entries) == 0)
129 continue;
130
131 (void) journald_file_entry_array_punch_hole(
132 f, le64toh(o.data.entry_array_offset), le64toh(o.data.n_entries) - 1);
133 }
134 }
135 }
136
137 return 0;
138}
139
140/* This may be called from a separate thread to prevent blocking the caller for the duration of fsync().
141 * As a result we use atomic operations on f->offline_state for inter-thread communications with
142 * journal_file_set_offline() and journal_file_set_online(). */
143static void journald_file_set_offline_internal(JournaldFile *f) {
d71ece3f
DDM
144 int r;
145
764721cc
DDM
146 assert(f);
147 assert(f->file->fd >= 0);
148 assert(f->file->header);
149
150 for (;;) {
151 switch (f->file->offline_state) {
152 case OFFLINE_CANCEL:
153 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
154 continue;
155 return;
156
157 case OFFLINE_AGAIN_FROM_SYNCING:
158 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
159 continue;
160 break;
161
162 case OFFLINE_AGAIN_FROM_OFFLINING:
163 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
164 continue;
165 break;
166
167 case OFFLINE_SYNCING:
168 if (f->file->archive) {
169 (void) journald_file_truncate(f->file);
170 (void) journald_file_punch_holes(f->file);
171 }
172
173 (void) fsync(f->file->fd);
174
175 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
176 continue;
177
178 f->file->header->state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE;
179 (void) fsync(f->file->fd);
d71ece3f
DDM
180
181 /* If we've archived the journal file, first try to re-enable COW on the file. If the
a96fc72d 182 * FS_NOCOW_FL flag was never set or we successfully removed it, continue. If we fail
d71ece3f
DDM
183 * to remove the flag on the archived file, rewrite the file without the NOCOW flag.
184 * We need this fallback because on some filesystems (BTRFS), the NOCOW flag cannot
185 * be removed after data has been written to a file. The only way to remove it is to
186 * copy all data to a new file without the NOCOW flag set. */
187
188 if (f->file->archive) {
189 r = chattr_fd(f->file->fd, 0, FS_NOCOW_FL, NULL);
190 if (r >= 0)
191 continue;
192
193 log_debug_errno(r, "Failed to re-enable copy-on-write for %s: %m, rewriting file", f->file->path);
194
12727c2b 195 r = copy_file_atomic(f->file->path, f->file->path, f->file->mode, 0, FS_NOCOW_FL, COPY_REPLACE | COPY_FSYNC | COPY_HOLES);
d71ece3f
DDM
196 if (r < 0) {
197 log_debug_errno(r, "Failed to rewrite %s: %m", f->file->path);
198 continue;
199 }
200 }
201
764721cc
DDM
202 break;
203
204 case OFFLINE_OFFLINING:
205 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
206 continue;
207 _fallthrough_;
208 case OFFLINE_DONE:
209 return;
210
211 case OFFLINE_JOINED:
212 log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()");
213 return;
214 }
215 }
216}
217
218static void * journald_file_set_offline_thread(void *arg) {
219 JournaldFile *f = arg;
220
221 (void) pthread_setname_np(pthread_self(), "journal-offline");
222
223 journald_file_set_offline_internal(f);
224
225 return NULL;
226}
227
228/* Trigger a restart if the offline thread is mid-flight in a restartable state. */
229static bool journald_file_set_offline_try_restart(JournaldFile *f) {
230 for (;;) {
231 switch (f->file->offline_state) {
232 case OFFLINE_AGAIN_FROM_SYNCING:
233 case OFFLINE_AGAIN_FROM_OFFLINING:
234 return true;
235
236 case OFFLINE_CANCEL:
237 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
238 continue;
239 return true;
240
241 case OFFLINE_SYNCING:
242 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
243 continue;
244 return true;
245
246 case OFFLINE_OFFLINING:
247 if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
248 continue;
249 return true;
250
251 default:
252 return false;
253 }
254 }
255}
256
257/* Sets a journal offline.
258 *
259 * If wait is false then an offline is dispatched in a separate thread for a
260 * subsequent journal_file_set_offline() or journal_file_set_online() of the
261 * same journal to synchronize with.
262 *
263 * If wait is true, then either an existing offline thread will be restarted
264 * and joined, or if none exists the offline is simply performed in this
265 * context without involving another thread.
266 */
267int journald_file_set_offline(JournaldFile *f, bool wait) {
268 int target_state;
269 bool restarted;
270 int r;
271
272 assert(f);
273
274 if (!f->file->writable)
275 return -EPERM;
276
277 if (f->file->fd < 0 || !f->file->header)
278 return -EINVAL;
279
280 target_state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE;
281
282 /* An offlining journal is implicitly online and may modify f->header->state,
283 * we must also join any potentially lingering offline thread when already in
284 * the desired offline state.
285 */
286 if (!journald_file_is_offlining(f) && f->file->header->state == target_state)
287 return journal_file_set_offline_thread_join(f->file);
288
289 /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
290 restarted = journald_file_set_offline_try_restart(f);
291 if ((restarted && wait) || !restarted) {
292 r = journal_file_set_offline_thread_join(f->file);
293 if (r < 0)
294 return r;
295 }
296
297 if (restarted)
298 return 0;
299
300 /* Initiate a new offline. */
301 f->file->offline_state = OFFLINE_SYNCING;
302
303 if (wait) /* Without using a thread if waiting. */
304 journald_file_set_offline_internal(f);
305 else {
306 sigset_t ss, saved_ss;
307 int k;
308
309 assert_se(sigfillset(&ss) >= 0);
310 /* Don't block SIGBUS since the offlining thread accesses a memory mapped file.
311 * Asynchronous SIGBUS signals can safely be handled by either thread. */
312 assert_se(sigdelset(&ss, SIGBUS) >= 0);
313
314 r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
315 if (r > 0)
316 return -r;
317
318 r = pthread_create(&f->file->offline_thread, NULL, journald_file_set_offline_thread, f);
319
320 k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
321 if (r > 0) {
322 f->file->offline_state = OFFLINE_JOINED;
323 return -r;
324 }
325 if (k > 0)
326 return -k;
327 }
328
329 return 0;
330}
331
332bool journald_file_is_offlining(JournaldFile *f) {
333 assert(f);
334
335 __sync_synchronize();
336
337 if (IN_SET(f->file->offline_state, OFFLINE_DONE, OFFLINE_JOINED))
338 return false;
339
340 return true;
341}
342
035b0f8f
DDM
343JournaldFile* journald_file_close(JournaldFile *f) {
344 if (!f)
345 return NULL;
346
764721cc
DDM
347#if HAVE_GCRYPT
348 /* Write the final tag */
349 if (f->file->seal && f->file->writable) {
350 int r;
351
352 r = journal_file_append_tag(f->file);
353 if (r < 0)
354 log_error_errno(r, "Failed to append tag when closing journal: %m");
355 }
356#endif
357
358 if (f->file->post_change_timer) {
359 if (sd_event_source_get_enabled(f->file->post_change_timer, NULL) > 0)
360 journal_file_post_change(f->file);
361
362 sd_event_source_disable_unref(f->file->post_change_timer);
363 }
364
365 journald_file_set_offline(f, true);
366
035b0f8f
DDM
367 journal_file_close(f->file);
368
369 return mfree(f);
370}
371
372int journald_file_open(
373 int fd,
374 const char *fname,
375 int flags,
376 mode_t mode,
377 bool compress,
378 uint64_t compress_threshold_bytes,
379 bool seal,
380 JournalMetrics *metrics,
381 MMapCache *mmap_cache,
382 Set *deferred_closes,
383 JournaldFile *template,
384 JournaldFile **ret) {
385 _cleanup_free_ JournaldFile *f = NULL;
386 int r;
387
388 set_clear_with_destructor(deferred_closes, journald_file_close);
389
390 f = new0(JournaldFile, 1);
391 if (!f)
392 return -ENOMEM;
393
394 r = journal_file_open(fd, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,
395 mmap_cache, template ? template->file : NULL, &f->file);
396 if (r < 0)
397 return r;
398
399 *ret = TAKE_PTR(f);
400
401 return 0;
402}
403
404
405JournaldFile* journald_file_initiate_close(JournaldFile *f, Set *deferred_closes) {
406 int r;
407
408 assert(f);
409
410 if (deferred_closes) {
411 r = set_put(deferred_closes, f);
412 if (r < 0)
413 log_debug_errno(r, "Failed to add file to deferred close set, closing immediately.");
414 else {
764721cc 415 (void) journald_file_set_offline(f, false);
035b0f8f
DDM
416 return NULL;
417 }
418 }
419
420 return journald_file_close(f);
421}
422
423int journald_file_rotate(
424 JournaldFile **f,
8b4fbbb0 425 MMapCache *mmap_cache,
035b0f8f
DDM
426 bool compress,
427 uint64_t compress_threshold_bytes,
428 bool seal,
429 Set *deferred_closes) {
430
461955ef 431 _cleanup_free_ char *path = NULL;
035b0f8f
DDM
432 JournaldFile *new_file = NULL;
433 int r;
434
435 assert(f);
436 assert(*f);
437
461955ef 438 r = journal_file_archive((*f)->file, &path);
035b0f8f
DDM
439 if (r < 0)
440 return r;
441
442 r = journald_file_open(
443 -1,
461955ef 444 path,
035b0f8f
DDM
445 (*f)->file->flags,
446 (*f)->file->mode,
447 compress,
448 compress_threshold_bytes,
449 seal,
450 NULL, /* metrics */
8b4fbbb0 451 mmap_cache,
035b0f8f
DDM
452 deferred_closes,
453 *f, /* template */
454 &new_file);
455
456 journald_file_initiate_close(*f, deferred_closes);
457 *f = new_file;
458
459 return r;
460}
461
462int journald_file_open_reliably(
463 const char *fname,
464 int flags,
465 mode_t mode,
466 bool compress,
467 uint64_t compress_threshold_bytes,
468 bool seal,
469 JournalMetrics *metrics,
470 MMapCache *mmap_cache,
471 Set *deferred_closes,
472 JournaldFile *template,
473 JournaldFile **ret) {
474
475 int r;
476
477 r = journald_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,
478 mmap_cache, deferred_closes, template, ret);
479 if (!IN_SET(r,
480 -EBADMSG, /* Corrupted */
481 -ENODATA, /* Truncated */
482 -EHOSTDOWN, /* Other machine */
483 -EPROTONOSUPPORT, /* Incompatible feature */
484 -EBUSY, /* Unclean shutdown */
485 -ESHUTDOWN, /* Already archived */
486 -EIO, /* IO error, including SIGBUS on mmap */
487 -EIDRM, /* File has been deleted */
488 -ETXTBSY)) /* File is from the future */
489 return r;
490
491 if ((flags & O_ACCMODE) == O_RDONLY)
492 return r;
493
494 if (!(flags & O_CREAT))
495 return r;
496
497 if (!endswith(fname, ".journal"))
498 return r;
499
500 /* The file is corrupted. Rotate it away and try it again (but only once) */
501 log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
502
503 r = journal_file_dispose(AT_FDCWD, fname);
504 if (r < 0)
505 return r;
506
507 return journald_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics,
508 mmap_cache, deferred_closes, template, ret);
509}