]>
Commit | Line | Data |
---|---|---|
035b0f8f DDM |
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | ||
764721cc DDM |
3 | #include <pthread.h> |
4 | #include <unistd.h> | |
5 | ||
035b0f8f | 6 | #include "chattr-util.h" |
d71ece3f | 7 | #include "copy.h" |
035b0f8f DDM |
8 | #include "fd-util.h" |
9 | #include "format-util.h" | |
10 | #include "journal-authenticate.h" | |
11 | #include "journald-file.h" | |
12 | #include "path-util.h" | |
13 | #include "random-util.h" | |
14 | #include "set.h" | |
d71ece3f | 15 | #include "stat-util.h" |
035b0f8f DDM |
16 | #include "sync-util.h" |
17 | ||
d951ac55 | 18 | #define PAYLOAD_BUFFER_SIZE (16U * 1024U) |
a2799cc5 DDM |
19 | #define MINIMUM_HOLE_SIZE (1U * 1024U * 1024U / 2U) |
20 | ||
764721cc DDM |
21 | static int journald_file_truncate(JournalFile *f) { |
22 | uint64_t p; | |
23 | int r; | |
24 | ||
25 | /* truncate excess from the end of archives */ | |
26 | r = journal_file_tail_end(f, &p); | |
27 | if (r < 0) | |
28 | return log_debug_errno(r, "Failed to determine end of tail object: %m"); | |
29 | ||
30 | /* arena_size can't exceed the file size, ensure it's updated before truncating */ | |
31 | f->header->arena_size = htole64(p - le64toh(f->header->header_size)); | |
32 | ||
33 | if (ftruncate(f->fd, p) < 0) | |
3a787b5e | 34 | return log_debug_errno(errno, "Failed to truncate %s: %m", f->path); |
764721cc | 35 | |
3a787b5e | 36 | return journal_file_fstat(f); |
764721cc DDM |
37 | } |
38 | ||
39 | static int journald_file_entry_array_punch_hole(JournalFile *f, uint64_t p, uint64_t n_entries) { | |
40 | Object o; | |
41 | uint64_t offset, sz, n_items = 0, n_unused; | |
42 | int r; | |
43 | ||
44 | if (n_entries == 0) | |
45 | return 0; | |
46 | ||
47 | for (uint64_t q = p; q != 0; q = le64toh(o.entry_array.next_entry_array_offset)) { | |
48 | r = journal_file_read_object(f, OBJECT_ENTRY_ARRAY, q, &o); | |
49 | if (r < 0) | |
50 | return r; | |
51 | ||
52 | n_items += journal_file_entry_array_n_items(&o); | |
53 | p = q; | |
54 | } | |
55 | ||
56 | if (p == 0) | |
57 | return 0; | |
58 | ||
59 | if (n_entries > n_items) | |
60 | return -EBADMSG; | |
61 | ||
62 | /* Amount of unused items in the final entry array. */ | |
63 | n_unused = n_items - n_entries; | |
64 | ||
65 | if (n_unused == 0) | |
66 | return 0; | |
67 | ||
68 | offset = p + offsetof(Object, entry_array.items) + | |
69 | (journal_file_entry_array_n_items(&o) - n_unused) * sizeof(le64_t); | |
70 | sz = p + le64toh(o.object.size) - offset; | |
71 | ||
a2799cc5 DDM |
72 | if (sz < MINIMUM_HOLE_SIZE) |
73 | return 0; | |
74 | ||
d93abf46 DDM |
75 | if (p == le64toh(f->header->tail_object_offset) && !f->seal) { |
76 | o.object.size = htole64(offset - p); | |
77 | if (pwrite(f->fd, &o, sizeof(EntryArrayObject), p) < 0) | |
78 | return log_debug_errno(errno, "Failed to modify entry array object size: %m"); | |
79 | ||
80 | f->header->arena_size = htole64(ALIGN64(offset) - le64toh(f->header->header_size)); | |
81 | ||
82 | if (ftruncate(f->fd, ALIGN64(offset)) < 0) | |
83 | return log_debug_errno(errno, "Failed to truncate %s: %m", f->path); | |
84 | ||
85 | return 0; | |
86 | } | |
87 | ||
764721cc DDM |
88 | if (fallocate(f->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, sz) < 0) |
89 | return log_debug_errno(errno, "Failed to punch hole in entry array of %s: %m", f->path); | |
90 | ||
91 | return 0; | |
92 | } | |
93 | ||
94 | static int journald_file_punch_holes(JournalFile *f) { | |
d951ac55 | 95 | HashItem items[PAYLOAD_BUFFER_SIZE / sizeof(HashItem)]; |
764721cc | 96 | uint64_t p, sz; |
94c5a83c | 97 | ssize_t n = SSIZE_MAX; |
764721cc DDM |
98 | int r; |
99 | ||
100 | r = journald_file_entry_array_punch_hole( | |
101 | f, le64toh(f->header->entry_array_offset), le64toh(f->header->n_entries)); | |
102 | if (r < 0) | |
103 | return r; | |
104 | ||
105 | p = le64toh(f->header->data_hash_table_offset); | |
106 | sz = le64toh(f->header->data_hash_table_size); | |
764721cc | 107 | |
94c5a83c | 108 | for (uint64_t i = p; i < p + sz && n > 0; i += n) { |
24040269 | 109 | n = pread(f->fd, items, MIN(sizeof(items), p + sz - i), i); |
cdbba448 DDM |
110 | if (n < 0) |
111 | return n; | |
764721cc | 112 | |
94c5a83c DDM |
113 | /* Let's ignore any partial hash items by rounding down to the nearest multiple of HashItem. */ |
114 | n -= n % sizeof(HashItem); | |
115 | ||
cdbba448 | 116 | for (size_t j = 0; j < (size_t) n / sizeof(HashItem); j++) { |
764721cc DDM |
117 | Object o; |
118 | ||
119 | for (uint64_t q = le64toh(items[j].head_hash_offset); q != 0; | |
120 | q = le64toh(o.data.next_hash_offset)) { | |
121 | ||
122 | r = journal_file_read_object(f, OBJECT_DATA, q, &o); | |
123 | if (r < 0) { | |
124 | log_debug_errno(r, "Invalid data object: %m, ignoring"); | |
125 | break; | |
126 | } | |
127 | ||
128 | if (le64toh(o.data.n_entries) == 0) | |
129 | continue; | |
130 | ||
131 | (void) journald_file_entry_array_punch_hole( | |
132 | f, le64toh(o.data.entry_array_offset), le64toh(o.data.n_entries) - 1); | |
133 | } | |
134 | } | |
135 | } | |
136 | ||
137 | return 0; | |
138 | } | |
139 | ||
140 | /* This may be called from a separate thread to prevent blocking the caller for the duration of fsync(). | |
141 | * As a result we use atomic operations on f->offline_state for inter-thread communications with | |
142 | * journal_file_set_offline() and journal_file_set_online(). */ | |
143 | static void journald_file_set_offline_internal(JournaldFile *f) { | |
d71ece3f DDM |
144 | int r; |
145 | ||
764721cc DDM |
146 | assert(f); |
147 | assert(f->file->fd >= 0); | |
148 | assert(f->file->header); | |
149 | ||
150 | for (;;) { | |
151 | switch (f->file->offline_state) { | |
152 | case OFFLINE_CANCEL: | |
153 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_CANCEL, OFFLINE_DONE)) | |
154 | continue; | |
155 | return; | |
156 | ||
157 | case OFFLINE_AGAIN_FROM_SYNCING: | |
158 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING)) | |
159 | continue; | |
160 | break; | |
161 | ||
162 | case OFFLINE_AGAIN_FROM_OFFLINING: | |
163 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING)) | |
164 | continue; | |
165 | break; | |
166 | ||
167 | case OFFLINE_SYNCING: | |
168 | if (f->file->archive) { | |
169 | (void) journald_file_truncate(f->file); | |
170 | (void) journald_file_punch_holes(f->file); | |
171 | } | |
172 | ||
173 | (void) fsync(f->file->fd); | |
174 | ||
175 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING)) | |
176 | continue; | |
177 | ||
178 | f->file->header->state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE; | |
179 | (void) fsync(f->file->fd); | |
d71ece3f DDM |
180 | |
181 | /* If we've archived the journal file, first try to re-enable COW on the file. If the | |
a96fc72d | 182 | * FS_NOCOW_FL flag was never set or we successfully removed it, continue. If we fail |
d71ece3f DDM |
183 | * to remove the flag on the archived file, rewrite the file without the NOCOW flag. |
184 | * We need this fallback because on some filesystems (BTRFS), the NOCOW flag cannot | |
185 | * be removed after data has been written to a file. The only way to remove it is to | |
186 | * copy all data to a new file without the NOCOW flag set. */ | |
187 | ||
188 | if (f->file->archive) { | |
189 | r = chattr_fd(f->file->fd, 0, FS_NOCOW_FL, NULL); | |
190 | if (r >= 0) | |
191 | continue; | |
192 | ||
193 | log_debug_errno(r, "Failed to re-enable copy-on-write for %s: %m, rewriting file", f->file->path); | |
194 | ||
12727c2b | 195 | r = copy_file_atomic(f->file->path, f->file->path, f->file->mode, 0, FS_NOCOW_FL, COPY_REPLACE | COPY_FSYNC | COPY_HOLES); |
d71ece3f DDM |
196 | if (r < 0) { |
197 | log_debug_errno(r, "Failed to rewrite %s: %m", f->file->path); | |
198 | continue; | |
199 | } | |
200 | } | |
201 | ||
764721cc DDM |
202 | break; |
203 | ||
204 | case OFFLINE_OFFLINING: | |
205 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE)) | |
206 | continue; | |
207 | _fallthrough_; | |
208 | case OFFLINE_DONE: | |
209 | return; | |
210 | ||
211 | case OFFLINE_JOINED: | |
212 | log_debug("OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal()"); | |
213 | return; | |
214 | } | |
215 | } | |
216 | } | |
217 | ||
/* Thread entry point: names the thread "journal-offline" and runs the offlining state machine for the
 * JournaldFile passed as argument. Always returns NULL. */
static void * journald_file_set_offline_thread(void *arg) {
        /* Naming the thread is purely cosmetic, hence failures are ignored. */
        (void) pthread_setname_np(pthread_self(), "journal-offline");

        journald_file_set_offline_internal(arg);
        return NULL;
}
227 | ||
228 | /* Trigger a restart if the offline thread is mid-flight in a restartable state. */ | |
229 | static bool journald_file_set_offline_try_restart(JournaldFile *f) { | |
230 | for (;;) { | |
231 | switch (f->file->offline_state) { | |
232 | case OFFLINE_AGAIN_FROM_SYNCING: | |
233 | case OFFLINE_AGAIN_FROM_OFFLINING: | |
234 | return true; | |
235 | ||
236 | case OFFLINE_CANCEL: | |
237 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING)) | |
238 | continue; | |
239 | return true; | |
240 | ||
241 | case OFFLINE_SYNCING: | |
242 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING)) | |
243 | continue; | |
244 | return true; | |
245 | ||
246 | case OFFLINE_OFFLINING: | |
247 | if (!__sync_bool_compare_and_swap(&f->file->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING)) | |
248 | continue; | |
249 | return true; | |
250 | ||
251 | default: | |
252 | return false; | |
253 | } | |
254 | } | |
255 | } | |
256 | ||
257 | /* Sets a journal offline. | |
258 | * | |
259 | * If wait is false then an offline is dispatched in a separate thread for a | |
260 | * subsequent journal_file_set_offline() or journal_file_set_online() of the | |
261 | * same journal to synchronize with. | |
262 | * | |
263 | * If wait is true, then either an existing offline thread will be restarted | |
264 | * and joined, or if none exists the offline is simply performed in this | |
265 | * context without involving another thread. | |
266 | */ | |
267 | int journald_file_set_offline(JournaldFile *f, bool wait) { | |
268 | int target_state; | |
269 | bool restarted; | |
270 | int r; | |
271 | ||
272 | assert(f); | |
273 | ||
274 | if (!f->file->writable) | |
275 | return -EPERM; | |
276 | ||
277 | if (f->file->fd < 0 || !f->file->header) | |
278 | return -EINVAL; | |
279 | ||
280 | target_state = f->file->archive ? STATE_ARCHIVED : STATE_OFFLINE; | |
281 | ||
282 | /* An offlining journal is implicitly online and may modify f->header->state, | |
283 | * we must also join any potentially lingering offline thread when already in | |
284 | * the desired offline state. | |
285 | */ | |
286 | if (!journald_file_is_offlining(f) && f->file->header->state == target_state) | |
287 | return journal_file_set_offline_thread_join(f->file); | |
288 | ||
289 | /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */ | |
290 | restarted = journald_file_set_offline_try_restart(f); | |
291 | if ((restarted && wait) || !restarted) { | |
292 | r = journal_file_set_offline_thread_join(f->file); | |
293 | if (r < 0) | |
294 | return r; | |
295 | } | |
296 | ||
297 | if (restarted) | |
298 | return 0; | |
299 | ||
300 | /* Initiate a new offline. */ | |
301 | f->file->offline_state = OFFLINE_SYNCING; | |
302 | ||
303 | if (wait) /* Without using a thread if waiting. */ | |
304 | journald_file_set_offline_internal(f); | |
305 | else { | |
306 | sigset_t ss, saved_ss; | |
307 | int k; | |
308 | ||
309 | assert_se(sigfillset(&ss) >= 0); | |
310 | /* Don't block SIGBUS since the offlining thread accesses a memory mapped file. | |
311 | * Asynchronous SIGBUS signals can safely be handled by either thread. */ | |
312 | assert_se(sigdelset(&ss, SIGBUS) >= 0); | |
313 | ||
314 | r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss); | |
315 | if (r > 0) | |
316 | return -r; | |
317 | ||
318 | r = pthread_create(&f->file->offline_thread, NULL, journald_file_set_offline_thread, f); | |
319 | ||
320 | k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL); | |
321 | if (r > 0) { | |
322 | f->file->offline_state = OFFLINE_JOINED; | |
323 | return -r; | |
324 | } | |
325 | if (k > 0) | |
326 | return -k; | |
327 | } | |
328 | ||
329 | return 0; | |
330 | } | |
331 | ||
332 | bool journald_file_is_offlining(JournaldFile *f) { | |
333 | assert(f); | |
334 | ||
335 | __sync_synchronize(); | |
336 | ||
337 | if (IN_SET(f->file->offline_state, OFFLINE_DONE, OFFLINE_JOINED)) | |
338 | return false; | |
339 | ||
340 | return true; | |
341 | } | |
342 | ||
035b0f8f DDM |
343 | JournaldFile* journald_file_close(JournaldFile *f) { |
344 | if (!f) | |
345 | return NULL; | |
346 | ||
764721cc DDM |
347 | #if HAVE_GCRYPT |
348 | /* Write the final tag */ | |
349 | if (f->file->seal && f->file->writable) { | |
350 | int r; | |
351 | ||
352 | r = journal_file_append_tag(f->file); | |
353 | if (r < 0) | |
354 | log_error_errno(r, "Failed to append tag when closing journal: %m"); | |
355 | } | |
356 | #endif | |
357 | ||
358 | if (f->file->post_change_timer) { | |
359 | if (sd_event_source_get_enabled(f->file->post_change_timer, NULL) > 0) | |
360 | journal_file_post_change(f->file); | |
361 | ||
362 | sd_event_source_disable_unref(f->file->post_change_timer); | |
363 | } | |
364 | ||
365 | journald_file_set_offline(f, true); | |
366 | ||
035b0f8f DDM |
367 | journal_file_close(f->file); |
368 | ||
369 | return mfree(f); | |
370 | } | |
371 | ||
372 | int journald_file_open( | |
373 | int fd, | |
374 | const char *fname, | |
375 | int flags, | |
376 | mode_t mode, | |
377 | bool compress, | |
378 | uint64_t compress_threshold_bytes, | |
379 | bool seal, | |
380 | JournalMetrics *metrics, | |
381 | MMapCache *mmap_cache, | |
382 | Set *deferred_closes, | |
383 | JournaldFile *template, | |
384 | JournaldFile **ret) { | |
385 | _cleanup_free_ JournaldFile *f = NULL; | |
386 | int r; | |
387 | ||
388 | set_clear_with_destructor(deferred_closes, journald_file_close); | |
389 | ||
390 | f = new0(JournaldFile, 1); | |
391 | if (!f) | |
392 | return -ENOMEM; | |
393 | ||
394 | r = journal_file_open(fd, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, | |
395 | mmap_cache, template ? template->file : NULL, &f->file); | |
396 | if (r < 0) | |
397 | return r; | |
398 | ||
399 | *ret = TAKE_PTR(f); | |
400 | ||
401 | return 0; | |
402 | } | |
403 | ||
404 | ||
405 | JournaldFile* journald_file_initiate_close(JournaldFile *f, Set *deferred_closes) { | |
406 | int r; | |
407 | ||
408 | assert(f); | |
409 | ||
410 | if (deferred_closes) { | |
411 | r = set_put(deferred_closes, f); | |
412 | if (r < 0) | |
413 | log_debug_errno(r, "Failed to add file to deferred close set, closing immediately."); | |
414 | else { | |
764721cc | 415 | (void) journald_file_set_offline(f, false); |
035b0f8f DDM |
416 | return NULL; |
417 | } | |
418 | } | |
419 | ||
420 | return journald_file_close(f); | |
421 | } | |
422 | ||
423 | int journald_file_rotate( | |
424 | JournaldFile **f, | |
8b4fbbb0 | 425 | MMapCache *mmap_cache, |
035b0f8f DDM |
426 | bool compress, |
427 | uint64_t compress_threshold_bytes, | |
428 | bool seal, | |
429 | Set *deferred_closes) { | |
430 | ||
461955ef | 431 | _cleanup_free_ char *path = NULL; |
035b0f8f DDM |
432 | JournaldFile *new_file = NULL; |
433 | int r; | |
434 | ||
435 | assert(f); | |
436 | assert(*f); | |
437 | ||
461955ef | 438 | r = journal_file_archive((*f)->file, &path); |
035b0f8f DDM |
439 | if (r < 0) |
440 | return r; | |
441 | ||
442 | r = journald_file_open( | |
443 | -1, | |
461955ef | 444 | path, |
035b0f8f DDM |
445 | (*f)->file->flags, |
446 | (*f)->file->mode, | |
447 | compress, | |
448 | compress_threshold_bytes, | |
449 | seal, | |
450 | NULL, /* metrics */ | |
8b4fbbb0 | 451 | mmap_cache, |
035b0f8f DDM |
452 | deferred_closes, |
453 | *f, /* template */ | |
454 | &new_file); | |
455 | ||
456 | journald_file_initiate_close(*f, deferred_closes); | |
457 | *f = new_file; | |
458 | ||
459 | return r; | |
460 | } | |
461 | ||
462 | int journald_file_open_reliably( | |
463 | const char *fname, | |
464 | int flags, | |
465 | mode_t mode, | |
466 | bool compress, | |
467 | uint64_t compress_threshold_bytes, | |
468 | bool seal, | |
469 | JournalMetrics *metrics, | |
470 | MMapCache *mmap_cache, | |
471 | Set *deferred_closes, | |
472 | JournaldFile *template, | |
473 | JournaldFile **ret) { | |
474 | ||
475 | int r; | |
476 | ||
477 | r = journald_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, | |
478 | mmap_cache, deferred_closes, template, ret); | |
479 | if (!IN_SET(r, | |
480 | -EBADMSG, /* Corrupted */ | |
481 | -ENODATA, /* Truncated */ | |
482 | -EHOSTDOWN, /* Other machine */ | |
483 | -EPROTONOSUPPORT, /* Incompatible feature */ | |
484 | -EBUSY, /* Unclean shutdown */ | |
485 | -ESHUTDOWN, /* Already archived */ | |
486 | -EIO, /* IO error, including SIGBUS on mmap */ | |
487 | -EIDRM, /* File has been deleted */ | |
488 | -ETXTBSY)) /* File is from the future */ | |
489 | return r; | |
490 | ||
491 | if ((flags & O_ACCMODE) == O_RDONLY) | |
492 | return r; | |
493 | ||
494 | if (!(flags & O_CREAT)) | |
495 | return r; | |
496 | ||
497 | if (!endswith(fname, ".journal")) | |
498 | return r; | |
499 | ||
500 | /* The file is corrupted. Rotate it away and try it again (but only once) */ | |
501 | log_warning_errno(r, "File %s corrupted or uncleanly shut down, renaming and replacing.", fname); | |
502 | ||
503 | r = journal_file_dispose(AT_FDCWD, fname); | |
504 | if (r < 0) | |
505 | return r; | |
506 | ||
507 | return journald_file_open(-1, fname, flags, mode, compress, compress_threshold_bytes, seal, metrics, | |
508 | mmap_cache, deferred_closes, template, ret); | |
509 | } |