]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/hibernate-util.c
mkosi: update arch commit reference
[thirdparty/systemd.git] / src / shared / hibernate-util.c
CommitLineData
54d7fcc6
MY
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2/***
3 Copyright © 2018 Dell Inc.
4***/
5
6#include <linux/fs.h>
7#include <linux/magic.h>
8#include <stddef.h>
9#include <sys/ioctl.h>
10#include <unistd.h>
11
12#include "alloc-util.h"
13#include "blockdev-util.h"
14#include "btrfs-util.h"
15#include "device-util.h"
16#include "devnum-util.h"
805deec0 17#include "efivars.h"
54d7fcc6
MY
18#include "env-util.h"
19#include "errno-util.h"
20#include "fd-util.h"
21#include "fileio.h"
22#include "hibernate-util.h"
23#include "log.h"
24#include "parse-util.h"
25#include "path-util.h"
b8b0704c 26#include "proc-cmdline.h"
54d7fcc6
MY
27#include "stat-util.h"
28#include "string-util.h"
29#include "strv.h"
30
31#define HIBERNATION_SWAP_THRESHOLD 0.98
32
596873c1
MY
33void hibernation_device_done(HibernationDevice *device) {
34 assert(device);
54d7fcc6 35
596873c1 36 free(device->path);
54d7fcc6
MY
37}
38
596873c1
MY
39int read_fiemap(int fd, struct fiemap **ret) {
40 _cleanup_free_ struct fiemap *fiemap = NULL, *result_fiemap = NULL;
41 struct stat statinfo;
42 uint32_t result_extents = 0;
43 uint64_t fiemap_start = 0, fiemap_length;
44 const size_t n_extra = DIV_ROUND_UP(sizeof(struct fiemap), sizeof(struct fiemap_extent));
54d7fcc6 45
596873c1
MY
46 assert(fd >= 0);
47 assert(ret);
54d7fcc6 48
596873c1
MY
49 if (fstat(fd, &statinfo) < 0)
50 return log_debug_errno(errno, "Cannot determine file size: %m");
51 if (!S_ISREG(statinfo.st_mode))
52 return -ENOTTY;
53 fiemap_length = statinfo.st_size;
54d7fcc6 54
596873c1
MY
55 /* Zero this out in case we run on a file with no extents */
56 fiemap = calloc(n_extra, sizeof(struct fiemap_extent));
57 if (!fiemap)
58 return -ENOMEM;
54d7fcc6 59
596873c1
MY
60 result_fiemap = malloc_multiply(n_extra, sizeof(struct fiemap_extent));
61 if (!result_fiemap)
62 return -ENOMEM;
54d7fcc6 63
596873c1
MY
64 /* XFS filesystem has incorrect implementation of fiemap ioctl and
65 * returns extents for only one block-group at a time, so we need
66 * to handle it manually, starting the next fiemap call from the end
67 * of the last extent
68 */
69 while (fiemap_start < fiemap_length) {
70 *fiemap = (struct fiemap) {
71 .fm_start = fiemap_start,
72 .fm_length = fiemap_length,
73 .fm_flags = FIEMAP_FLAG_SYNC,
74 };
54d7fcc6 75
596873c1
MY
76 /* Find out how many extents there are */
77 if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
78 return log_debug_errno(errno, "Failed to read extents: %m");
54d7fcc6 79
596873c1
MY
80 /* Nothing to process */
81 if (fiemap->fm_mapped_extents == 0)
82 break;
54d7fcc6 83
596873c1
MY
84 /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all
85 * the extents for the whole file. Add space for the initial struct fiemap. */
86 if (!greedy_realloc0((void**) &fiemap, n_extra + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
87 return -ENOMEM;
54d7fcc6 88
596873c1
MY
89 fiemap->fm_extent_count = fiemap->fm_mapped_extents;
90 fiemap->fm_mapped_extents = 0;
54d7fcc6 91
596873c1
MY
92 if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0)
93 return log_debug_errno(errno, "Failed to read extents: %m");
54d7fcc6 94
596873c1
MY
95 /* Resize result_fiemap to allow us to copy in the extents */
96 if (!greedy_realloc((void**) &result_fiemap,
97 n_extra + result_extents + fiemap->fm_mapped_extents, sizeof(struct fiemap_extent)))
98 return -ENOMEM;
54d7fcc6 99
596873c1
MY
100 memcpy(result_fiemap->fm_extents + result_extents,
101 fiemap->fm_extents,
102 sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents);
54d7fcc6 103
596873c1 104 result_extents += fiemap->fm_mapped_extents;
54d7fcc6 105
596873c1
MY
106 /* Highly unlikely that it is zero */
107 if (_likely_(fiemap->fm_mapped_extents > 0)) {
108 uint32_t i = fiemap->fm_mapped_extents - 1;
54d7fcc6 109
596873c1
MY
110 fiemap_start = fiemap->fm_extents[i].fe_logical +
111 fiemap->fm_extents[i].fe_length;
54d7fcc6 112
596873c1
MY
113 if (fiemap->fm_extents[i].fe_flags & FIEMAP_EXTENT_LAST)
114 break;
115 }
116 }
54d7fcc6 117
596873c1
MY
118 memcpy(result_fiemap, fiemap, sizeof(struct fiemap));
119 result_fiemap->fm_mapped_extents = result_extents;
120 *ret = TAKE_PTR(result_fiemap);
54d7fcc6
MY
121 return 0;
122}
123
596873c1
MY
124static int read_resume_config(dev_t *ret_devno, uint64_t *ret_offset) {
125 _cleanup_free_ char *devno_str = NULL, *offset_str = NULL;
126 uint64_t offset;
127 dev_t devno;
54d7fcc6
MY
128 int r;
129
596873c1
MY
130 assert(ret_devno);
131 assert(ret_offset);
54d7fcc6 132
b8b0704c
MY
133 r = proc_cmdline_get_key("noresume", /* flags = */ 0, /* ret_value = */ NULL);
134 if (r < 0)
135 return log_debug_errno(r, "Failed to check if 'noresume' kernel command line option is set: %m");
136 if (r > 0)
137 return log_debug_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE),
138 "'noresume' kernel command line option is set, refusing hibernation device lookup.");
139
596873c1 140 r = read_one_line_file("/sys/power/resume", &devno_str);
54d7fcc6 141 if (r < 0)
596873c1 142 return log_debug_errno(r, "Failed to read /sys/power/resume: %m");
54d7fcc6 143
596873c1 144 r = parse_devnum(devno_str, &devno);
54d7fcc6 145 if (r < 0)
596873c1 146 return log_debug_errno(r, "Failed to parse /sys/power/resume devno '%s': %m", devno_str);
54d7fcc6 147
596873c1 148 r = read_one_line_file("/sys/power/resume_offset", &offset_str);
54d7fcc6 149 if (r == -ENOENT) {
596873c1
MY
150 log_debug_errno(r, "Kernel does not expose resume_offset, skipping.");
151 offset = UINT64_MAX;
54d7fcc6 152 } else if (r < 0)
596873c1 153 return log_debug_errno(r, "Failed to read /sys/power/resume_offset: %m");
54d7fcc6 154 else {
596873c1 155 r = safe_atou64(offset_str, &offset);
54d7fcc6 156 if (r < 0)
596873c1
MY
157 return log_debug_errno(r,
158 "Failed to parse /sys/power/resume_offset '%s': %m", offset_str);
54d7fcc6
MY
159 }
160
596873c1 161 if (devno == 0 && offset > 0 && offset != UINT64_MAX)
3fce141c 162 return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
46ce85bf
MY
163 "Found populated /sys/power/resume_offset (%" PRIu64 ") but /sys/power/resume is not set, refusing.",
164 offset);
596873c1
MY
165
166 *ret_devno = devno;
167 *ret_offset = offset;
54d7fcc6 168
54d7fcc6
MY
169 return 0;
170}
171
596873c1
MY
/* A single swap area, as listed in /proc/swaps */
typedef struct SwapEntry {
        char *path;      /* device node or file path, heap allocated and owned by the entry */
        bool swapfile;   /* true for entries of type "file", false for type "partition" */

        uint64_t size;   /* value of the size column */
        uint64_t used;   /* value of the used column */
        int priority;    /* value of the priority column */

        /* The following are not part of /proc/swaps, they are determined separately by
         * swap_entry_get_resume_config() */
        dev_t devno;     /* block device of the swap area */
        uint64_t offset; /* resume offset of the swap area, always 0 for partitions */
} SwapEntry;

/* An array of swap areas along with its length */
typedef struct SwapEntries {
        SwapEntry *swaps;
        size_t n_swaps;
} SwapEntries;
190
191static void swap_entry_done(SwapEntry *entry) {
192 assert(entry);
54d7fcc6 193
596873c1 194 free(entry->path);
54d7fcc6
MY
195}
196
596873c1
MY
197static void swap_entries_done(SwapEntries *entries) {
198 assert(entries);
199
200 FOREACH_ARRAY(i, entries->swaps, entries->n_swaps)
201 swap_entry_done(i);
202
203 free(entries->swaps);
204}
205
206static int swap_entry_get_resume_config(SwapEntry *swap) {
207 _cleanup_close_ int fd = -EBADF;
208 uint64_t offset_raw;
209 struct stat st;
54d7fcc6
MY
210 int r;
211
596873c1
MY
212 assert(swap);
213 assert(swap->path);
214
215 fd = open(swap->path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
216 if (fd < 0)
217 return -errno;
218
219 if (fstat(fd, &st) < 0)
220 return -errno;
221
222 if (!swap->swapfile) {
223 if (!S_ISBLK(st.st_mode))
224 return -ENOTBLK;
225
226 swap->devno = st.st_rdev;
227 swap->offset = 0;
228 return 0;
229 }
230
231 r = stat_verify_regular(&st);
54d7fcc6
MY
232 if (r < 0)
233 return r;
234
596873c1
MY
235 r = get_block_device_fd(fd, &swap->devno);
236 if (r < 0)
237 return r;
238
239 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
240 if (r < 0)
241 return log_debug_errno(r, "Failed to check if swap file '%s' is on Btrfs: %m", swap->path);
242 if (r > 0) {
243 r = btrfs_get_file_physical_offset_fd(fd, &offset_raw);
244 if (r < 0)
245 return r;
246 } else {
247 _cleanup_free_ struct fiemap *fiemap = NULL;
248
249 r = read_fiemap(fd, &fiemap);
250 if (r < 0)
251 return log_debug_errno(r, "Failed to read extent map for swap file '%s': %m", swap->path);
252
253 offset_raw = fiemap->fm_extents[0].fe_physical;
54d7fcc6
MY
254 }
255
596873c1
MY
256 swap->offset = offset_raw / page_size();
257 return 0;
258}
259
260static int read_swap_entries(SwapEntries *ret) {
261 _cleanup_(swap_entries_done) SwapEntries entries = {};
262 _cleanup_fclose_ FILE *f = NULL;
263
264 assert(ret);
265
266 f = fopen("/proc/swaps", "re");
267 if (!f)
268 return log_debug_errno(errno, "Failed to open /proc/swaps: %m");
269
270 /* Remove header */
54d7fcc6 271 (void) fscanf(f, "%*s %*s %*s %*s %*s\n");
596873c1 272
54d7fcc6 273 for (unsigned i = 1;; i++) {
596873c1 274 _cleanup_(swap_entry_done) SwapEntry swap = {};
54d7fcc6 275 _cleanup_free_ char *type = NULL;
54d7fcc6
MY
276 int k;
277
54d7fcc6
MY
278 k = fscanf(f,
279 "%ms " /* device/file path */
280 "%ms " /* type of swap */
281 "%" PRIu64 /* swap size */
282 "%" PRIu64 /* used */
596873c1
MY
283 "%i" /* priority */
284 "\n",
285 &swap.path, &type, &swap.size, &swap.used, &swap.priority);
54d7fcc6
MY
286 if (k == EOF)
287 break;
596873c1
MY
288 if (k != 5)
289 return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Failed to parse /proc/swaps line %u.", i);
54d7fcc6
MY
290
291 if (streq(type, "file")) {
596873c1
MY
292 if (endswith(swap.path, "\\040(deleted)")) {
293 log_debug("Swap file '%s' has been deleted, ignoring.", swap.path);
54d7fcc6
MY
294 continue;
295 }
296
596873c1 297 swap.swapfile = true;
54d7fcc6
MY
298
299 } else if (streq(type, "partition")) {
596873c1 300 const char *node;
54d7fcc6 301
596873c1
MY
302 node = path_startswith(swap.path, "/dev/");
303 if (node && startswith(node, "zram")) {
304 log_debug("Swap partition '%s' is a zram device, ignoring.", swap.path);
54d7fcc6
MY
305 continue;
306 }
307
596873c1 308 swap.swapfile = false;
54d7fcc6
MY
309
310 } else {
596873c1
MY
311 log_debug("Swap type %s is not supported for hibernation, ignoring device: %s",
312 type, swap.path);
54d7fcc6
MY
313 continue;
314 }
315
596873c1
MY
316 if (!GREEDY_REALLOC(entries.swaps, entries.n_swaps + 1))
317 return log_oom_debug();
54d7fcc6 318
596873c1
MY
319 entries.swaps[entries.n_swaps++] = TAKE_STRUCT(swap);
320 }
54d7fcc6 321
596873c1
MY
322 *ret = TAKE_STRUCT(entries);
323 return 0;
324}
54d7fcc6 325
596873c1
MY
326/* Attempt to find a suitable device for hibernation by parsing /proc/swaps, /sys/power/resume, and
327 * /sys/power/resume_offset.
328 *
329 * Beware:
330 * Never use a device or file that hasn't been somehow specified by a user who would also be entrusted
331 * with full system memory access (for example via /sys/power/resume) or that isn't an already active
332 * swap area! Otherwise various security attacks might become possible, for example an attacker could
333 * silently attach such a device and circumvent full disk encryption when it would be automatically used
334 * for hibernation. Also, having a swap area on top of encryption is not per se enough to protect from all
335 * such attacks.
336 *
337 * Returns:
338 * 1 - Values are set in /sys/power/resume and /sys/power/resume_offset.
339 *
340 * 0 - No values are set in /sys/power/resume and /sys/power/resume_offset.
341 * ret will represent the highest priority swap with most remaining space discovered in /proc/swaps.
342 *
343 * Negative value in the case of error */
344int find_suitable_hibernation_device_full(HibernationDevice *ret_device, uint64_t *ret_size, uint64_t *ret_used) {
345 _cleanup_(swap_entries_done) SwapEntries entries = {};
346 SwapEntry *entry = NULL;
347 uint64_t resume_config_offset;
348 dev_t resume_config_devno;
349 int r;
54d7fcc6 350
596873c1 351 assert(!ret_size == !ret_used);
54d7fcc6 352
596873c1
MY
353 r = read_resume_config(&resume_config_devno, &resume_config_offset);
354 if (r < 0)
355 return r;
356
357 r = read_swap_entries(&entries);
358 if (r < 0)
359 return r;
360 if (entries.n_swaps == 0)
500ec22d 361 return log_debug_errno(SYNTHETIC_ERRNO(ENOSPC), "No swap space available for hibernation.");
54d7fcc6 362
596873c1
MY
363 FOREACH_ARRAY(swap, entries.swaps, entries.n_swaps) {
364 r = swap_entry_get_resume_config(swap);
365 if (r < 0)
366 return log_debug_errno(r, "Failed to get devno and offset for swap '%s': %m", swap->path);
367 if (swap->devno == 0) {
368 assert(swap->swapfile);
369
370 log_debug("Swap file '%s' is not backed by block device, ignoring: %m", swap->path);
371 continue;
372 }
54d7fcc6 373
596873c1
MY
374 if (resume_config_devno > 0) {
375 if (swap->devno == resume_config_devno &&
376 (!swap->swapfile || resume_config_offset == UINT64_MAX || swap->offset == resume_config_offset)) {
377 /* /sys/power/resume (resume=) is set, and the calculated swap file offset
378 * matches with /sys/power/resume_offset. If /sys/power/resume_offset is not
379 * exposed, we can't do proper check anyway, so use the found swap file too. */
380 entry = swap;
381 break;
382 }
54d7fcc6 383
596873c1
MY
384 /* If resume= is set, don't try to use other swap spaces. */
385 continue;
386 }
54d7fcc6 387
596873c1
MY
388 if (!entry ||
389 swap->priority > entry->priority ||
390 swap->size - swap->used > entry->size - entry->used)
391 entry = swap;
54d7fcc6
MY
392 }
393
596873c1
MY
394 if (!entry) {
395 /* No need to check n_swaps == 0, since it's rejected early */
396 assert(resume_config_devno > 0);
40eb83a8 397 return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Cannot find swap entry corresponding to /sys/power/resume.");
596873c1 398 }
54d7fcc6 399
66b99560
MY
400 if (ret_device) {
401 char *path;
402
403 if (entry->swapfile) {
404 r = device_path_make_canonical(S_IFBLK, entry->devno, &path);
405 if (r < 0)
406 return log_debug_errno(r,
407 "Failed to format canonical device path for devno '" DEVNUM_FORMAT_STR "': %m",
408 DEVNUM_FORMAT_VAL(entry->devno));
409 } else
410 path = TAKE_PTR(entry->path);
411
596873c1
MY
412 *ret_device = (HibernationDevice) {
413 .devno = entry->devno,
414 .offset = entry->offset,
66b99560 415 .path = path,
596873c1 416 };
66b99560 417 }
54d7fcc6 418
596873c1
MY
419 if (ret_size) {
420 *ret_size = entry->size;
421 *ret_used = entry->used;
422 }
54d7fcc6 423
596873c1 424 return resume_config_devno > 0;
54d7fcc6
MY
425}
426
805deec0 427static int get_proc_meminfo_active(unsigned long long *ret) {
596873c1
MY
428 _cleanup_free_ char *active_str = NULL;
429 unsigned long long active;
54d7fcc6
MY
430 int r;
431
080105d7
MY
432 assert(ret);
433
805deec0
MY
434 r = get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE, &active_str);
435 if (r < 0)
436 return log_debug_errno(r, "Failed to retrieve Active(anon) from /proc/meminfo: %m");
437
438 r = safe_atollu(active_str, &active);
439 if (r < 0)
440 return log_debug_errno(r, "Failed to parse Active(anon) '%s' from /proc/meminfo: %m", active_str);
441
442 *ret = active;
443 return 0;
444}
445
446int hibernation_is_safe(void) {
447 unsigned long long active;
448 uint64_t size, used;
0e8f0956 449 bool resume_set, bypass_space_check;
805deec0 450 int r;
54d7fcc6 451
0e8f0956
MY
452 bypass_space_check = getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0;
453
596873c1 454 r = find_suitable_hibernation_device_full(NULL, &size, &used);
40eb83a8
MY
455 if (IN_SET(r, -ENOSPC, -ESTALE) && bypass_space_check)
456 /* If we don't have any available swap space at all, or the specified resume device is missing,
457 * and $SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK is set, skip all remaining checks since
458 * we can't do that properly anyway. It is quite possible that the user is using a setup
459 * similar to #30083. When we actually perform hibernation in sleep.c we'll check everything again. */
0e8f0956 460 return 0;
54d7fcc6 461 if (r < 0)
805deec0
MY
462 return r;
463 resume_set = r > 0;
54d7fcc6 464
805deec0
MY
465 if (!resume_set && !is_efi_boot())
466 return log_debug_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE),
467 "Not running on EFI and resume= is not set. Hibernation is not safe.");
54d7fcc6 468
0e8f0956 469 if (bypass_space_check)
232efdc5 470 return 0;
805deec0
MY
471
472 r = get_proc_meminfo_active(&active);
473 if (r < 0)
474 return r;
54d7fcc6 475
596873c1
MY
476 r = active <= (size - used) * HIBERNATION_SWAP_THRESHOLD;
477 log_debug("Detected %s swap for hibernation: Active(anon)=%llu kB, size=%" PRIu64 " kB, used=%" PRIu64 " kB, threshold=%.2g%%",
478 r ? "enough" : "not enough", active, size, used, 100 * HIBERNATION_SWAP_THRESHOLD);
805deec0
MY
479 if (!r)
480 return -ENOSPC;
54d7fcc6 481
805deec0 482 return resume_set;
54d7fcc6
MY
483}
484
54d7fcc6
MY
485int write_resume_config(dev_t devno, uint64_t offset, const char *device) {
486 char offset_str[DECIMAL_STR_MAX(uint64_t)];
54d7fcc6
MY
487 const char *devno_str;
488 int r;
489
6e819bd2
MY
490 assert(devno > 0);
491 assert(device);
492
54d7fcc6
MY
493 devno_str = FORMAT_DEVNUM(devno);
494 xsprintf(offset_str, "%" PRIu64, offset);
495
54d7fcc6
MY
496 /* We write the offset first since it's safer. Note that this file is only available in 4.17+, so
497 * fail gracefully if it doesn't exist and we're only overwriting it with 0. */
498 r = write_string_file("/sys/power/resume_offset", offset_str, WRITE_STRING_FILE_DISABLE_BUFFER);
499 if (r == -ENOENT) {
500 if (offset != 0)
501 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
6e819bd2
MY
502 "Can't configure swap file offset %s, kernel does not support /sys/power/resume_offset. Refusing.",
503 offset_str);
54d7fcc6
MY
504
505 log_warning_errno(r, "/sys/power/resume_offset is unavailable, skipping writing swap file offset.");
506 } else if (r < 0)
507 return log_error_errno(r,
508 "Failed to write swap file offset %s to /sys/power/resume_offset for device '%s': %m",
509 offset_str, device);
510 else
511 log_debug("Wrote resume_offset=%s for device '%s' to /sys/power/resume_offset.",
512 offset_str, device);
513
514 r = write_string_file("/sys/power/resume", devno_str, WRITE_STRING_FILE_DISABLE_BUFFER);
515 if (r < 0)
516 return log_error_errno(r,
517 "Failed to write device '%s' (%s) to /sys/power/resume: %m",
518 device, devno_str);
519 log_debug("Wrote resume=%s for device '%s' to /sys/power/resume.", devno_str, device);
520
521 return 0;
522}
8ad973ed 523
fbc88824 524int clear_efi_hibernate_location_and_warn(void) {
8ad973ed
MY
525 int r;
526
527 if (!is_efi_boot())
fbc88824 528 return 0;
8ad973ed
MY
529
530 r = efi_set_variable(EFI_SYSTEMD_VARIABLE(HibernateLocation), NULL, 0);
fbc88824
MY
531 if (r == -ENOENT)
532 return 0;
8ad973ed 533 if (r < 0)
166ad35f 534 return log_warning_errno(r, "Failed to clear HibernateLocation EFI variable: %m");
fbc88824
MY
535
536 return 1;
8ad973ed 537}