1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 Copyright © 2018 Dell Inc.
7 #include <linux/magic.h>
12 #include "alloc-util.h"
13 #include "blockdev-util.h"
14 #include "btrfs-util.h"
15 #include "device-util.h"
16 #include "devnum-util.h"
19 #include "errno-util.h"
22 #include "hibernate-util.h"
24 #include "parse-util.h"
25 #include "path-util.h"
26 #include "proc-cmdline.h"
27 #include "stat-util.h"
28 #include "string-util.h"
/* Factor applied to the free swap space (size - used) in hibernation_is_safe() before it is compared
 * against the Active(anon) figure from /proc/meminfo; also printed there as a percentage. */
31 #define HIBERNATION_SWAP_THRESHOLD 0.98
/* Cleanup helper for a HibernationDevice.
 * NOTE(review): the function body is not visible in this excerpt; presumably it releases the
 * resources owned by *device (find_suitable_hibernation_device_full() fills in at least devno and
 * offset) - confirm against the full file. */
33 void hibernation_device_done(HibernationDevice
*device
) {
/* Collects the extent map of the file referred to by fd using the FS_IOC_FIEMAP ioctl.
 *
 * fd  - open file descriptor; it is passed to fstat() and checked with S_ISREG().
 * ret - receives a newly allocated struct fiemap holding the header plus all collected extents;
 *       ownership moves to the caller via TAKE_PTR().
 *
 * Each loop iteration issues the ioctl twice: once to learn how many extents are mapped, and once
 * more after growing the buffer to actually fetch them. The fetched extents are appended to
 * result_fiemap, and the next iteration starts at the end of the last extent seen.
 *
 * Failures of fstat() and ioctl() are logged at debug level through log_debug_errno().
 * NOTE(review): the source numbering has gaps, so several statements (allocation failure handling,
 * the non-regular-file path, loop exits, the final return) are not visible in this excerpt. */
39 int read_fiemap(int fd
, struct fiemap
**ret
) {
40 _cleanup_free_
struct fiemap
*fiemap
= NULL
, *result_fiemap
= NULL
;
42 uint32_t result_extents
= 0;
43 uint64_t fiemap_start
= 0, fiemap_length
;
/* n_extra: number of fiemap_extent-sized slots needed to cover the struct fiemap header itself,
 * rounded up. All buffer sizes below are expressed in units of struct fiemap_extent. */
44 const size_t n_extra
= DIV_ROUND_UP(sizeof(struct fiemap
), sizeof(struct fiemap_extent
));
49 if (fstat(fd
, &statinfo
) < 0)
50 return log_debug_errno(errno
, "Cannot determine file size: %m");
51 if (!S_ISREG(statinfo
.st_mode
))
/* The whole file is queried: the mapping length is the file size reported by fstat(). */
53 fiemap_length
= statinfo
.st_size
;
55 /* Zero this out in case we run on a file with no extents */
56 fiemap
= calloc(n_extra
, sizeof(struct fiemap_extent
));
60 result_fiemap
= malloc_multiply(n_extra
, sizeof(struct fiemap_extent
));
64 /* XFS filesystem has incorrect implementation of fiemap ioctl and
65 * returns extents for only one block-group at a time, so we need
66 * to handle it manually, starting the next fiemap call from the end
69 while (fiemap_start
< fiemap_length
) {
70 *fiemap
= (struct fiemap
) {
71 .fm_start
= fiemap_start
,
72 .fm_length
= fiemap_length
,
73 .fm_flags
= FIEMAP_FLAG_SYNC
,
76 /* Find out how many extents there are */
77 if (ioctl(fd
, FS_IOC_FIEMAP
, fiemap
) < 0)
78 return log_debug_errno(errno
, "Failed to read extents: %m");
80 /* Nothing to process */
81 if (fiemap
->fm_mapped_extents
== 0)
84 /* Resize fiemap to allow us to read in the extents, result fiemap has to hold all
85 * the extents for the whole file. Add space for the initial struct fiemap. */
86 if (!greedy_realloc0((void**) &fiemap
, n_extra
+ fiemap
->fm_mapped_extents
, sizeof(struct fiemap_extent
)))
/* Second pass: ask for exactly as many extents as the first call reported, and reset the
 * mapped count before issuing the ioctl again. */
89 fiemap
->fm_extent_count
= fiemap
->fm_mapped_extents
;
90 fiemap
->fm_mapped_extents
= 0;
92 if (ioctl(fd
, FS_IOC_FIEMAP
, fiemap
) < 0)
93 return log_debug_errno(errno
, "Failed to read extents: %m");
95 /* Resize result_fiemap to allow us to copy in the extents */
96 if (!greedy_realloc((void**) &result_fiemap
,
97 n_extra
+ result_extents
+ fiemap
->fm_mapped_extents
, sizeof(struct fiemap_extent
)))
/* Append this batch behind the extents collected so far.
 * NOTE(review): the source argument of this memcpy() is on a line missing from this excerpt. */
100 memcpy(result_fiemap
->fm_extents
+ result_extents
,
102 sizeof(struct fiemap_extent
) * fiemap
->fm_mapped_extents
);
104 result_extents
+= fiemap
->fm_mapped_extents
;
106 /* Highly unlikely that it is zero */
107 if (_likely_(fiemap
->fm_mapped_extents
> 0)) {
108 uint32_t i
= fiemap
->fm_mapped_extents
- 1;
/* The next query starts right behind the last extent of this batch. */
110 fiemap_start
= fiemap
->fm_extents
[i
].fe_logical
+
111 fiemap
->fm_extents
[i
].fe_length
;
113 if (fiemap
->fm_extents
[i
].fe_flags
& FIEMAP_EXTENT_LAST
)
/* Copy the header of the most recent query into the result, then overwrite its extent count
 * with the accumulated total before handing the buffer to the caller. */
118 memcpy(result_fiemap
, fiemap
, sizeof(struct fiemap
));
119 result_fiemap
->fm_mapped_extents
= result_extents
;
120 *ret
= TAKE_PTR(result_fiemap
);
/* Reads the resume configuration currently exposed by the kernel.
 *
 * Steps visible in this excerpt:
 *   - query the noresume kernel command line key via proc_cmdline_get_key(); when it is set the
 *     lookup is refused with ENOTRECOVERABLE,
 *   - read /sys/power/resume and parse it with parse_devnum(),
 *   - read /sys/power/resume_offset and parse it with safe_atou64(); a debug message covers the
 *     case where the kernel does not expose that file,
 *   - reject a populated offset without a resume device (EINVAL),
 *   - store the offset in *ret_offset.
 *
 * All failures are logged at debug level.
 * NOTE(review): the condition lines guarding each return, and the assignment of *ret_devno, are
 * missing from this excerpt and cannot be confirmed here. */
124 static int read_resume_config(dev_t
*ret_devno
, uint64_t *ret_offset
) {
125 _cleanup_free_
char *devno_str
= NULL
, *offset_str
= NULL
;
133 r
= proc_cmdline_get_key("noresume", /* flags = */ 0, /* ret_value = */ NULL
);
135 return log_debug_errno(r
, "Failed to check if 'noresume' kernel command line option is set: %m");
137 return log_debug_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE
),
138 "'noresume' kernel command line option is set, refusing hibernation device lookup.");
140 r
= read_one_line_file("/sys/power/resume", &devno_str
);
142 return log_debug_errno(r
, "Failed to read /sys/power/resume: %m");
144 r
= parse_devnum(devno_str
, &devno
);
146 return log_debug_errno(r
, "Failed to parse /sys/power/resume devno '%s': %m", devno_str
);
148 r
= read_one_line_file("/sys/power/resume_offset", &offset_str
);
150 log_debug_errno(r
, "Kernel does not expose resume_offset, skipping.");
153 return log_debug_errno(r
, "Failed to read /sys/power/resume_offset: %m");
155 r
= safe_atou64(offset_str
, &offset
);
157 return log_debug_errno(r
,
158 "Failed to parse /sys/power/resume_offset '%s': %m", offset_str
);
/* An offset that is non-zero and not UINT64_MAX is only accepted together with a resume device. */
161 if (devno
== 0 && offset
> 0 && offset
!= UINT64_MAX
)
162 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
),
163 "Found populated /sys/power/resume_offset (%" PRIu64
") but /sys/power/resume is not set, refusing.",
167 *ret_offset
= offset
;
172 /* entry in /proc/swaps */
/* Members referenced elsewhere in this file: path, size, used and priority (filled while parsing
 * /proc/swaps), plus swapfile, devno and offset (filled in afterwards).
 * NOTE(review): the member declarations themselves are not visible in this excerpt. */
173 typedef struct SwapEntry
{
181 /* Not present in original entry */
/* Collection of SwapEntry items. Users in this file access the members swaps (the array) and
 * n_swaps (its element count).
 * NOTE(review): the member declarations themselves are not visible in this excerpt. */
186 typedef struct SwapEntries
{
/* Cleanup helper for a single SwapEntry; used as the _cleanup_ handler for the per-line entry in
 * read_swap_entries().
 * NOTE(review): the body is not visible in this excerpt; presumably it frees the entry path -
 * confirm against the full file. */
191 static void swap_entry_done(SwapEntry
*entry
) {
/* Cleanup helper for a SwapEntries collection: walks the n_swaps elements of the swaps array and
 * then frees the array itself. Used as a _cleanup_ handler in this file.
 * NOTE(review): the statement executed per element is missing from this excerpt; presumably it is
 * swap_entry_done() - confirm. */
197 static void swap_entries_done(SwapEntries
*entries
) {
200 FOREACH_ARRAY(i
, entries
->swaps
, entries
->n_swaps
)
203 free(entries
->swaps
);
/* Fills in the devno and offset members of a swap entry.
 *
 * The swap path is opened read-only and inspected with fstat().
 *   - Entries without the swapfile flag must be block devices; their devno is st_rdev.
 *   - For swap files the inode is verified with stat_verify_regular(), the devno is looked up with
 *     get_block_device_fd(), and a raw offset is obtained either from
 *     btrfs_get_file_physical_offset_fd() or from the physical address of the first extent
 *     returned by read_fiemap().
 * The raw offset is finally divided by page_size() and stored in swap->offset.
 *
 * NOTE(review): branch conditions and most error returns are missing from this excerpt.
 * Presumably the Btrfs helper is used when fd_is_fs_type() reports Btrfs and the fiemap path
 * otherwise - confirm against the full file. */
206 static int swap_entry_get_resume_config(SwapEntry
*swap
) {
207 _cleanup_close_
int fd
= -EBADF
;
215 fd
= open(swap
->path
, O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
219 if (fstat(fd
, &st
) < 0)
222 if (!swap
->swapfile
) {
223 if (!S_ISBLK(st
.st_mode
))
226 swap
->devno
= st
.st_rdev
;
231 r
= stat_verify_regular(&st
);
235 r
= get_block_device_fd(fd
, &swap
->devno
);
239 r
= fd_is_fs_type(fd
, BTRFS_SUPER_MAGIC
);
241 return log_debug_errno(r
, "Failed to check if swap file '%s' is on Btrfs: %m", swap
->path
);
243 r
= btrfs_get_file_physical_offset_fd(fd
, &offset_raw
);
247 _cleanup_free_
struct fiemap
*fiemap
= NULL
;
249 r
= read_fiemap(fd
, &fiemap
);
251 return log_debug_errno(r
, "Failed to read extent map for swap file '%s': %m", swap
->path
);
/* The physical address of the first extent is taken as the raw offset. */
253 offset_raw
= fiemap
->fm_extents
[0].fe_physical
;
/* The raw offset is divided by the page size before it is stored. */
256 swap
->offset
= offset_raw
/ page_size();
/* Parses /proc/swaps into a SwapEntries collection that is handed to the caller through *ret.
 *
 * The first line of the file is consumed with an fscanf() whose fields are all suppressed.
 * Every following line yields a path, a type string, size, used and priority.
 *   - type file: marked as swapfile; paths ending in the escaped deleted marker are logged as
 *     ignored.
 *   - type partition: marked as non-swapfile; devices under /dev/ whose name starts with zram are
 *     logged as ignored.
 *   - any other type is logged as unsupported.
 * Accepted entries are appended to the array, which is grown with GREEDY_REALLOC(); ownership of
 * each entry and of the final collection is moved with TAKE_STRUCT().
 *
 * The loop counter starts at 1 and is only used to report the line number on parse errors (EIO).
 * NOTE(review): the parse call itself, the end-of-file handling and the statements that skip
 * ignored entries are missing from this excerpt. */
260 static int read_swap_entries(SwapEntries
*ret
) {
261 _cleanup_(swap_entries_done
) SwapEntries entries
= {};
262 _cleanup_fclose_
FILE *f
= NULL
;
266 f
= fopen("/proc/swaps", "re");
268 return log_debug_errno(errno
, "Failed to open /proc/swaps: %m");
/* Skip the header line; the result is deliberately discarded. */
271 (void) fscanf(f
, "%*s %*s %*s %*s %*s\n");
273 for (unsigned i
= 1;; i
++) {
274 _cleanup_(swap_entry_done
) SwapEntry swap
= {};
275 _cleanup_free_
char *type
= NULL
;
279 "%ms " /* device/file path */
280 "%ms " /* type of swap */
281 "%" PRIu64
/* swap size */
282 "%" PRIu64
/* used */
285 &swap
.path
, &type
, &swap
.size
, &swap
.used
, &swap
.priority
);
289 return log_debug_errno(SYNTHETIC_ERRNO(EIO
), "Failed to parse /proc/swaps line %u.", i
);
291 if (streq(type
, "file")) {
292 if (endswith(swap
.path
, "\\040(deleted)")) {
293 log_debug("Swap file '%s' has been deleted, ignoring.", swap
.path
);
297 swap
.swapfile
= true;
299 } else if (streq(type
, "partition")) {
302 node
= path_startswith(swap
.path
, "/dev/");
303 if (node
&& startswith(node
, "zram")) {
304 log_debug("Swap partition '%s' is a zram device, ignoring.", swap
.path
);
308 swap
.swapfile
= false;
311 log_debug("Swap type %s is not supported for hibernation, ignoring device: %s",
/* Make room for one more element, then move the parsed entry into the array. */
316 if (!GREEDY_REALLOC(entries
.swaps
, entries
.n_swaps
+ 1))
317 return log_oom_debug();
319 entries
.swaps
[entries
.n_swaps
++] = TAKE_STRUCT(swap
);
322 *ret
= TAKE_STRUCT(entries
);
326 /* Attempt to find a suitable device for hibernation by parsing /proc/swaps, /sys/power/resume, and
327 * /sys/power/resume_offset.
330 * Never use a device or file that hasn't been somehow specified by a user who would also be entrusted
331 * with full system memory access (for example via /sys/power/resume) or that isn't an already active
332 * swap area! Otherwise various security attacks might become possible, for example an attacker could
333 * silently attach such a device and circumvent full disk encryption when it would be automatically used
334 * for hibernation. Also, having a swap area on top of encryption is not per se enough to protect from all
338 * 1 - Values are set in /sys/power/resume and /sys/power/resume_offset.
340 * 0 - No values are set in /sys/power/resume and /sys/power/resume_offset.
341 * ret will represent the highest priority swap with most remaining space discovered in /proc/swaps.
343 * Negative value in the case of error */
344 int find_suitable_hibernation_device_full(HibernationDevice
*ret_device
, uint64_t *ret_size
, uint64_t *ret_used
) {
/* Overview of what is visible in this excerpt:
 *   - ret_size and ret_used must be passed together or both be NULL (asserted below),
 *   - the resume configuration and the list of active swaps are read first,
 *   - an empty swap list fails with ENOSPC,
 *   - devno and offset are resolved for every swap entry,
 *   - the chosen entry is reported through ret_device, ret_size and ret_used,
 *   - the return value is the result of resume_config_devno > 0.
 * NOTE(review): several condition and assignment lines are missing from this excerpt, including
 * the NULL checks around the output parameters and the exact tie-breaking between swaps. */
345 _cleanup_(swap_entries_done
) SwapEntries entries
= {};
346 SwapEntry
*entry
= NULL
;
347 uint64_t resume_config_offset
;
348 dev_t resume_config_devno
;
351 assert(!ret_size
== !ret_used
);
353 r
= read_resume_config(&resume_config_devno
, &resume_config_offset
);
357 r
= read_swap_entries(&entries
);
360 if (entries
.n_swaps
== 0)
361 return log_debug_errno(SYNTHETIC_ERRNO(ENOSPC
), "No swap space available for hibernation.");
363 FOREACH_ARRAY(swap
, entries
.swaps
, entries
.n_swaps
) {
364 r
= swap_entry_get_resume_config(swap
);
366 return log_debug_errno(r
, "Failed to get devno and offset for swap '%s': %m", swap
->path
);
/* A devno of zero is only expected for swap files (asserted below).
 * NOTE(review): the log_debug() format below ends in a percent-m specifier although no error
 * code is passed at this call site; it will print whatever errno happens to hold. */
367 if (swap
->devno
== 0) {
368 assert(swap
->swapfile
);
370 log_debug("Swap file '%s' is not backed by block device, ignoring: %m", swap
->path
);
/* With a configured resume device, an entry matches when its devno is equal and, for swap
 * files, the configured offset is either UINT64_MAX or equal to the computed one. */
374 if (resume_config_devno
> 0) {
375 if (swap
->devno
== resume_config_devno
&&
376 (!swap
->swapfile
|| resume_config_offset
== UINT64_MAX
|| swap
->offset
== resume_config_offset
)) {
377 /* /sys/power/resume (resume=) is set, and the calculated swap file offset
378 * matches with /sys/power/resume_offset. If /sys/power/resume_offset is not
379 * exposed, we can't do proper check anyway, so use the found swap file too. */
384 /* If resume= is set, don't try to use other swap spaces. */
/* Comparison of the candidate against the current pick by priority and by free space
 * (size - used). NOTE(review): the start of this condition is missing from this excerpt. */
389 swap
->priority
> entry
->priority
||
390 swap
->size
- swap
->used
> entry
->size
- entry
->used
)
395 /* No need to check n_swaps == 0, since it's rejected early */
396 assert(resume_config_devno
> 0);
397 return log_debug_errno(SYNTHETIC_ERRNO(ESTALE
), "Cannot find swap entry corresponding to /sys/power/resume.");
/* For swap files the reported path is the canonical block device path derived from the devno;
 * otherwise the path string of the entry is moved out with TAKE_PTR(). */
403 if (entry
->swapfile
) {
404 r
= device_path_make_canonical(S_IFBLK
, entry
->devno
, &path
);
406 return log_debug_errno(r
,
407 "Failed to format canonical device path for devno '" DEVNUM_FORMAT_STR
"': %m",
408 DEVNUM_FORMAT_VAL(entry
->devno
));
410 path
= TAKE_PTR(entry
->path
);
412 *ret_device
= (HibernationDevice
) {
413 .devno
= entry
->devno
,
414 .offset
= entry
->offset
,
420 *ret_size
= entry
->size
;
421 *ret_used
= entry
->used
;
/* Evaluates to 1 when /sys/power/resume named a device, 0 otherwise. */
424 return resume_config_devno
> 0;
/* Retrieves the Active(anon) field of /proc/meminfo with get_proc_field() and parses it as an
 * unsigned long long with safe_atollu(). Both failure cases are logged at debug level.
 * NOTE(review): the lines that check r and store the parsed value into *ret are missing from
 * this excerpt. The caller prints the value with a kB unit. */
427 static int get_proc_meminfo_active(unsigned long long *ret
) {
428 _cleanup_free_
char *active_str
= NULL
;
429 unsigned long long active
;
434 r
= get_proc_field("/proc/meminfo", "Active(anon)", WHITESPACE
, &active_str
);
436 return log_debug_errno(r
, "Failed to retrieve Active(anon) from /proc/meminfo: %m");
438 r
= safe_atollu(active_str
, &active
);
440 return log_debug_errno(r
, "Failed to parse Active(anon) '%s' from /proc/meminfo: %m", active_str
);
/* Checks whether hibernation can be expected to work on this system.
 *
 * Visible steps:
 *   - the environment variable SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK enables a bypass of the
 *     space check when getenv_bool() returns a positive value,
 *   - the swap size and usage are obtained from find_suitable_hibernation_device_full(),
 *   - without a configured resume device and outside of an EFI boot, the function fails with
 *     ENOTRECOVERABLE,
 *   - otherwise Active(anon) is compared against the free swap space scaled by
 *     HIBERNATION_SWAP_THRESHOLD, and the outcome is logged at debug level.
 *
 * NOTE(review): the assignment of resume_set, the handling of the bypass branches and the final
 * return statements are missing from this excerpt. */
446 int hibernation_is_safe(void) {
447 unsigned long long active
;
449 bool resume_set
, bypass_space_check
;
452 bypass_space_check
= getenv_bool("SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK") > 0;
/* No device is requested here, only the size and usage figures. */
454 r
= find_suitable_hibernation_device_full(NULL
, &size
, &used
);
455 if (IN_SET(r
, -ENOSPC
, -ESTALE
) && bypass_space_check
)
456 /* If we don't have any available swap space at all, or the specified resume device is missing,
457 * and $SYSTEMD_BYPASS_HIBERNATION_MEMORY_CHECK is set, skip all remaining checks since
458 * we can't do that properly anyway. It is quite possible that the user is using a setup
459 * similar to #30083. When we actually perform hibernation in sleep.c we'll check everything again. */
465 if (!resume_set
&& !is_efi_boot())
466 return log_debug_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE
),
467 "Not running on EFI and resume= is not set. Hibernation is not safe.");
469 if (bypass_space_check
)
472 r
= get_proc_meminfo_active(&active
);
/* r becomes the boolean result of the comparison; the multiplication by the threshold macro
 * is carried out in floating point because the macro is a floating point constant. */
476 r
= active
<= (size
- used
) * HIBERNATION_SWAP_THRESHOLD
;
477 log_debug("Detected %s swap for hibernation: Active(anon)=%llu kB, size=%" PRIu64
" kB, used=%" PRIu64
" kB, threshold=%.2g%%",
478 r
? "enough" : "not enough", active
, size
, used
, 100 * HIBERNATION_SWAP_THRESHOLD
);
/* Writes the resume location to the kernel.
 *
 * devno  - device number, formatted with FORMAT_DEVNUM() and written to /sys/power/resume.
 * offset - formatted as a decimal string and written to /sys/power/resume_offset.
 * device - device name that appears in the visible log messages.
 *
 * The offset is written before the device number (see the comment below). Both writes use
 * WRITE_STRING_FILE_DISABLE_BUFFER. Write failures are logged at error level; an unavailable
 * resume_offset file is either refused with EOPNOTSUPP or only warned about.
 * NOTE(review): the conditions that choose between those outcomes, and the trailing arguments of
 * several log calls, are missing from this excerpt. */
485 int write_resume_config(dev_t devno
, uint64_t offset
, const char *device
) {
486 char offset_str
[DECIMAL_STR_MAX(uint64_t)];
487 const char *devno_str
;
493 devno_str
= FORMAT_DEVNUM(devno
);
494 xsprintf(offset_str
, "%" PRIu64
, offset
);
496 /* We write the offset first since it's safer. Note that this file is only available in 4.17+, so
497 * fail gracefully if it doesn't exist and we're only overwriting it with 0. */
498 r
= write_string_file("/sys/power/resume_offset", offset_str
, WRITE_STRING_FILE_DISABLE_BUFFER
);
501 return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP
),
502 "Can't configure swap file offset %s, kernel does not support /sys/power/resume_offset. Refusing.",
505 log_warning_errno(r
, "/sys/power/resume_offset is unavailable, skipping writing swap file offset.");
507 return log_error_errno(r
,
508 "Failed to write swap file offset %s to /sys/power/resume_offset for device '%s': %m",
511 log_debug("Wrote resume_offset=%s for device '%s' to /sys/power/resume_offset.",
514 r
= write_string_file("/sys/power/resume", devno_str
, WRITE_STRING_FILE_DISABLE_BUFFER
);
516 return log_error_errno(r
,
517 "Failed to write device '%s' (%s) to /sys/power/resume: %m",
519 log_debug("Wrote resume=%s for device '%s' to /sys/power/resume.", devno_str
, device
);
/* Calls efi_set_variable() on the HibernateLocation systemd EFI variable with a NULL value and a
 * size of zero, and logs a warning if that fails.
 * NOTE(review): the conditions around the call and the remaining return statements are missing
 * from this excerpt. */
524 int clear_efi_hibernate_location_and_warn(void) {
530 r
= efi_set_variable(EFI_SYSTEMD_VARIABLE(HibernateLocation
), NULL
, 0);
534 return log_warning_errno(r
, "Failed to clear HibernateLocation EFI variable: %m");