From: Zbigniew Jędrzejewski-Szmek Date: Tue, 9 Sep 2025 09:39:35 +0000 (+0200) Subject: basic/efivars: read EFI variables using one read(), not two X-Git-Tag: v258.1~62 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=61e8972bfceba0090ce297d8e30a7eafa47db074;p=thirdparty%2Fsystemd.git basic/efivars: read EFI variables using one read(), not two In https://github.com/systemd/systemd/issues/38842 it is reported that we're again having trouble accessing EFI variables: [ 292.212415] H (udev-worker)[253]: Reading EFI variable /sys/firmware/efi/efivars/LoaderDevicePartUUID-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f. ... [ 344.397961] H (udev-worker)[253]: Detected slow EFI variable read access on LoaderDevicePartUUID-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f: 52.185510s We don't know what causes the slowdown, but it seems reasonable to avoid unnecessary read() calls. We would read the 4-byte attr first, and then the actual value later. But our code always reads the value (and discards the attr in all cases except one, when _writing_ the variable), so let's optimize for the case where we read the value and read the whole contents in one readv(). (cherry picked from commit 9db9d6806e398465a6366dfc5bdde2e24338ac29) --- diff --git a/src/basic/efivars.c b/src/basic/efivars.c index 0163ad0c166..8185a111d16 100644 --- a/src/basic/efivars.c +++ b/src/basic/efivars.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -31,12 +32,7 @@ int efi_get_variable( void **ret_value, size_t *ret_size) { - _cleanup_close_ int fd = -EBADF; - _cleanup_free_ void *buf = NULL; - struct stat st; usec_t begin = 0; /* Unnecessary initialization to appease gcc */ - uint32_t a; - ssize_t n; assert(variable); @@ -56,79 +52,100 @@ int efi_get_variable( begin = now(CLOCK_MONOTONIC); } - fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC); + _cleanup_close_ int fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC); if (fd < 0) return log_debug_errno(errno, "open(\"%s\") failed: %m", p); - if (fstat(fd, &st) < 0) - return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p); - if (st.st_size < 4) - return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p); - if (st.st_size > 4*1024*1024 + 4) - return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p); - - if (ret_value || ret_attribute) { - /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll - * occasionally fail with EINTR here. A slowdown is better than a failure for us, so - * retry a few times and eventually fail with -EBUSY. - * - * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75 - * and - * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd. - */ - for (unsigned try = 0;; try++) { - n = read(fd, &a, sizeof(a)); - if (n >= 0) - break; - log_debug_errno(errno, "Reading from \"%s\" failed: %m", p); - if (errno != EINTR) - return -errno; - if (try >= EFI_N_RETRIES_TOTAL) - return -EBUSY; - - if (try >= EFI_N_RETRIES_NO_DELAY) - (void) usleep_safe(EFI_RETRY_DELAY); - } + uint32_t attr; + _cleanup_free_ char *buf = NULL; + ssize_t n; - /* Unfortunately kernel reports EOF if there's an inconsistency between efivarfs var list - * and what's actually stored in firmware, c.f. #34304. A zero size env var is not allowed in - * efi and hence the variable doesn't really exist in the backing store as long as it is zero - * sized, and the kernel calls this "uncommitted". Hence we translate EOF back to ENOENT here, - * as with kernel behavior before - * https://github.com/torvalds/linux/commit/3fab70c165795431f00ddf9be8b84ddd07bd1f8f - * - * If the kernel changes behaviour (to flush dentries on resume), we can drop - * this at some point in the future. But note that the commit is 11 - * years old at this point so we'll need to deal with the current behaviour for - * a long time. - */ - if (n == 0) + /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll occasionally + * fail with EINTR here. A slowdown is better than a failure for us, so retry a few times and + * eventually fail with -EBUSY. + * + * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75 and + * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd. + * + * The variable may also be overwritten between the stat and read. If we find out that the new + * contents are longer, try again. + */ + for (unsigned try = 0;; try++) { + struct stat st; + + if (fstat(fd, &st) < 0) + return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p); + if (st.st_size == 0) return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), "EFI variable %s is uncommitted", p); + if (st.st_size < 4) + return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p); + if (st.st_size > 4*1024*1024 + 4) + return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p); + + if (!ret_attribute && !ret_value) { + /* No need to read anything, return the reported size. */ + n = st.st_size; + break; + } - if (n != sizeof(a)) - return log_debug_errno(SYNTHETIC_ERRNO(EIO), - "Read %zi bytes from EFI variable %s, expected %zu.", n, p, sizeof(a)); - } - - if (ret_value) { - buf = malloc(st.st_size - 4 + 3); - if (!buf) + /* We want +1 for the read call, and +3 for the additional terminating bytes added below. */ + char *t = realloc(buf, (size_t) st.st_size + MAX(1, 3)); + if (!t) return -ENOMEM; + buf = t; + + const struct iovec iov[] = { + { &attr, sizeof(attr) }, + { buf, (size_t) st.st_size + 1 }, + }; + + n = readv(fd, iov, 2); + assert(n <= st.st_size + 1); + if (n == st.st_size + 1) + /* We need to try again with a bigger buffer. */ + continue; + if (n >= 0) + break; + + log_debug_errno(errno, "Reading from \"%s\" failed: %m", p); + if (errno != EINTR) + return -errno; + if (try >= EFI_N_RETRIES_TOTAL) + return -EBUSY; + if (try >= EFI_N_RETRIES_NO_DELAY) + (void) usleep_safe(EFI_RETRY_DELAY); + } - n = read(fd, buf, (size_t) st.st_size - 4); - if (n < 0) - return log_debug_errno(errno, "Failed to read value of EFI variable %s: %m", p); - assert(n <= st.st_size - 4); + /* Unfortunately kernel reports EOF if there's an inconsistency between efivarfs var list and + * what's actually stored in firmware, c.f. #34304. A zero size env var is not allowed in EFI + * and hence the variable doesn't really exist in the backing store as long as it is zero + * sized, and the kernel calls this "uncommitted". Hence we translate EOF back to ENOENT + * here, as with kernel behavior before + * https://github.com/torvalds/linux/commit/3fab70c165795431f00ddf9be8b84ddd07bd1f8f. + * + * If the kernel changes behaviour (to flush dentries on resume), we can drop this at some + * point in the future. But note that the commit is 11 years old at this point so we'll need + * to deal with the current behaviour for a long time. + */ + if (n == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), + "EFI variable %s is uncommitted", p); + if (n < 4) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), + "Read %zi bytes from EFI variable %s, expected >= 4", n, p); - /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in the middle - * of a character) */ - ((char*) buf)[n] = 0; - ((char*) buf)[n + 1] = 0; - ((char*) buf)[n + 2] = 0; - } else - /* Assume that the reported size is accurate */ - n = st.st_size - 4; + if (ret_attribute) + *ret_attribute = attr; + if (ret_value) { + assert(buf); + /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in + * the middle of a character) */ + buf[n - 4] = 0; + buf[n - 4 + 1] = 0; + buf[n - 4 + 2] = 0; + *ret_value = TAKE_PTR(buf); + } if (DEBUG_LOGGING) { usec_t end = now(CLOCK_MONOTONIC); @@ -140,14 +157,8 @@ int efi_get_variable( /* Note that efivarfs interestingly doesn't require ftruncate() to update an existing EFI variable * with a smaller value. */ - if (ret_attribute) - *ret_attribute = a; - - if (ret_value) - *ret_value = TAKE_PTR(buf); - if (ret_size) - *ret_size = n; + *ret_size = n - 4; return 0; }