]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
basic/efivars: read EFI variables using one read(), not two
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 9 Sep 2025 09:39:35 +0000 (11:39 +0200)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Fri, 10 Oct 2025 08:36:47 +0000 (10:36 +0200)
In https://github.com/systemd/systemd/issues/38842 it is reported that we're again
having trouble accessing EFI variables:
  [  292.212415] H (udev-worker)[253]: Reading EFI variable /sys/firmware/efi/efivars/LoaderDevicePartUUID-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f.
  ...
  [  344.397961] H (udev-worker)[253]: Detected slow EFI variable read access on LoaderDevicePartUUID-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f: 52.185510s

We don't know what causes the slowdown, but it seems reasonable to avoid
unnecessary read() calls. We would read the 4-byte attr first, and then the
actual value later. But our code always reads the value (and discards the attr
in all cases except one, when _writing_ the variable), so let's optimize for
the case where we read the value and read the whole contents in one readv().

(cherry picked from commit 9db9d6806e398465a6366dfc5bdde2e24338ac29)

src/basic/efivars.c

index 0163ad0c166f33741d33c78f552da930e0ac6466..8185a111d160ff860821a9c001e9fa38a5331437 100644 (file)
@@ -3,6 +3,7 @@
 #include <fcntl.h>
 #include <stdlib.h>
 #include <sys/stat.h>
+#include <sys/uio.h>
 #include <time.h>
 #include <unistd.h>
 
@@ -31,12 +32,7 @@ int efi_get_variable(
                 void **ret_value,
                 size_t *ret_size) {
 
-        _cleanup_close_ int fd = -EBADF;
-        _cleanup_free_ void *buf = NULL;
-        struct stat st;
         usec_t begin = 0; /* Unnecessary initialization to appease gcc */
-        uint32_t a;
-        ssize_t n;
 
         assert(variable);
 
@@ -56,79 +52,100 @@ int efi_get_variable(
                 begin = now(CLOCK_MONOTONIC);
         }
 
-        fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+        _cleanup_close_ int fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
         if (fd < 0)
                 return log_debug_errno(errno, "open(\"%s\") failed: %m", p);
 
-        if (fstat(fd, &st) < 0)
-                return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p);
-        if (st.st_size < 4)
-                return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p);
-        if (st.st_size > 4*1024*1024 + 4)
-                return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p);
-
-        if (ret_value || ret_attribute) {
-                /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll
-                 * occasionally fail with EINTR here. A slowdown is better than a failure for us, so
-                 * retry a few times and eventually fail with -EBUSY.
-                 *
-                 * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75
-                 * and
-                 * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd.
-                 */
-                for (unsigned try = 0;; try++) {
-                        n = read(fd, &a, sizeof(a));
-                        if (n >= 0)
-                                break;
-                        log_debug_errno(errno, "Reading from \"%s\" failed: %m", p);
-                        if (errno != EINTR)
-                                return -errno;
-                        if (try >= EFI_N_RETRIES_TOTAL)
-                                return -EBUSY;
-
-                        if (try >= EFI_N_RETRIES_NO_DELAY)
-                                (void) usleep_safe(EFI_RETRY_DELAY);
-                }
+        uint32_t attr;
+        _cleanup_free_ char *buf = NULL;
+        ssize_t n;
 
-                /* Unfortunately kernel reports EOF if there's an inconsistency between efivarfs var list
-                 * and what's actually stored in firmware, c.f. #34304. A zero size env var is not allowed in
-                 * efi and hence the variable doesn't really exist in the backing store as long as it is zero
-                 * sized, and the kernel calls this "uncommitted". Hence we translate EOF back to ENOENT here,
-                 * as with kernel behavior before
-                 * https://github.com/torvalds/linux/commit/3fab70c165795431f00ddf9be8b84ddd07bd1f8f
-                 *
-                 * If the kernel changes behaviour (to flush dentries on resume), we can drop
-                 * this at some point in the future. But note that the commit is 11
-                 * years old at this point so we'll need to deal with the current behaviour for
-                 * a long time.
-                 */
-                if (n == 0)
+        /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll occasionally
+         * fail with EINTR here. A slowdown is better than a failure for us, so retry a few times and
+         * eventually fail with -EBUSY.
+         *
+         * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75 and
+         * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd.
+         *
+         * The variable may also be overwritten between the stat and read. If we find out that the new
+         * contents are longer, try again.
+         */
+        for (unsigned try = 0;; try++) {
+                struct stat st;
+
+                if (fstat(fd, &st) < 0)
+                        return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p);
+                if (st.st_size == 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
                                                "EFI variable %s is uncommitted", p);
+                if (st.st_size < 4)
+                        return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p);
+                if (st.st_size > 4*1024*1024 + 4)
+                        return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p);
+
+                if (!ret_attribute && !ret_value) {
+                        /* No need to read anything, return the reported size. */
+                        n = st.st_size;
+                        break;
+                }
 
-                if (n != sizeof(a))
-                        return log_debug_errno(SYNTHETIC_ERRNO(EIO),
-                                               "Read %zi bytes from EFI variable %s, expected %zu.",  n, p, sizeof(a));
-        }
-
-        if (ret_value) {
-                buf = malloc(st.st_size - 4 + 3);
-                if (!buf)
+                /* We want +1 for the read call, and +3 for the additional terminating bytes added below. */
+                char *t = realloc(buf, (size_t) st.st_size + MAX(1, 3));
+                if (!t)
                         return -ENOMEM;
+                buf = t;
+
+                const struct iovec iov[] = {
+                        { &attr, sizeof(attr) },
+                        { buf, (size_t) st.st_size + 1 },
+                };
+
+                n = readv(fd, iov, 2);
+                assert(n <= st.st_size + 1);
+                if (n == st.st_size + 1)
+                        /* We need to try again with a bigger buffer. */
+                        continue;
+                if (n >= 0)
+                        break;
+
+                log_debug_errno(errno, "Reading from \"%s\" failed: %m", p);
+                if (errno != EINTR)
+                        return -errno;
+                if (try >= EFI_N_RETRIES_TOTAL)
+                        return -EBUSY;
+                if (try >= EFI_N_RETRIES_NO_DELAY)
+                        (void) usleep_safe(EFI_RETRY_DELAY);
+        }
 
-                n = read(fd, buf, (size_t) st.st_size - 4);
-                if (n < 0)
-                        return log_debug_errno(errno, "Failed to read value of EFI variable %s: %m", p);
-                assert(n <= st.st_size - 4);
+        /* Unfortunately kernel reports EOF if there's an inconsistency between efivarfs var list and
+         * what's actually stored in firmware, c.f. #34304. A zero size env var is not allowed in EFI
+         * and hence the variable doesn't really exist in the backing store as long as it is zero
+         * sized, and the kernel calls this "uncommitted". Hence we translate EOF back to ENOENT
+         * here, as with kernel behavior before
+         * https://github.com/torvalds/linux/commit/3fab70c165795431f00ddf9be8b84ddd07bd1f8f.
+         *
+         * If the kernel changes behaviour (to flush dentries on resume), we can drop this at some
+         * point in the future. But note that the commit is 11 years old at this point so we'll need
+         * to deal with the current behaviour for a long time.
+         */
+        if (n == 0)
+                return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+                                       "EFI variable %s is uncommitted", p);
+        if (n < 4)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+                                       "Read %zi bytes from EFI variable %s, expected >= 4", n, p);
 
-                /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in the middle
-                 * of a character) */
-                ((char*) buf)[n] = 0;
-                ((char*) buf)[n + 1] = 0;
-                ((char*) buf)[n + 2] = 0;
-        } else
-                /* Assume that the reported size is accurate */
-                n = st.st_size - 4;
+        if (ret_attribute)
+                *ret_attribute = attr;
+        if (ret_value) {
+                assert(buf);
+                /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in
+                 * the middle of a character) */
+                buf[n - 4] = 0;
+                buf[n - 4 + 1] = 0;
+                buf[n - 4 + 2] = 0;
+                *ret_value = TAKE_PTR(buf);
+        }
 
         if (DEBUG_LOGGING) {
                 usec_t end = now(CLOCK_MONOTONIC);
@@ -140,14 +157,8 @@ int efi_get_variable(
         /* Note that efivarfs interestingly doesn't require ftruncate() to update an existing EFI variable
          * with a smaller value. */
 
-        if (ret_attribute)
-                *ret_attribute = a;
-
-        if (ret_value)
-                *ret_value = TAKE_PTR(buf);
-
         if (ret_size)
-                *ret_size = n;
+                *ret_size = n - 4;
 
         return 0;
 }