]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
basic/efivars: read EFI variables using one read(), not two
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 9 Sep 2025 09:39:35 +0000 (11:39 +0200)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Mon, 13 Oct 2025 08:04:47 +0000 (10:04 +0200)
In https://github.com/systemd/systemd/issues/38842 it is reported that we're again
having trouble accessing EFI variables:
  [  292.212415] H (udev-worker)[253]: Reading EFI variable /sys/firmware/efi/efivars/LoaderDevicePartUUID-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f.
  ...
  [  344.397961] H (udev-worker)[253]: Detected slow EFI variable read access on LoaderDevicePartUUID-4a67b082-0a4c-41cf-b6c7-440b29bb8c4f: 52.185510s

We don't know what causes the slowdown, but it seems reasonable to avoid
unnecessary read() calls. We would read the 4-byte attr first, and then the
actual value later. But our code always reads the value (and discards the attr
in all cases except one, when _writing_ the variable), so let's optimize for
the case where we read the value and read the whole contents in one readv().

(cherry picked from commit 9db9d6806e398465a6366dfc5bdde2e24338ac29)
(cherry picked from commit 61e8972bfceba0090ce297d8e30a7eafa47db074)

src/basic/efivars.c

index 1811f9e37e74071dc93f82be885ac089e5d5986a..3dff6420997fd58e89064eef6806f24471e5e0de 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/fs.h>
 #include <stdlib.h>
 #include <sys/stat.h>
+#include <sys/uio.h>
 #include <unistd.h>
 
 #include "sd-id128.h"
@@ -37,12 +38,7 @@ int efi_get_variable(
                 void **ret_value,
                 size_t *ret_size) {
 
-        _cleanup_close_ int fd = -EBADF;
-        _cleanup_free_ void *buf = NULL;
-        struct stat st;
         usec_t begin = 0; /* Unnecessary initialization to appease gcc */
-        uint32_t a;
-        ssize_t n;
 
         assert(variable);
 
@@ -62,79 +58,100 @@ int efi_get_variable(
                 begin = now(CLOCK_MONOTONIC);
         }
 
-        fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+        _cleanup_close_ int fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
         if (fd < 0)
                 return log_debug_errno(errno, "open(\"%s\") failed: %m", p);
 
-        if (fstat(fd, &st) < 0)
-                return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p);
-        if (st.st_size < 4)
-                return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p);
-        if (st.st_size > 4*1024*1024 + 4)
-                return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p);
-
-        if (ret_value || ret_attribute) {
-                /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll
-                 * occasionally fail with EINTR here. A slowdown is better than a failure for us, so
-                 * retry a few times and eventually fail with -EBUSY.
-                 *
-                 * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75
-                 * and
-                 * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd.
-                 */
-                for (unsigned try = 0;; try++) {
-                        n = read(fd, &a, sizeof(a));
-                        if (n >= 0)
-                                break;
-                        log_debug_errno(errno, "Reading from \"%s\" failed: %m", p);
-                        if (errno != EINTR)
-                                return -errno;
-                        if (try >= EFI_N_RETRIES_TOTAL)
-                                return -EBUSY;
-
-                        if (try >= EFI_N_RETRIES_NO_DELAY)
-                                (void) usleep_safe(EFI_RETRY_DELAY);
-                }
+        uint32_t attr;
+        _cleanup_free_ char *buf = NULL;
+        ssize_t n;
 
-                /* Unfortunately kernel reports EOF if there's an inconsistency between efivarfs var list
-                 * and what's actually stored in firmware, c.f. #34304. A zero size env var is not allowed in
-                 * efi and hence the variable doesn't really exist in the backing store as long as it is zero
-                 * sized, and the kernel calls this "uncommitted". Hence we translate EOF back to ENOENT here,
-                 * as with kernel behavior before
-                 * https://github.com/torvalds/linux/commit/3fab70c165795431f00ddf9be8b84ddd07bd1f8f
-                 *
-                 * If the kernel changes behaviour (to flush dentries on resume), we can drop
-                 * this at some point in the future. But note that the commit is 11
-                 * years old at this point so we'll need to deal with the current behaviour for
-                 * a long time.
-                 */
-                if (n == 0)
+        /* The kernel ratelimits reads from the efivarfs because EFI is inefficient, and we'll occasionally
+         * fail with EINTR here. A slowdown is better than a failure for us, so retry a few times and
+         * eventually fail with -EBUSY.
+         *
+         * See https://github.com/torvalds/linux/blob/master/fs/efivarfs/file.c#L75 and
+         * https://github.com/torvalds/linux/commit/bef3efbeb897b56867e271cdbc5f8adaacaeb9cd.
+         *
+         * The variable may also be overwritten between the stat and read. If we find out that the new
+         * contents are longer, try again.
+         */
+        for (unsigned try = 0;; try++) {
+                struct stat st;
+
+                if (fstat(fd, &st) < 0)
+                        return log_debug_errno(errno, "fstat(\"%s\") failed: %m", p);
+                if (st.st_size == 0)
                         return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
                                                "EFI variable %s is uncommitted", p);
+                if (st.st_size < 4)
+                        return log_debug_errno(SYNTHETIC_ERRNO(ENODATA), "EFI variable %s is shorter than 4 bytes, refusing.", p);
+                if (st.st_size > 4*1024*1024 + 4)
+                        return log_debug_errno(SYNTHETIC_ERRNO(E2BIG), "EFI variable %s is ridiculously large, refusing.", p);
+
+                if (!ret_attribute && !ret_value) {
+                        /* No need to read anything, return the reported size. */
+                        n = st.st_size;
+                        break;
+                }
 
-                if (n != sizeof(a))
-                        return log_debug_errno(SYNTHETIC_ERRNO(EIO),
-                                               "Read %zi bytes from EFI variable %s, expected %zu.",  n, p, sizeof(a));
-        }
-
-        if (ret_value) {
-                buf = malloc(st.st_size - 4 + 3);
-                if (!buf)
+                /* We want +1 for the read call, and +3 for the additional terminating bytes added below. */
+                char *t = realloc(buf, (size_t) st.st_size + MAX(1, 3));
+                if (!t)
                         return -ENOMEM;
+                buf = t;
+
+                const struct iovec iov[] = {
+                        { &attr, sizeof(attr) },
+                        { buf, (size_t) st.st_size + 1 },
+                };
+
+                n = readv(fd, iov, 2);
+                assert(n <= st.st_size + 1);
+                if (n == st.st_size + 1)
+                        /* We need to try again with a bigger buffer. */
+                        continue;
+                if (n >= 0)
+                        break;
+
+                log_debug_errno(errno, "Reading from \"%s\" failed: %m", p);
+                if (errno != EINTR)
+                        return -errno;
+                if (try >= EFI_N_RETRIES_TOTAL)
+                        return -EBUSY;
+                if (try >= EFI_N_RETRIES_NO_DELAY)
+                        (void) usleep_safe(EFI_RETRY_DELAY);
+        }
 
-                n = read(fd, buf, (size_t) st.st_size - 4);
-                if (n < 0)
-                        return log_debug_errno(errno, "Failed to read value of EFI variable %s: %m", p);
-                assert(n <= st.st_size - 4);
+        /* Unfortunately kernel reports EOF if there's an inconsistency between efivarfs var list and
+         * what's actually stored in firmware, c.f. #34304. A zero size env var is not allowed in EFI
+         * and hence the variable doesn't really exist in the backing store as long as it is zero
+         * sized, and the kernel calls this "uncommitted". Hence we translate EOF back to ENOENT
+         * here, as with kernel behavior before
+         * https://github.com/torvalds/linux/commit/3fab70c165795431f00ddf9be8b84ddd07bd1f8f.
+         *
+         * If the kernel changes behaviour (to flush dentries on resume), we can drop this at some
+         * point in the future. But note that the commit is 11 years old at this point so we'll need
+         * to deal with the current behaviour for a long time.
+         */
+        if (n == 0)
+                return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
+                                       "EFI variable %s is uncommitted", p);
+        if (n < 4)
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO),
+                                       "Read %zi bytes from EFI variable %s, expected >= 4", n, p);
 
-                /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in the middle
-                 * of a character) */
-                ((char*) buf)[n] = 0;
-                ((char*) buf)[n + 1] = 0;
-                ((char*) buf)[n + 2] = 0;
-        } else
-                /* Assume that the reported size is accurate */
-                n = st.st_size - 4;
+        if (ret_attribute)
+                *ret_attribute = attr;
+        if (ret_value) {
+                assert(buf);
+                /* Always NUL-terminate (3 bytes, to properly protect UTF-16, even if truncated in
+                 * the middle of a character) */
+                buf[n - 4] = 0;
+                buf[n - 4 + 1] = 0;
+                buf[n - 4 + 2] = 0;
+                *ret_value = TAKE_PTR(buf);
+        }
 
         if (DEBUG_LOGGING) {
                 usec_t end = now(CLOCK_MONOTONIC);
@@ -146,14 +163,8 @@ int efi_get_variable(
         /* Note that efivarfs interestingly doesn't require ftruncate() to update an existing EFI variable
          * with a smaller value. */
 
-        if (ret_attribute)
-                *ret_attribute = a;
-
-        if (ret_value)
-                *ret_value = TAKE_PTR(buf);
-
         if (ret_size)
-                *ret_size = n;
+                *ret_size = n - 4;
 
         return 0;
 }