git.ipfire.org Git - thirdparty/systemd.git/commitdiff
boot/stub: honor PE SectionAlignment when loading inner kernel
author: Josh Hoffer <jhoffer@anduril.com>
Thu, 4 Jun 2026 00:19:33 +0000 (17:19 -0700)
committer: Lennart Poettering <lennart@poettering.net>
Fri, 19 Jun 2026 20:16:28 +0000 (22:16 +0200)
The stub copies the embedded kernel PE into an xmalloc_pages()
allocation with AllocateAnyPages, which only guarantees EFI_PAGE_SIZE
(4 KiB) alignment.

The arm64 kernel declares SectionAlignment = SZ_64K. When _text is not
64K-aligned the kernel's EFI stub reallocates and copies the image,
which can fail with EFI_OUT_OF_RESOURCES on memory-constrained firmware
(e.g. U-Boot, bounded by CONFIG_SYS_MALLOC_LEN rather than full DRAM).
The failure is non-deterministic since AllocateAnyPages returns whatever
page the allocator finds first.

Plumb SectionAlignment through pe_kernel_info() and allocate via
xmalloc_aligned_pages(). For alignment <= EFI_PAGE_SIZE (e.g. x86_64) it
reduces to a plain AllocatePages(), so the change is free there.

SectionAlignment comes from the PE header, and the spec requires it to be
a power of 2. Sanitize it in pe_kernel_info() right where it is parsed:
a non-conforming value falls back to plain page alignment (matching the
behavior of older systemd-stub versions that ignored the field) rather
than propagating something that would break xmalloc_aligned_pages()'s
over-alignment math, with a log_warning() so the fallback stays
diagnosable.

Co-developed-by: Claude <claude@anthropic.com>
Closes: #42443
Reported-By: Agathe Porte <agathe.porte@oss.qualcomm.com>
Tested-By: Agathe Porte <agathe.porte@oss.qualcomm.com>
src/boot/boot.c
src/boot/linux.c
src/boot/pe.c
src/boot/pe.h

index 3ba430a8442060b77f7bd8c240cd14ea32e33f03..dbe7f0c8ab60ce98aed66541e1afadcd68c81622 100644 (file)
@@ -3101,7 +3101,8 @@ static EFI_STATUS call_image_start(
                 uint32_t compat_address;
 
                 err = pe_kernel_info(loaded_image->ImageBase, /* ret_entry_point= */ NULL, &compat_address,
-                                     /* ret_size_in_memory= */ NULL);
+                                     /* ret_size_in_memory= */ NULL,
+                                     /* ret_section_alignment= */ NULL);
                 if (err != EFI_SUCCESS) {
                         if (err != EFI_UNSUPPORTED)
                                 return log_error_status(err, "Error finding kernel compat entry address: %m");
index b1f38e597d653f5a806230c1bc55509da593935c..bd71ada48358c84b806367a4e9de81240fde5e52 100644 (file)
@@ -160,14 +160,14 @@ EFI_STATUS linux_exec(
                 const struct iovec *initrd) {
 
         size_t kernel_size_in_memory = 0;
-        uint32_t compat_entry_point, entry_point;
+        uint32_t compat_entry_point, entry_point, section_alignment;
         EFI_STATUS err;
 
         assert(parent_image);
         assert(iovec_is_set(kernel));
         assert(iovec_is_valid(initrd));
 
-        err = pe_kernel_info(kernel->iov_base, &entry_point, &compat_entry_point, &kernel_size_in_memory);
+        err = pe_kernel_info(kernel->iov_base, &entry_point, &compat_entry_point, &kernel_size_in_memory, &section_alignment);
 #if defined(__i386__) || defined(__x86_64__)
         if (err == EFI_UNSUPPORTED)
                 /* Kernel is too old to support LINUX_INITRD_MEDIA_GUID, try the deprecated EFI handover
@@ -264,9 +264,17 @@ EFI_STATUS linux_exec(
         if (err != EFI_SUCCESS)
                 return log_error_status(err, "Cannot read sections: %m");
 
-        /* Do we need to ensure under 4gb address on x86? */
-        _cleanup_pages_ Pages loaded_kernel_pages = xmalloc_pages(
-                        AllocateAnyPages, EfiLoaderCode, EFI_SIZE_TO_PAGES(kernel_size_in_memory), 0);
+        /* Honor the PE SectionAlignment (SZ_64K on arm64): if _text is not aligned the kernel's EFI stub
+         * reallocates and copies the image, which can fail with EFI_OUT_OF_RESOURCES on memory-constrained
+         * firmware. When alignment <= EFI_PAGE_SIZE (e.g. x86_64) xmalloc_aligned_pages() reduces to a
+         * plain AllocatePages() with no extra over-allocation. pe_kernel_info() already sanitized a
+         * non-conforming SectionAlignment to plain page alignment. */
+        _cleanup_pages_ Pages loaded_kernel_pages = xmalloc_aligned_pages(
+                        AllocateAnyPages,
+                        EfiLoaderCode,
+                        EFI_SIZE_TO_PAGES(kernel_size_in_memory),
+                        section_alignment,
+                        /* addr= */ 0);
 
         uint8_t* loaded_kernel = PHYSICAL_ADDRESS_TO_POINTER(loaded_kernel_pages.addr);
         FOREACH_ARRAY(h, headers, n_headers) {
index 5fbf5a42e5386cd5cbaaa1189e7727a1fdcfcca8..e96600ab05d55a05f6b55a79fb9e91979b1685bc 100644 (file)
@@ -460,7 +460,12 @@ static uint32_t get_compatibility_entry_address(const DosFileHeader *dos, const
         return 0;
 }
 
-EFI_STATUS pe_kernel_info(const void *base, uint32_t *ret_entry_point, uint32_t *ret_compat_entry_point, size_t *ret_size_in_memory) {
+EFI_STATUS pe_kernel_info(
+                const void *base,
+                uint32_t *ret_entry_point,
+                uint32_t *ret_compat_entry_point,
+                size_t *ret_size_in_memory,
+                uint32_t *ret_section_alignment) {
         assert(base);
 
         const DosFileHeader *dos = (const DosFileHeader *) base;
@@ -475,6 +480,16 @@ EFI_STATUS pe_kernel_info(const void *base, uint32_t *ret_entry_point, uint32_t
          * of the SizeOfImage field in the PE header and return it */
         size_t size_in_memory = pe->OptionalHeader.SizeOfImage;
 
+        /* Honoring SectionAlignment lets callers place the image so the kernel's EFI stub need not relocate
+         * it (SZ_64K on arm64). The PE spec requires a power of 2; for a non-conforming value fall back to
+         * plain page alignment (what we assumed before honoring this field) rather than propagate something
+         * that would break the allocator's over-alignment math. */
+        uint32_t section_alignment = pe->OptionalHeader.SectionAlignment;
+        if (!ISPOWEROF2(section_alignment)) {
+                log_warning("PE SectionAlignment %" PRIu32 " is not a power of 2, falling back to page alignment.", section_alignment);
+                section_alignment = EFI_PAGE_SIZE;
+        }
+
         /* Support for LINUX_INITRD_MEDIA_GUID was added in kernel stub 1.0. */
         if (pe->OptionalHeader.MajorImageVersion < 1)
                 return EFI_UNSUPPORTED;
@@ -486,6 +501,8 @@ EFI_STATUS pe_kernel_info(const void *base, uint32_t *ret_entry_point, uint32_t
                         *ret_compat_entry_point = 0;
                 if (ret_size_in_memory)
                         *ret_size_in_memory = size_in_memory;
+                if (ret_section_alignment)
+                        *ret_section_alignment = section_alignment;
                 return EFI_SUCCESS;
         }
 
@@ -500,6 +517,8 @@ EFI_STATUS pe_kernel_info(const void *base, uint32_t *ret_entry_point, uint32_t
                 *ret_compat_entry_point = compat_entry_point;
         if (ret_size_in_memory)
                 *ret_size_in_memory = size_in_memory;
+        if (ret_section_alignment)
+                *ret_section_alignment = section_alignment;
 
         return EFI_SUCCESS;
 }
index 7387f3e2fdff85098596574c25e00722deb0abd6..5c8dc86fe938914d8da58ae4bfeb34953b0735e9 100644 (file)
@@ -57,7 +57,12 @@ EFI_STATUS pe_memory_locate_sections(
                 const char *const section_names[],
                 PeSectionVector sections[]);
 
-EFI_STATUS pe_kernel_info(const void *base, uint32_t *ret_entry_point, uint32_t *ret_compat_entry_point, size_t *ret_size_in_memory);
+EFI_STATUS pe_kernel_info(
+                const void *base,
+                uint32_t *ret_entry_point,
+                uint32_t *ret_compat_entry_point,
+                size_t *ret_size_in_memory,
+                uint32_t *ret_section_alignment);
 
 EFI_STATUS pe_kernel_check_no_relocation(const void *base);