s390/boot: rework decompressor reserved tracking
author    Vasily Gorbik <gor@linux.ibm.com>     Thu, 2 Feb 2023 12:59:36 +0000 (13:59 +0100)
committer Heiko Carstens <hca@linux.ibm.com>    Mon, 20 Mar 2023 10:02:50 +0000 (11:02 +0100)
Currently several approaches for finding unused memory in the decompressor
are utilized. While "safe_addr" grows towards higher addresses, the vmem
code allocates paging structures top down. The former requires careful
ordering. In addition to that, the ipl report handling code verifies
potential intersections with secure boot certificates on its own. Neither
of the two approaches is aware of memory holes, and they are not consistent
with each other in low memory conditions.

To solve that, the existing approaches are generalized and combined,
and online memory ranges are now taken into consideration.

physmem_info has been extended to contain reserved memory ranges. A new
set of functions allows handling reserves and finding unused memory.
All reserves and memory allocations are "typed". On an out-of-memory
condition the decompressor fails with detailed info on the current
reserved ranges and usable online memory, for example:

Linux version 6.2.0 ...
Kernel command line: ... mem=100M
Out of memory allocating 100000 bytes 100000 aligned in range 0:5800000
Reserved memory ranges:
0000000000000000 0000000003e33000 DECOMPRESSOR
0000000003f00000 00000000057648a3 INITRD
00000000063e0000 00000000063e8000 VMEM
00000000063eb000 00000000063f4000 VMEM
00000000063f7800 0000000006400000 VMEM
0000000005800000 0000000006300000 KASAN
Usable online memory ranges (info source: sclp read info [3]):
0000000000000000 0000000006400000
Usable online memory total: 6400000 Reserved: 61b10a3 Free: 24ef5d
Call Trace:
(sp:000000000002bd58 [<0000000000012a70>] physmem_alloc_top_down+0x60/0x14c)
 sp:000000000002bdc8 [<0000000000013756>] _pa+0x56/0x6a
 sp:000000000002bdf0 [<0000000000013bcc>] pgtable_populate+0x45c/0x65e
 sp:000000000002be90 [<00000000000140aa>] setup_vmem+0x2da/0x424
 sp:000000000002bec8 [<0000000000011c20>] startup_kernel+0x428/0x8b4
 sp:000000000002bf60 [<00000000000100f4>] startup_normal+0xd4/0xd4

physmem_alloc_range allows finding free memory in a specified range. It
should be used for one-time allocations only, like finding a position for
amode31 and vmlinux.
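
As an illustration, here is a minimal usage sketch (assuming the
prototypes from the boot.h hunk below; the amode31 case mirrors the
startup.c hunk, and the local variable name is illustrative only):

  /* one-time typed allocation: amode31_size bytes, page aligned,
   * anywhere in [0, SZ_2G), dying with the OOM report on failure */
  unsigned long amode31_lma;

  amode31_lma = physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size,
                                    PAGE_SIZE, 0, SZ_2G, true);
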
physmem_alloc_top_down can be used just like physmem_alloc_range, but it
also allows multiple allocations per type and tries to merge sequential
allocations together, which is useful for paging structure allocations.
If sequential allocations cannot be merged they are "chained", allowing
easy enumeration of reserved ranges per type and their later migration to
memblock. The extra "struct reserved_range" instances allocated for
chaining are not tracked or reserved themselves; they rely on the fact
that both physmem_alloc_range and physmem_alloc_top_down search for free
memory only below the current top down allocator position. All reserved
ranges should be transferred to memblock before memblock allocations are
enabled.
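
The chaining is what keeps the later memblock handover simple; the
reworked reserve_pgtables() from the arch/s390/kernel/setup.c hunk
below is the concrete instance:

  static void __init reserve_pgtables(void)
  {
          unsigned long start, end;
          struct reserved_range *range;

          /* visits the RR_VMEM reserve and every range chained behind it */
          for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
                  memblock_reserve(start, end - start);
  }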

The startup code has been reordered to delay any memory allocations until
online memory ranges are detected, and occupied memory ranges are marked as
reserved to be excluded from follow-up allocations.
Ipl report certificates are a special case: the ipl report certificate
list is checked together with the other memory reserves until the
certificates are saved elsewhere.
The memory KASAN requires for shadow memory allocation and mapping is
reserved as one large chunk, which is later passed on to the KASAN early
initialization code.
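
In condensed form (pieced together from the startup.c and kasan_init.c
hunks below), the KASAN handoff is:

  /* boot: reserve one contiguous chunk for shadow memory and mappings */
  physmem_alloc_top_down(RR_KASAN,
                         kasan_estimate_memory_needs(get_physmem_usable_total()),
                         _SEGMENT_SIZE);

  /* kasan_early_init(): consume the chunk from both ends -- segment
   * (shadow) allocations grow bottom up, page table allocations top down */
  segment_pos = physmem_info.reserved[RR_KASAN].start;
  pgalloc_pos = physmem_info.reserved[RR_KASAN].end;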

Acked-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
12 files changed:
arch/s390/boot/boot.h
arch/s390/boot/ipl_report.c
arch/s390/boot/kaslr.c
arch/s390/boot/pgm_check_info.c
arch/s390/boot/physmem_info.c
arch/s390/boot/startup.c
arch/s390/boot/vmem.c
arch/s390/boot/vmlinux.lds.S
arch/s390/include/asm/physmem_info.h
arch/s390/include/asm/setup.h
arch/s390/kernel/setup.c
arch/s390/mm/kasan_init.c

diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index d39895d5796ed273856e621486b0d240ebcb57ed..2b404810641896b3c5b3aad582d7e859254d7709 100644
@@ -8,6 +8,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/physmem_info.h>
+
 struct machine_info {
        unsigned char has_edat1 : 1;
        unsigned char has_edat2 : 1;
@@ -33,21 +35,34 @@ struct vmlinux_info {
 };
 
 void startup_kernel(void);
-unsigned long detect_memory(unsigned long *safe_addr);
+unsigned long detect_max_physmem_end(void);
+void detect_physmem_online_ranges(unsigned long max_physmem_end);
 void physmem_set_usable_limit(unsigned long limit);
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
+void physmem_free(enum reserved_range_type type);
+/* for continuous/multiple allocations per type */
+unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
+                                    unsigned long align);
+/* for single allocations, 1 per type */
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+                                 unsigned long align, unsigned long min, unsigned long max,
+                                 bool die_on_oom);
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+                                unsigned long *intersection_start);
 bool is_ipl_block_dump(void);
 void store_ipl_parmblock(void);
-unsigned long read_ipl_report(unsigned long safe_addr);
+int read_ipl_report(void);
+void save_ipl_cert_comp_list(void);
 void setup_boot_command_line(void);
 void parse_boot_command_line(void);
 void verify_facilities(void);
 void print_missing_facilities(void);
 void sclp_early_setup_buffer(void);
 void print_pgm_check_info(void);
-unsigned long get_random_base(unsigned long safe_addr);
+unsigned long get_random_base(void);
 void setup_vmem(unsigned long asce_limit);
-unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
 void __printf(1, 2) decompressor_printk(const char *fmt, ...);
+void print_stacktrace(unsigned long sp);
 void error(char *m);
 
 extern struct machine_info machine;
@@ -62,7 +77,7 @@ extern char __boot_data_start[], __boot_data_end[];
 extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
 extern char _decompressor_syms_start[], _decompressor_syms_end[];
 extern char _stack_start[], _stack_end[];
-extern char _end[];
+extern char _end[], _decompressor_end[];
 extern unsigned char _compressed_start[];
 extern unsigned char _compressed_end[];
 extern struct vmlinux_info _vmlinux_info;
@@ -70,5 +85,10 @@ extern struct vmlinux_info _vmlinux_info;
 
 #define __abs_lowcore_pa(x)    (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
 
+static inline bool intersects(unsigned long addr0, unsigned long size0,
+                             unsigned long addr1, unsigned long size1)
+{
+       return addr0 + size0 > addr1 && addr1 + size1 > addr0;
+}
 #endif /* __ASSEMBLY__ */
 #endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 74b5cd264862247fc040cabfe24c6a289d3d08a3..1803035e68d2ca53e2d61e32c8cd229ad5dae548 100644
@@ -5,6 +5,7 @@
 #include <asm/sclp.h>
 #include <asm/sections.h>
 #include <asm/boot_data.h>
+#include <asm/physmem_info.h>
 #include <uapi/asm/ipl.h>
 #include "boot.h"
 
@@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size);
 unsigned long __bootdata(early_ipl_comp_list_addr);
 unsigned long __bootdata(early_ipl_comp_list_size);
 
+static struct ipl_rb_certificates *certs;
+static struct ipl_rb_components *comps;
+static bool ipl_report_needs_saving;
+
 #define for_each_rb_entry(entry, rb) \
        for (entry = rb->entries; \
             (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
             entry++)
 
-static inline bool intersects(unsigned long addr0, unsigned long size0,
-                             unsigned long addr1, unsigned long size1)
-{
-       return addr0 + size0 > addr1 && addr1 + size1 > addr0;
-}
-
-static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
-                                        struct ipl_rb_certificates *certs,
-                                        unsigned long safe_addr)
+static unsigned long get_cert_comp_list_size(void)
 {
        struct ipl_rb_certificate_entry *cert;
        struct ipl_rb_component_entry *comp;
@@ -44,44 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
        ipl_cert_list_size = 0;
        for_each_rb_entry(cert, certs)
                ipl_cert_list_size += sizeof(unsigned int) + cert->len;
-       size = ipl_cert_list_size + early_ipl_comp_list_size;
+       return ipl_cert_list_size + early_ipl_comp_list_size;
+}
 
-       /*
-        * Start from safe_addr to find a free memory area large
-        * enough for the IPL report boot data. This area is used
-        * for ipl_cert_list_addr/ipl_cert_list_size and
-        * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
-        * not overlap with any component or any certificate.
-        */
-repeat:
-       if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
-           intersects(initrd_data.start, initrd_data.size, safe_addr, size))
-               safe_addr = initrd_data.start + initrd_data.size;
-       if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
-               safe_addr = (unsigned long)comps + comps->len;
-               goto repeat;
-       }
-       for_each_rb_entry(comp, comps)
-               if (intersects(safe_addr, size, comp->addr, comp->len)) {
-                       safe_addr = comp->addr + comp->len;
-                       goto repeat;
+bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
+                                unsigned long *intersection_start)
+{
+       struct ipl_rb_certificate_entry *cert;
+
+       if (!ipl_report_needs_saving)
+               return false;
+
+       for_each_rb_entry(cert, certs) {
+               if (intersects(addr, size, cert->addr, cert->len)) {
+                       *intersection_start = cert->addr;
+                       return true;
                }
-       if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
-               safe_addr = (unsigned long)certs + certs->len;
-               goto repeat;
        }
-       for_each_rb_entry(cert, certs)
-               if (intersects(safe_addr, size, cert->addr, cert->len)) {
-                       safe_addr = cert->addr + cert->len;
-                       goto repeat;
-               }
-       early_ipl_comp_list_addr = safe_addr;
-       ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size;
-
-       return safe_addr + size;
+       return false;
 }
 
-static void copy_components_bootdata(struct ipl_rb_components *comps)
+static void copy_components_bootdata(void)
 {
        struct ipl_rb_component_entry *comp, *ptr;
 
@@ -90,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps)
                memcpy(ptr++, comp, sizeof(*ptr));
 }
 
-static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
+static void copy_certificates_bootdata(void)
 {
        struct ipl_rb_certificate_entry *cert;
        void *ptr;
@@ -104,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
        }
 }
 
-unsigned long read_ipl_report(unsigned long safe_addr)
+int read_ipl_report(void)
 {
-       struct ipl_rb_certificates *certs;
-       struct ipl_rb_components *comps;
        struct ipl_pl_hdr *pl_hdr;
        struct ipl_rl_hdr *rl_hdr;
        struct ipl_rb_hdr *rb_hdr;
@@ -120,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr)
         */
        if (!ipl_block_valid ||
            !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
-               return safe_addr;
+               return -1;
        ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
        /*
         * There is an IPL report, to find it load the pointer to the
@@ -158,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr)
         * With either the component list or the certificate list
         * missing the kernel will stay ignorant of secure IPL.
         */
-       if (!comps || !certs)
-               return safe_addr;
+       if (!comps || !certs) {
+               certs = NULL;
+               return -1;
+       }
 
-       /*
-        * Copy component and certificate list to a safe area
-        * where the decompressed kernel can find them.
-        */
-       safe_addr = find_bootdata_space(comps, certs, safe_addr);
-       copy_components_bootdata(comps);
-       copy_certificates_bootdata(certs);
+       ipl_report_needs_saving = true;
+       physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr,
+                       (unsigned long)rl_end - (unsigned long)pl_hdr);
+       return 0;
+}
+
+void save_ipl_cert_comp_list(void)
+{
+       unsigned long size;
+
+       if (!ipl_report_needs_saving)
+               return;
+
+       size = get_cert_comp_list_size();
+       early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
+       ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
 
-       return safe_addr;
+       copy_components_bootdata();
+       copy_certificates_bootdata();
+       physmem_free(RR_IPLREPORT);
+       ipl_report_needs_saving = false;
 }
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 22b7c5d8e94a6641f9c75981d967582f190d6221..71f75f03f800d5ae4d1b3f6aaed13ad2c9e3edd6 100644
@@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value)
        return 0;
 }
 
-/*
- * To randomize kernel base address we have to consider several facts:
- * 1. physical online memory might not be continuous and have holes. physmem
- *    info contains list of online memory ranges we should consider.
- * 2. we have several memory regions which are occupied and we should not
- *    overlap and destroy them. Currently safe_addr tells us the border below
- *    which all those occupied regions are. We are safe to use anything above
- *    safe_addr.
- * 3. the upper limit might apply as well, even if memory above that limit is
- *    online. Currently those limitations are:
- *    3.1. Limit set by "mem=" kernel command line option
- *    3.2. memory reserved at the end for kasan initialization.
- * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
- *    Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
- *    (16 pages when the kernel is built with kasan enabled)
- * Assumptions:
- * 1. kernel size (including .bss size) and upper memory limit are page aligned.
- * 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE
- *    aligned (in practice memory configurations granularity on z/VM and LPAR
- *    is 1mb).
- *
- * To guarantee uniform distribution of kernel base address among all suitable
- * addresses we generate random value just once. For that we need to build a
- * continuous range in which every value would be suitable. We can build this
- * range by simply counting all suitable addresses (let's call them positions)
- * which would be valid as kernel base address. To count positions we iterate
- * over online memory ranges. For each range which is big enough for the
- * kernel image we count all suitable addresses we can put the kernel image at
- * that is
- * (end - start - kernel_size) / THREAD_SIZE + 1
- * Two functions count_valid_kernel_positions and position_to_address help
- * to count positions in memory range given and then convert position back
- * to address.
- */
-static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
-                                                 unsigned long _min,
-                                                 unsigned long _max)
-{
-       unsigned long start, end, pos = 0;
-       int i;
-
-       for_each_physmem_usable_range(i, &start, &end) {
-               if (_min >= end)
-                       continue;
-               if (start >= _max)
-                       break;
-               start = max(_min, start);
-               end = min(_max, end);
-               if (end - start < kernel_size)
-                       continue;
-               pos += (end - start - kernel_size) / THREAD_SIZE + 1;
-       }
-
-       return pos;
-}
-
-static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
-                                unsigned long _min, unsigned long _max)
-{
-       unsigned long start, end;
-       int i;
-
-       for_each_physmem_usable_range(i, &start, &end) {
-               if (_min >= end)
-                       continue;
-               if (start >= _max)
-                       break;
-               start = max(_min, start);
-               end = min(_max, end);
-               if (end - start < kernel_size)
-                       continue;
-               if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
-                       return start + (pos - 1) * THREAD_SIZE;
-               pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
-       }
-
-       return 0;
-}
-
-unsigned long get_random_base(unsigned long safe_addr)
+unsigned long get_random_base(void)
 {
-       unsigned long usable_total = get_physmem_usable_total();
-       unsigned long memory_limit = get_physmem_usable_end();
-       unsigned long base_pos, max_pos, kernel_size;
-       int i;
+       unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
+       unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size;
+       unsigned long random;
 
-       /*
-        * Avoid putting kernel in the end of physical memory
-        * which vmem and kasan code will use for shadow memory and
-        * pgtable mapping allocations.
-        */
-       memory_limit -= kasan_estimate_memory_needs(usable_total);
-       memory_limit -= vmem_estimate_memory_needs(usable_total);
-
-       safe_addr = ALIGN(safe_addr, THREAD_SIZE);
-       kernel_size = vmlinux.image_size + vmlinux.bss_size;
-       if (safe_addr + kernel_size > memory_limit)
+       /* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */
+       if (get_random(physmem_info.usable - minimal_pos, &random))
                return 0;
 
-       max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
-       if (!max_pos) {
-               sclp_early_printk("KASLR disabled: not enough memory\n");
-               return 0;
-       }
-
-       /* we need a value in the range [1, base_pos] inclusive */
-       if (get_random(max_pos, &base_pos))
-               return 0;
-       return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
+       return physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE,
+                                  vmlinux.default_lma, minimal_pos + random, false);
 }
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index c2a1defc79daf206c0a7c5e8d96abf9d957c269f..0861e3c403f814f302601fedff02950a1feca418 100644
@@ -123,11 +123,10 @@ out:
        sclp_early_printk(buf);
 }
 
-static noinline void print_stacktrace(void)
+void print_stacktrace(unsigned long sp)
 {
        struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
                                         (unsigned long)_stack_end };
-       unsigned long sp = S390_lowcore.gpregs_save_area[15];
        bool first = true;
 
        decompressor_printk("Call Trace:\n");
@@ -173,7 +172,7 @@ void print_pgm_check_info(void)
                            gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
        decompressor_printk("      %016lx %016lx %016lx %016lx\n",
                            gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
-       print_stacktrace();
+       print_stacktrace(S390_lowcore.gpregs_save_area[15]);
        decompressor_printk("Last Breaking-Event-Address:\n");
        decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
                            (void *)S390_lowcore.pgm_last_break);
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index dc2e4d0abfab04730a8a9a0982f9f2eae5ea841f..4ee9b73811420d069684008de126c7d8f69fcbf0 100644
@@ -1,16 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/processor.h>
 #include <linux/errno.h>
 #include <linux/init.h>
-#include <asm/setup.h>
-#include <asm/processor.h>
-#include <asm/sclp.h>
-#include <asm/sections.h>
 #include <asm/physmem_info.h>
+#include <asm/stacktrace.h>
+#include <asm/boot_data.h>
 #include <asm/sparsemem.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sclp.h>
+#include <asm/uv.h>
 #include "decompressor.h"
 #include "boot.h"
 
 struct physmem_info __bootdata(physmem_info);
+static unsigned int physmem_alloc_ranges;
+static unsigned long physmem_alloc_pos;
 
 /* up to 256 storage elements, 1020 subincrements each */
 #define ENTRIES_EXTENDED_MAX                                                  \
@@ -20,6 +25,11 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
 {
        if (n < MEM_INLINED_ENTRIES)
                return &physmem_info.online[n];
+       if (unlikely(!physmem_info.online_extended)) {
+               physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
+                       RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
+                       physmem_alloc_pos, true);
+       }
        return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
 }
 
@@ -143,49 +153,171 @@ static unsigned long search_mem_end(void)
        return (offset + 1) << 20;
 }
 
-unsigned long detect_memory(unsigned long *safe_addr)
+unsigned long detect_max_physmem_end(void)
 {
        unsigned long max_physmem_end = 0;
 
-       sclp_early_get_memsize(&max_physmem_end);
-       physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64));
+       if (!sclp_early_get_memsize(&max_physmem_end)) {
+               physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
+       } else {
+               max_physmem_end = search_mem_end();
+               physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
+       }
+       return max_physmem_end;
+}
 
+void detect_physmem_online_ranges(unsigned long max_physmem_end)
+{
        if (!sclp_early_read_storage_info()) {
                physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
        } else if (!diag260()) {
                physmem_info.info_source = MEM_DETECT_DIAG260;
-               max_physmem_end = max_physmem_end ?: get_physmem_usable_end();
        } else if (max_physmem_end) {
                add_physmem_online_range(0, max_physmem_end);
-               physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
-       } else {
-               max_physmem_end = search_mem_end();
-               add_physmem_online_range(0, max_physmem_end);
-               physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
        }
+}
 
-       if (physmem_info.range_count > MEM_INLINED_ENTRIES) {
-               *safe_addr += (physmem_info.range_count - MEM_INLINED_ENTRIES) *
-                             sizeof(struct physmem_range);
+void physmem_set_usable_limit(unsigned long limit)
+{
+       physmem_info.usable = limit;
+       physmem_alloc_pos = limit;
+}
+
+static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
+{
+       unsigned long start, end, total_mem = 0, total_reserved_mem = 0;
+       struct reserved_range *range;
+       enum reserved_range_type t;
+       int i;
+
+       decompressor_printk("Linux version %s\n", kernel_version);
+       if (!is_prot_virt_guest() && early_command_line[0])
+               decompressor_printk("Kernel command line: %s\n", early_command_line);
+       decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
+                           size, align, min, max);
+       decompressor_printk("Reserved memory ranges:\n");
+       for_each_physmem_reserved_range(t, range, &start, &end) {
+               decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
+               total_reserved_mem += end - start;
+       }
+       decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
+                           get_physmem_info_source(), physmem_info.info_source);
+       for_each_physmem_usable_range(i, &start, &end) {
+               decompressor_printk("%016lx %016lx\n", start, end);
+               total_mem += end - start;
        }
+       decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
+                           total_mem, total_reserved_mem,
+                           total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
+       print_stacktrace(current_frame_address());
+       sclp_early_printk("\n\n -- System halted\n");
+       disabled_wait();
+}
 
-       return max_physmem_end;
+void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
+{
+       physmem_info.reserved[type].start = addr;
+       physmem_info.reserved[type].end = addr + size;
 }
 
-void physmem_set_usable_limit(unsigned long limit)
+void physmem_free(enum reserved_range_type type)
 {
-       struct physmem_range *range;
-       int i;
+       physmem_info.reserved[type].start = 0;
+       physmem_info.reserved[type].end = 0;
+}
 
-       /* make sure mem_detect.usable ends up within online memory block */
-       for (i = 0; i < physmem_info.range_count; i++) {
-               range = __get_physmem_range_ptr(i);
-               if (range->start >= limit)
-                       break;
-               if (range->end >= limit) {
-                       physmem_info.usable = limit;
+static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size,
+                                      unsigned long *intersection_start)
+{
+       unsigned long res_addr, res_size;
+       int t;
+
+       for (t = 0; t < RR_MAX; t++) {
+               if (!get_physmem_reserved(t, &res_addr, &res_size))
+                       continue;
+               if (intersects(addr, size, res_addr, res_size)) {
+                       *intersection_start = res_addr;
+                       return true;
+               }
+       }
+       return ipl_report_certs_intersects(addr, size, intersection_start);
+}
+
+static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align,
+                                          unsigned long min, unsigned long max,
+                                          unsigned int from_ranges, unsigned int *ranges_left,
+                                          bool die_on_oom)
+{
+       unsigned int nranges = from_ranges ?: physmem_info.range_count;
+       unsigned long range_start, range_end;
+       unsigned long intersection_start;
+       unsigned long addr, pos = max;
+
+       align = max(align, 8UL);
+       while (nranges) {
+               __get_physmem_range(nranges - 1, &range_start, &range_end, false);
+               pos = min(range_end, pos);
+
+               if (round_up(min, align) + size > pos)
                        break;
+               addr = round_down(pos - size, align);
+               if (range_start > addr) {
+                       nranges--;
+                       continue;
+               }
+               if (__physmem_alloc_intersects(addr, size, &intersection_start)) {
+                       pos = intersection_start;
+                       continue;
+               }
+
+               if (ranges_left)
+                       *ranges_left = nranges;
+               return addr;
+       }
+       if (die_on_oom)
+               die_oom(size, align, min, max);
+       return 0;
+}
+
+unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
+                                 unsigned long align, unsigned long min, unsigned long max,
+                                 bool die_on_oom)
+{
+       unsigned long addr;
+
+       max = min(max, physmem_alloc_pos);
+       addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
+       if (addr)
+               physmem_reserve(type, addr, size);
+       return addr;
+}
+
+unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
+                                    unsigned long align)
+{
+       struct reserved_range *range = &physmem_info.reserved[type];
+       struct reserved_range *new_range;
+       unsigned int ranges_left;
+       unsigned long addr;
+
+       addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
+                                    &ranges_left, true);
+       /* if not a consecutive allocation of the same type or first allocation */
+       if (range->start != addr + size) {
+               if (range->end) {
+                       physmem_alloc_pos = __physmem_alloc_range(
+                               sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
+                               physmem_alloc_ranges, &ranges_left, true);
+                       new_range = (struct reserved_range *)physmem_alloc_pos;
+                       *new_range = *range;
+                       range->chain = new_range;
+                       addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
+                                                    ranges_left, &ranges_left, true);
                }
-               physmem_info.usable = range->end;
+               range->end = addr + size;
        }
+       range->start = addr;
+       physmem_alloc_pos = addr;
+       physmem_alloc_ranges = ranges_left;
+       return addr;
 }
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 50475bf25ecda0816af0360bdbcd09627384766a..bc07e24329b9517fdb58d81ab6b7b2affb4d174f 100644
@@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset);
 unsigned long __bootdata_preserved(__abs_lowcore);
 unsigned long __bootdata_preserved(__memcpy_real_area);
 pte_t *__bootdata_preserved(memcpy_real_ptep);
-unsigned long __bootdata(__amode31_base);
 unsigned long __bootdata_preserved(VMALLOC_START);
 unsigned long __bootdata_preserved(VMALLOC_END);
 struct page *__bootdata_preserved(vmemmap);
@@ -29,7 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size);
 unsigned long __bootdata_preserved(MODULES_VADDR);
 unsigned long __bootdata_preserved(MODULES_END);
 unsigned long __bootdata(ident_map_size);
-struct initrd_data __bootdata(initrd_data);
 
 u64 __bootdata_preserved(stfle_fac_list[16]);
 u64 __bootdata_preserved(alt_stfle_fac_list[16]);
@@ -75,17 +73,20 @@ unsigned long mem_safe_offset(void)
 }
 #endif
 
-static unsigned long rescue_initrd(unsigned long safe_addr)
+static void rescue_initrd(unsigned long min, unsigned long max)
 {
+       unsigned long old_addr, addr, size;
+
        if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
-               return safe_addr;
-       if (!initrd_data.start || !initrd_data.size)
-               return safe_addr;
-       if (initrd_data.start < safe_addr) {
-               memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size);
-               initrd_data.start = safe_addr;
-       }
-       return initrd_data.start + initrd_data.size;
+               return;
+       if (!get_physmem_reserved(RR_INITRD, &addr, &size))
+               return;
+       if (addr >= min && addr + size <= max)
+               return;
+       old_addr = addr;
+       physmem_free(RR_INITRD);
+       addr = physmem_alloc_top_down(RR_INITRD, size, 0);
+       memmove((void *)addr, (void *)old_addr, size);
 }
 
 static void copy_bootdata(void)
@@ -267,46 +268,52 @@ static void offset_vmlinux_info(unsigned long offset)
        vmlinux.invalid_pg_dir_off += offset;
 }
 
-static unsigned long reserve_amode31(unsigned long safe_addr)
-{
-       __amode31_base = PAGE_ALIGN(safe_addr);
-       return __amode31_base + vmlinux.amode31_size;
-}
-
 void startup_kernel(void)
 {
        unsigned long max_physmem_end;
        unsigned long random_lma;
-       unsigned long safe_addr;
        unsigned long asce_limit;
+       unsigned long safe_addr;
        void *img;
        psw_t psw;
 
-       initrd_data.start = parmarea.initrd_start;
-       initrd_data.size = parmarea.initrd_size;
+       setup_lpp();
+       safe_addr = mem_safe_offset();
+       /*
+        * reserve decompressor memory together with decompression heap, buffer and
+        * memory which might be occupied by uncompressed kernel at default 1Mb
+        * position (if KASLR is off or failed).
+        */
+       physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
+       if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
+               physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
        oldmem_data.start = parmarea.oldmem_base;
        oldmem_data.size = parmarea.oldmem_size;
 
-       setup_lpp();
        store_ipl_parmblock();
-       safe_addr = mem_safe_offset();
-       safe_addr = reserve_amode31(safe_addr);
-       safe_addr = read_ipl_report(safe_addr);
+       read_ipl_report();
        uv_query_info();
-       safe_addr = rescue_initrd(safe_addr);
        sclp_early_read_info();
        setup_boot_command_line();
        parse_boot_command_line();
        detect_facilities();
        sanitize_prot_virt_host();
-       max_physmem_end = detect_memory(&safe_addr);
+       max_physmem_end = detect_max_physmem_end();
        setup_ident_map_size(max_physmem_end);
        setup_vmalloc_size();
        asce_limit = setup_kernel_memory_layout();
+       /* got final ident_map_size, physmem allocations could be performed now */
        physmem_set_usable_limit(ident_map_size);
+       detect_physmem_online_ranges(max_physmem_end);
+       save_ipl_cert_comp_list();
+       rescue_initrd(safe_addr, ident_map_size);
+#ifdef CONFIG_KASAN
+       physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()),
+                              _SEGMENT_SIZE);
+#endif
 
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
-               random_lma = get_random_base(safe_addr);
+               random_lma = get_random_base();
                if (random_lma) {
                        __kaslr_offset = random_lma - vmlinux.default_lma;
                        img = (void *)vmlinux.default_lma;
@@ -317,8 +324,16 @@ void startup_kernel(void)
        if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
                img = decompress_kernel();
                memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
-       } else if (__kaslr_offset)
+       } else if (__kaslr_offset) {
                memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+               memset(img, 0, vmlinux.image_size);
+       }
+
+       /* vmlinux decompression is done, shrink reserved low memory */
+       physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
+       if (!__kaslr_offset)
+               physmem_reserve(RR_VMLINUX, vmlinux.default_lma, vmlinux.image_size + vmlinux.bss_size);
+       physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true);
 
        /*
         * The order of the following operations is important:
@@ -338,16 +353,11 @@ void startup_kernel(void)
        setup_vmem(asce_limit);
        copy_bootdata();
 
-       if (__kaslr_offset) {
-               /*
-                * Save KASLR offset for early dumps, before vmcore_info is set.
-                * Mark as uneven to distinguish from real vmcore_info pointer.
-                */
-               S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
-               /* Clear non-relocated kernel */
-               if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
-                       memset(img, 0, vmlinux.image_size);
-       }
+       /*
+        * Save KASLR offset for early dumps, before vmcore_info is set.
+        * Mark as uneven to distinguish from real vmcore_info pointer.
+        */
+       S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0;
 
        /*
         * Jump to the decompressed kernel entry point and switch DAT mode on.
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index b89a6893f398b059d54ffa8613a96e62ca82d1da..8f16e6f9fb20fb0cdde418a109c1d45bc672ee9e 100644
@@ -10,6 +10,8 @@
 #include "decompressor.h"
 #include "boot.h"
 
+unsigned long __bootdata_preserved(s390_invalid_asce);
+
 #define init_mm                        (*(struct mm_struct *)vmlinux.init_mm_off)
 #define swapper_pg_dir         vmlinux.swapper_pg_dir_off
 #define invalid_pg_dir         vmlinux.invalid_pg_dir_off
@@ -22,77 +24,27 @@ static inline pte_t *__virt_to_kpte(unsigned long va)
        return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
 }
 
-unsigned long __bootdata_preserved(s390_invalid_asce);
-unsigned long __bootdata(pgalloc_pos);
-unsigned long __bootdata(pgalloc_end);
-unsigned long __bootdata(pgalloc_low);
-
 enum populate_mode {
        POPULATE_NONE,
        POPULATE_ONE2ONE,
        POPULATE_ABS_LOWCORE,
 };
 
-static void boot_check_oom(void)
-{
-       if (pgalloc_pos < pgalloc_low)
-               error("out of memory on boot\n");
-}
-
-static void pgtable_populate_init(void)
-{
-       unsigned long initrd_end;
-       unsigned long kernel_end;
-
-       kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
-       pgalloc_low = round_up(kernel_end, PAGE_SIZE);
-       if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
-               initrd_end =  round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
-               pgalloc_low = max(pgalloc_low, initrd_end);
-       }
-
-       pgalloc_end = round_down(get_physmem_usable_end(), PAGE_SIZE);
-       pgalloc_pos = pgalloc_end;
-
-       boot_check_oom();
-}
-
-static void *boot_alloc_pages(unsigned int order)
-{
-       unsigned long size = PAGE_SIZE << order;
-
-       pgalloc_pos -= size;
-       pgalloc_pos = round_down(pgalloc_pos, size);
-
-       boot_check_oom();
-
-       return (void *)pgalloc_pos;
-}
-
 static void *boot_crst_alloc(unsigned long val)
 {
+       unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
        unsigned long *table;
 
-       table = boot_alloc_pages(CRST_ALLOC_ORDER);
-       if (table)
-               crst_table_init(table, val);
+       table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
+       crst_table_init(table, val);
        return table;
 }
 
 static pte_t *boot_pte_alloc(void)
 {
-       static void *pte_leftover;
        pte_t *pte;
 
-       BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
-
-       if (!pte_leftover) {
-               pte_leftover = boot_alloc_pages(0);
-               pte = pte_leftover + _PAGE_TABLE_SIZE;
-       } else {
-               pte = pte_leftover;
-               pte_leftover = NULL;
-       }
+       pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE);
        memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
        return pte;
 }
@@ -126,7 +78,6 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
 static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
                                 enum populate_mode mode)
 {
-       unsigned long next;
        pte_t *pte, entry;
 
        pte = pte_offset_kernel(pmd, addr);
@@ -250,7 +201,6 @@ void setup_vmem(unsigned long asce_limit)
         * To prevent creation of a large page at address 0 first map
         * the lowcore and create the identity mapping only afterwards.
         */
-       pgtable_populate_init();
        pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
        for_each_physmem_usable_range(i, &start, &end)
                pgtable_populate(start, end, POPULATE_ONE2ONE);
@@ -269,10 +219,3 @@ void setup_vmem(unsigned long asce_limit)
 
        init_mm.context.asce = S390_lowcore.kernel_asce;
 }
-
-unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
-{
-       unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);
-
-       return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
-}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index fa9d33b01b858d56ead72a08ca9b27530717a06f..389df0e0d9e5d58fb89025e7e0eb4c532569e35e 100644
@@ -93,6 +93,8 @@ SECTIONS
                _decompressor_syms_end = .;
        }
 
+       _decompressor_end = .;
+
 #ifdef CONFIG_KERNEL_UNCOMPRESSED
        . = 0x100000;
 #else
diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h
index d5e65a5d06e7f596c76ce3ce5e7e0f0ea3ed1190..27234fa1da8ea0e5fd2792906bc4c86437b0f95b 100644
@@ -17,6 +17,27 @@ struct physmem_range {
        u64 end;
 };
 
+enum reserved_range_type {
+       RR_DECOMPRESSOR,
+       RR_INITRD,
+       RR_VMLINUX,
+       RR_AMODE31,
+       RR_IPLREPORT,
+       RR_CERT_COMP_LIST,
+       RR_MEM_DETECT_EXTENDED,
+       RR_VMEM,
+#ifdef CONFIG_KASAN
+       RR_KASAN,
+#endif
+       RR_MAX
+};
+
+struct reserved_range {
+       unsigned long start;
+       unsigned long end;
+       struct reserved_range *chain;
+};
+
 /*
  * Storage element id is defined as 1 byte (up to 256 storage elements).
  * In practise only storage element id 0 and 1 are used).
@@ -31,6 +52,7 @@ struct physmem_info {
        u32 range_count;
        u8 info_source;
        unsigned long usable;
+       struct reserved_range reserved[RR_MAX];
        struct physmem_range online[MEM_INLINED_ENTRIES];
        struct physmem_range *online_extended;
 };
@@ -80,6 +102,70 @@ static inline int __get_physmem_range(u32 n, unsigned long *start,
 #define for_each_physmem_online_range(i, p_start, p_end)               \
        for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++)
 
+static inline const char *get_physmem_info_source(void)
+{
+       switch (physmem_info.info_source) {
+       case MEM_DETECT_SCLP_STOR_INFO:
+               return "sclp storage info";
+       case MEM_DETECT_DIAG260:
+               return "diag260";
+       case MEM_DETECT_SCLP_READ_INFO:
+               return "sclp read info";
+       case MEM_DETECT_BIN_SEARCH:
+               return "binary search";
+       }
+       return "none";
+}
+
+#define RR_TYPE_NAME(t) case RR_ ## t: return #t
+static inline const char *get_rr_type_name(enum reserved_range_type t)
+{
+       switch (t) {
+       RR_TYPE_NAME(DECOMPRESSOR);
+       RR_TYPE_NAME(INITRD);
+       RR_TYPE_NAME(VMLINUX);
+       RR_TYPE_NAME(AMODE31);
+       RR_TYPE_NAME(IPLREPORT);
+       RR_TYPE_NAME(CERT_COMP_LIST);
+       RR_TYPE_NAME(MEM_DETECT_EXTENDED);
+       RR_TYPE_NAME(VMEM);
+#ifdef CONFIG_KASAN
+       RR_TYPE_NAME(KASAN);
+#endif
+       default:
+               return "UNKNOWN";
+       }
+}
+
+#define for_each_physmem_reserved_type_range(t, range, p_start, p_end)                         \
+       for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end;   \
+            range && range->end; range = range->chain,                                         \
+            *p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
+
+static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
+                                                            struct reserved_range *range)
+{
+       if (!range) {
+               range = &physmem_info.reserved[*t];
+               if (range->end)
+                       return range;
+       }
+       if (range->chain)
+               return range->chain;
+       while (++*t < RR_MAX) {
+               range = &physmem_info.reserved[*t];
+               if (range->end)
+                       return range;
+       }
+       return NULL;
+}
+
+#define for_each_physmem_reserved_range(t, range, p_start, p_end)                      \
+       for (t = 0, range = __physmem_reserved_next(&t, NULL),                  \
+           *p_start = range ? range->start : 0, *p_end = range ? range->end : 0;       \
+            range; range = __physmem_reserved_next(&t, range),                 \
+           *p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
+
 static inline unsigned long get_physmem_usable_total(void)
 {
        unsigned long start, end, total = 0;
@@ -91,28 +177,12 @@ static inline unsigned long get_physmem_usable_total(void)
        return total;
 }
 
-static inline void get_physmem_reserved(unsigned long *start, unsigned long *size)
+static inline unsigned long get_physmem_reserved(enum reserved_range_type type,
+                                                unsigned long *addr, unsigned long *size)
 {
-       *start = (unsigned long)physmem_info.online_extended;
-       if (physmem_info.range_count > MEM_INLINED_ENTRIES)
-               *size = (physmem_info.range_count - MEM_INLINED_ENTRIES) *
-                       sizeof(struct physmem_range);
-       else
-               *size = 0;
-}
-
-static inline unsigned long get_physmem_usable_end(void)
-{
-       unsigned long start;
-       unsigned long end;
-
-       if (physmem_info.usable)
-               return physmem_info.usable;
-       if (physmem_info.range_count) {
-               __get_physmem_range(physmem_info.range_count - 1, &start, &end, false);
-               return end;
-       }
-       return 0;
+       *addr = physmem_info.reserved[type].start;
+       *size = physmem_info.reserved[type].end - physmem_info.reserved[type].start;
+       return *size;
 }
 
 #endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index fc887e3e76f843840f31d8f75348e86fd31b2586..966d569f49b7b6aa3a87d1695ff7945cb5d08ce4 100644
@@ -74,10 +74,6 @@ extern unsigned int zlib_dfltcc_support;
 
 extern int noexec_disabled;
 extern unsigned long ident_map_size;
-extern unsigned long pgalloc_pos;
-extern unsigned long pgalloc_end;
-extern unsigned long pgalloc_low;
-extern unsigned long __amode31_base;
 
 /* The Write Back bit position in the physaddr is given by the SLPC PCI */
 extern unsigned long mio_wb_bit_mask;
@@ -150,11 +146,6 @@ static inline unsigned long kaslr_offset(void)
        return __kaslr_offset;
 }
 
-struct initrd_data {
-       unsigned long start;
-       unsigned long size;
-};
-extern struct initrd_data initrd_data;
 
 struct oldmem_data {
        unsigned long start;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f909a2dc8a5a2ad0a604ff57b460fa38df8c54c0..d25425b8d0c0da64f348357d7bf3e2ec2aa5a8ad 100644
@@ -148,13 +148,8 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 int __bootdata(noexec_disabled);
 unsigned long __bootdata(ident_map_size);
 struct physmem_info __bootdata(physmem_info);
-struct initrd_data __bootdata(initrd_data);
-unsigned long __bootdata(pgalloc_pos);
-unsigned long __bootdata(pgalloc_end);
-unsigned long __bootdata(pgalloc_low);
 
 unsigned long __bootdata_preserved(__kaslr_offset);
-unsigned long __bootdata(__amode31_base);
 unsigned int __bootdata_preserved(zlib_dfltcc_support);
 EXPORT_SYMBOL(zlib_dfltcc_support);
 u64 __bootdata_preserved(stfle_fac_list[16]);
@@ -635,7 +630,11 @@ static struct notifier_block kdump_mem_nb = {
  */
 static void __init reserve_pgtables(void)
 {
-       memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
+       unsigned long start, end;
+       struct reserved_range *range;
+
+       for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
+               memblock_reserve(start, end - start);
 }
 
 /*
@@ -712,13 +711,13 @@ static void __init reserve_crashkernel(void)
  */
 static void __init reserve_initrd(void)
 {
-#ifdef CONFIG_BLK_DEV_INITRD
-       if (!initrd_data.start || !initrd_data.size)
+       unsigned long addr, size;
+
+       if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
                return;
-       initrd_start = (unsigned long)__va(initrd_data.start);
-       initrd_end = initrd_start + initrd_data.size;
-       memblock_reserve(initrd_data.start, initrd_data.size);
-#endif
+       initrd_start = (unsigned long)__va(addr);
+       initrd_end = initrd_start + size;
+       memblock_reserve(addr, size);
 }
 
 /*
@@ -732,35 +731,18 @@ static void __init reserve_certificate_list(void)
 
 static void __init reserve_physmem_info(void)
 {
-       unsigned long start, size;
+       unsigned long addr, size;
 
-       get_physmem_reserved(&start, &size);
-       if (size)
-               memblock_reserve(start, size);
+       if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+               memblock_reserve(addr, size);
 }
 
 static void __init free_physmem_info(void)
 {
-       unsigned long start, size;
+       unsigned long addr, size;
 
-       get_physmem_reserved(&start, &size);
-       if (size)
-               memblock_phys_free(start, size);
-}
-
-static const char * __init get_mem_info_source(void)
-{
-       switch (physmem_info.info_source) {
-       case MEM_DETECT_SCLP_STOR_INFO:
-               return "sclp storage info";
-       case MEM_DETECT_DIAG260:
-               return "diag260";
-       case MEM_DETECT_SCLP_READ_INFO:
-               return "sclp read info";
-       case MEM_DETECT_BIN_SEARCH:
-               return "binary search";
-       }
-       return "none";
+       if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
+               memblock_phys_free(addr, size);
 }
 
 static void __init memblock_add_physmem_info(void)
@@ -769,7 +751,7 @@ static void __init memblock_add_physmem_info(void)
        int i;
 
        pr_debug("physmem info source: %s (%hhd)\n",
-                get_mem_info_source(), physmem_info.info_source);
+                get_physmem_info_source(), physmem_info.info_source);
        /* keep memblock lists close to the kernel */
        memblock_set_bottom_up(true);
        for_each_physmem_usable_range(i, &start, &end)
@@ -780,21 +762,6 @@ static void __init memblock_add_physmem_info(void)
        memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
 }
 
-/*
- * Check for initrd being in usable memory
- */
-static void __init check_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-       if (initrd_data.start && initrd_data.size &&
-           !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
-               pr_err("The initial RAM disk does not fit into the memory\n");
-               memblock_phys_free(initrd_data.start, initrd_data.size);
-               initrd_start = initrd_end = 0;
-       }
-#endif
-}
-
 /*
  * Reserve memory used for lowcore/command line/kernel image.
  */
@@ -803,7 +770,7 @@ static void __init reserve_kernel(void)
        memblock_reserve(0, STARTUP_NORMAL_OFFSET);
        memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
        memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
-       memblock_reserve(__amode31_base, __eamode31 - __samode31);
+       memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
        memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
        memblock_reserve(__pa(_stext), _end - _stext);
 }
@@ -825,13 +792,13 @@ static void __init setup_memory(void)
 static void __init relocate_amode31_section(void)
 {
        unsigned long amode31_size = __eamode31 - __samode31;
-       long amode31_offset = __amode31_base - __samode31;
+       long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
        long *ptr;
 
        pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
 
        /* Move original AMODE31 section to the new one */
-       memmove((void *)__amode31_base, (void *)__samode31, amode31_size);
+       memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
        /* Zero out the old AMODE31 section to catch invalid accesses within it */
        memset((void *)__samode31, 0, amode31_size);
 
@@ -1017,7 +984,6 @@ void __init setup_arch(char **cmdline_p)
        if (MACHINE_HAS_EDAT2)
                hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
 
-       check_initrd();
        reserve_crashkernel();
 #ifdef CONFIG_CRASH_DUMP
        /*
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index b0658136264ff3ae0f6b60ef7d069f5413f94c96..2b20382f1bd8450b46fd60834795b26fd9d6e65d 100644
@@ -1,19 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/kasan.h>
-#include <linux/sched/task.h>
+#include <linux/memblock.h>
 #include <linux/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/kasan.h>
+#include <linux/kasan.h>
 #include <asm/physmem_info.h>
 #include <asm/processor.h>
-#include <asm/sclp.h>
 #include <asm/facility.h>
-#include <asm/sections.h>
-#include <asm/setup.h>
-#include <asm/uv.h>
+#include <asm/pgalloc.h>
+#include <asm/sclp.h>
 
+static unsigned long pgalloc_pos __initdata;
 static unsigned long segment_pos __initdata;
-static unsigned long segment_low __initdata;
 static bool has_edat __initdata;
 static bool has_nx __initdata;
 
@@ -28,19 +24,20 @@ static void __init kasan_early_panic(const char *reason)
 
 static void * __init kasan_early_alloc_segment(void)
 {
-       segment_pos -= _SEGMENT_SIZE;
+       unsigned long addr = segment_pos;
 
-       if (segment_pos < segment_low)
+       segment_pos += _SEGMENT_SIZE;
+       if (segment_pos > pgalloc_pos)
                kasan_early_panic("out of memory during initialisation\n");
 
-       return __va(segment_pos);
+       return __va(addr);
 }
 
 static void * __init kasan_early_alloc_pages(unsigned int order)
 {
        pgalloc_pos -= (PAGE_SIZE << order);
 
-       if (pgalloc_pos < pgalloc_low)
+       if (segment_pos > pgalloc_pos)
                kasan_early_panic("out of memory during initialisation\n");
 
        return __va(pgalloc_pos);
@@ -225,8 +222,8 @@ void __init kasan_early_init(void)
        pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
        pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
        p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
+       unsigned long pgalloc_pos_initial, segment_pos_initial;
        unsigned long untracked_end = MODULES_VADDR;
-       unsigned long shadow_alloc_size;
        unsigned long start, end;
        int i;
 
@@ -243,13 +240,11 @@ void __init kasan_early_init(void)
        crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
        memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
 
-       if (has_edat) {
-               shadow_alloc_size = get_physmem_usable_total() >> KASAN_SHADOW_SCALE_SHIFT;
-               segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
-               segment_low = segment_pos - shadow_alloc_size;
-               segment_low = round_down(segment_low, _SEGMENT_SIZE);
-               pgalloc_pos = segment_low;
-       }
+       /* segment allocations go bottom up -> <- pgalloc go top down */
+       segment_pos_initial = physmem_info.reserved[RR_KASAN].start;
+       segment_pos = segment_pos_initial;
+       pgalloc_pos_initial = physmem_info.reserved[RR_KASAN].end;
+       pgalloc_pos = pgalloc_pos_initial;
        /*
         * Current memory layout:
         * +- 0 -------------+         +- shadow start -+
@@ -298,4 +293,6 @@ void __init kasan_early_init(void)
        /* enable kasan */
        init_task.kasan_depth = 0;
        sclp_early_printk("KernelAddressSanitizer initialized\n");
+       memblock_reserve(segment_pos_initial, segment_pos - segment_pos_initial);
+       memblock_reserve(pgalloc_pos, pgalloc_pos_initial - pgalloc_pos);
 }