From: jbeulich@novell.com Subject: fix issues with the assignment of huge amounts of memory Patch-mainline: obsolete At the same time, remove the non-applicable and broken support for the memmap= command line option. --- sle11-2009-05-14.orig/arch/x86/kernel/e820-xen.c 2009-02-17 14:01:20.000000000 +0100 +++ sle11-2009-05-14/arch/x86/kernel/e820-xen.c 2009-03-13 16:14:19.000000000 +0100 @@ -1308,6 +1308,26 @@ static int __init parse_memopt(char *p) i = e820.nr_map - 1; current_end = e820.map[i].addr + e820.map[i].size; + + /* + * A little less than 2% of available memory is needed for page + * tables, p2m map, and mem_map. Hence the maximum amount of memory + * we can potentially balloon up to can in no case exceed about 50 + * times what we've been given initially. Since even with that we + * won't be able to boot (due to various calculations done based on + * the total number of pages) we further restrict this to factor 32. + */ + if ((mem_size >> (PAGE_SHIFT + 5)) > xen_start_info->nr_pages) { + u64 size = (u64)xen_start_info->nr_pages << 5; + + printk(KERN_WARNING "mem=%Luk is invalid for an initial" + " allocation of %luk, using %Luk\n", + (unsigned long long)mem_size >> 10, + xen_start_info->nr_pages << (PAGE_SHIFT - 10), + (unsigned long long)size << (PAGE_SHIFT - 10)); + mem_size = size << PAGE_SHIFT; + } + if (current_end < mem_size) { /* * The e820 map ends before our requested size so @@ -1367,6 +1387,7 @@ static int __init parse_memmap_opt(char return *p == '\0' ?
0 : -EINVAL; } early_param("memmap", parse_memmap_opt); +#endif void __init finish_e820_parsing(void) { @@ -1381,7 +1402,6 @@ void __init finish_e820_parsing(void) e820_print_map("user"); } } -#endif static inline const char *e820_type_to_string(int e820_type) { @@ -1407,7 +1427,7 @@ void __init e820_reserve_resources(void) struct resource *res; u64 end; - res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); + res = alloc_bootmem(sizeof(struct resource) * e820.nr_map); for (i = 0; i < e820.nr_map; i++) { end = e820.map[i].addr + e820.map[i].size - 1; #ifndef CONFIG_RESOURCES_64BIT --- sle11-2009-05-14.orig/arch/x86/kernel/setup-xen.c 2008-11-17 13:58:02.000000000 +0100 +++ sle11-2009-05-14/arch/x86/kernel/setup-xen.c 2009-02-16 17:05:16.000000000 +0100 @@ -128,12 +128,7 @@ static struct notifier_block xen_panic_b unsigned long *phys_to_machine_mapping; EXPORT_SYMBOL(phys_to_machine_mapping); -unsigned long *pfn_to_mfn_frame_list_list, -#ifdef CONFIG_X86_64 - *pfn_to_mfn_frame_list[512]; -#else - *pfn_to_mfn_frame_list[128]; -#endif +unsigned long *pfn_to_mfn_frame_list_list, **pfn_to_mfn_frame_list; /* Raw start-of-day parameters from the hypervisor. */ start_info_t *xen_start_info; @@ -1037,17 +1032,17 @@ void __init setup_arch(char **cmdline_p) p2m_pages = xen_start_info->nr_pages; if (!xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long i, j; + unsigned long i, j, size; unsigned int k, fpp; /* Make sure we have a large enough P->M table. 
*/ phys_to_machine_mapping = alloc_bootmem_pages( max_pfn * sizeof(unsigned long)); - memset(phys_to_machine_mapping, ~0, - max_pfn * sizeof(unsigned long)); memcpy(phys_to_machine_mapping, (unsigned long *)xen_start_info->mfn_list, p2m_pages * sizeof(unsigned long)); + memset(phys_to_machine_mapping + p2m_pages, ~0, + (max_pfn - p2m_pages) * sizeof(unsigned long)); free_bootmem( __pa(xen_start_info->mfn_list), PFN_PHYS(PFN_UP(xen_start_info->nr_pages * @@ -1057,15 +1052,26 @@ void __init setup_arch(char **cmdline_p) * Initialise the list of the frames that specify the list of * frames that make up the p2m table. Used by save/restore. */ - pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE); - fpp = PAGE_SIZE/sizeof(unsigned long); + size = (max_pfn + fpp - 1) / fpp; + size = (size + fpp - 1) / fpp; + ++size; /* include a zero terminator for crash tools */ + size *= sizeof(unsigned long); + pfn_to_mfn_frame_list_list = alloc_bootmem_pages(size); + if (size > PAGE_SIZE + && xen_create_contiguous_region((unsigned long) + pfn_to_mfn_frame_list_list, + get_order(size), 0)) + BUG(); + size -= sizeof(unsigned long); + pfn_to_mfn_frame_list = alloc_bootmem(size); + for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) { if (j == fpp) j = 0; if (j == 0) { k++; - BUG_ON(k>=ARRAY_SIZE(pfn_to_mfn_frame_list)); + BUG_ON(k * sizeof(unsigned long) >= size); pfn_to_mfn_frame_list[k] = alloc_bootmem_pages(PAGE_SIZE); pfn_to_mfn_frame_list_list[k] = --- sle11-2009-05-14.orig/arch/x86/kernel/setup_percpu-xen.c 2009-03-16 16:38:16.000000000 +0100 +++ sle11-2009-05-14/arch/x86/kernel/setup_percpu-xen.c 2009-03-13 16:14:41.000000000 +0100 @@ -211,7 +211,7 @@ static void __init setup_node_to_cpumask } /* allocate the map */ - map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + map = alloc_bootmem(nr_node_ids * sizeof(cpumask_t)); pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", map, nr_node_ids); --- sle11-2009-05-14.orig/drivers/xen/core/machine_kexec.c 
2009-03-16 16:38:16.000000000 +0100 +++ sle11-2009-05-14/drivers/xen/core/machine_kexec.c 2009-03-13 16:13:15.000000000 +0100 @@ -57,7 +57,7 @@ void __init xen_machine_kexec_setup_reso /* allocate xen_phys_cpus */ - xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource)); + xen_phys_cpus = alloc_bootmem(k * sizeof(struct resource)); BUG_ON(xen_phys_cpus == NULL); /* fill in xen_phys_cpus with per-cpu crash note information */ --- sle11-2009-05-14.orig/drivers/xen/core/machine_reboot.c 2009-02-17 12:23:48.000000000 +0100 +++ sle11-2009-05-14/drivers/xen/core/machine_reboot.c 2009-02-17 12:25:29.000000000 +0100 @@ -76,7 +76,7 @@ static void post_suspend(int suspend_can unsigned long shinfo_mfn; extern unsigned long max_pfn; extern unsigned long *pfn_to_mfn_frame_list_list; - extern unsigned long *pfn_to_mfn_frame_list[]; + extern unsigned long **pfn_to_mfn_frame_list; if (suspend_cancelled) { xen_start_info->store_mfn =