--- /dev/null
+From b7b898ae0c0a82489511a1ce1b35f26215e6beb5 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 18 Jan 2014 12:48:17 +0100
+Subject: x86/efi: Make efi virtual runtime map passing more robust
+
+From: Borislav Petkov <bp@suse.de>
+
+commit b7b898ae0c0a82489511a1ce1b35f26215e6beb5 upstream.
+
+Currently, running SetVirtualAddressMap() and passing the physical
+address of the virtual map array was working only by a lucky coincidence
+because the memory was present in the EFI page table too. Until Toshi
+went and booted this on a big HP box - the krealloc() manner of resizing
+the memmap we're doing did allocate from such physical addresses which
+were not mapped anymore and boom:
+
+http://lkml.kernel.org/r/1386806463.1791.295.camel@misato.fc.hp.com
+
+One way to take care of that issue is to reimplement the krealloc thing
+but with pages. We start with contiguous pages of order 1, i.e. 2 pages,
+and when we deplete that memory (shouldn't happen all that often but you
+know firmware) we realloc the next power-of-two pages.
+
+Having the pages, it is much more handy and easy to map them into the
+EFI page table with the already existing mapping code which we're using
+for building the virtual mappings.
+
+Thanks to Toshi Kani and Matt for the great debugging help.
+
+Reported-by: Toshi Kani <toshi.kani@hp.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Tested-by: Toshi Kani <toshi.kani@hp.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/efi.h | 3 -
+ arch/x86/platform/efi/efi.c | 99 ++++++++++++++++++++++++++++++++---------
+ arch/x86/platform/efi/efi_32.c | 7 ++
+ arch/x86/platform/efi/efi_64.c | 32 ++++++++++++-
+ 4 files changed, 115 insertions(+), 26 deletions(-)
+
+--- a/arch/x86/include/asm/efi.h
++++ b/arch/x86/include/asm/efi.h
+@@ -130,7 +130,8 @@ extern void efi_memory_uc(u64 addr, unsi
+ extern void __init efi_map_region(efi_memory_desc_t *md);
+ extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
+ extern void efi_sync_low_kernel_mappings(void);
+-extern void efi_setup_page_tables(void);
++extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
++extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
+ extern void __init old_map_region(efi_memory_desc_t *md);
+ extern void __init runtime_code_page_mkexec(void);
+ extern void __init efi_runtime_mkexec(void);
+--- a/arch/x86/platform/efi/efi.c
++++ b/arch/x86/platform/efi/efi.c
+@@ -939,14 +939,36 @@ static void __init efi_map_regions_fixed
+
+ }
+
++static void *realloc_pages(void *old_memmap, int old_shift)
++{
++ void *ret;
++
++ ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
++ if (!ret)
++ goto out;
++
++ /*
++ * A first-time allocation doesn't have anything to copy.
++ */
++ if (!old_memmap)
++ return ret;
++
++ memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
++
++out:
++ free_pages((unsigned long)old_memmap, old_shift);
++ return ret;
++}
++
+ /*
+- * Map efi memory ranges for runtime serivce and update new_memmap with virtual
+- * addresses.
++ * Map the efi memory ranges of the runtime services and update new_mmap with
++ * virtual addresses.
+ */
+-static void * __init efi_map_regions(int *count)
++static void * __init efi_map_regions(int *count, int *pg_shift)
+ {
++ void *p, *new_memmap = NULL;
++ unsigned long left = 0;
+ efi_memory_desc_t *md;
+- void *p, *tmp, *new_memmap = NULL;
+
+ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ md = p;
+@@ -961,20 +983,23 @@ static void * __init efi_map_regions(int
+ efi_map_region(md);
+ get_systab_virt_addr(md);
+
+- tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
+- GFP_KERNEL);
+- if (!tmp)
+- goto out;
+- new_memmap = tmp;
++ if (left < memmap.desc_size) {
++ new_memmap = realloc_pages(new_memmap, *pg_shift);
++ if (!new_memmap)
++ return NULL;
++
++ left += PAGE_SIZE << *pg_shift;
++ (*pg_shift)++;
++ }
++
+ memcpy(new_memmap + (*count * memmap.desc_size), md,
+ memmap.desc_size);
++
++ left -= memmap.desc_size;
+ (*count)++;
+ }
+
+ return new_memmap;
+-out:
+- kfree(new_memmap);
+- return NULL;
+ }
+
+ /*
+@@ -1000,9 +1025,9 @@ out:
+ */
+ void __init efi_enter_virtual_mode(void)
+ {
+- efi_status_t status;
++ int err, count = 0, pg_shift = 0;
+ void *new_memmap = NULL;
+- int err, count = 0;
++ efi_status_t status;
+
+ efi.systab = NULL;
+
+@@ -1019,20 +1044,24 @@ void __init efi_enter_virtual_mode(void)
+ efi_map_regions_fixed();
+ } else {
+ efi_merge_regions();
+- new_memmap = efi_map_regions(&count);
++ new_memmap = efi_map_regions(&count, &pg_shift);
+ if (!new_memmap) {
+ pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+ return;
+ }
+- }
+
+- err = save_runtime_map();
+- if (err)
+- pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
++ err = save_runtime_map();
++ if (err)
++ pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
++ }
+
+ BUG_ON(!efi.systab);
+
+- efi_setup_page_tables();
++ if (!efi_setup) {
++ if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
++ return;
++ }
++
+ efi_sync_low_kernel_mappings();
+
+ if (!efi_setup) {
+@@ -1072,7 +1101,35 @@ void __init efi_enter_virtual_mode(void)
+
+ efi_runtime_mkexec();
+
+- kfree(new_memmap);
++
++ /*
++ * We mapped the descriptor array into the EFI pagetable above but we're
++ * not unmapping it here. Here's why:
++ *
++ * We're copying select PGDs from the kernel page table to the EFI page
++ * table and when we do so and make changes to those PGDs like unmapping
++ * stuff from them, those changes appear in the kernel page table and we
++ * go boom.
++ *
++ * From setup_real_mode():
++ *
++ * ...
++ * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
++ *
++ * In this particular case, our allocation is in PGD 0 of the EFI page
++ * table but we've copied that PGD from PGD[272] of the EFI page table:
++ *
++ * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
++ *
++ * where the direct memory mapping in kernel space is.
++ *
++ * new_memmap's VA comes from that direct mapping and thus clearing it,
++ * it would get cleared in the kernel page table too.
++ *
++ * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
++ */
++ if (!efi_setup)
++ free_pages((unsigned long)new_memmap, pg_shift);
+
+ /* clean DUMMY object */
+ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+--- a/arch/x86/platform/efi/efi_32.c
++++ b/arch/x86/platform/efi/efi_32.c
+@@ -40,7 +40,12 @@
+ static unsigned long efi_rt_eflags;
+
+ void efi_sync_low_kernel_mappings(void) {}
+-void efi_setup_page_tables(void) {}
++void __init efi_dump_pagetable(void) {}
++int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
++{
++ return 0;
++}
++void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
+
+ void __init efi_map_region(efi_memory_desc_t *md)
+ {
+--- a/arch/x86/platform/efi/efi_64.c
++++ b/arch/x86/platform/efi/efi_64.c
+@@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void)
+ sizeof(pgd_t) * num_pgds);
+ }
+
+-void efi_setup_page_tables(void)
++int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
+ {
++ pgd_t *pgd;
++
++ if (efi_enabled(EFI_OLD_MEMMAP))
++ return 0;
++
+ efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
++ pgd = __va(efi_scratch.efi_pgt);
++
++ /*
++ * It can happen that the physical address of new_memmap lands in memory
++ * which is not mapped in the EFI page table. Therefore we need to go
++ * and ident-map those pages containing the map before calling
++ * phys_efi_set_virtual_address_map().
++ */
++ if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
++ pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
++ return 1;
++ }
++
++ efi_scratch.use_pgd = true;
++
++
++ return 0;
++}
++
++void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
++{
++ pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+
+- if (!efi_enabled(EFI_OLD_MEMMAP))
+- efi_scratch.use_pgd = true;
++ kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
+ }
+
+ static void __init __map_region(efi_memory_desc_t *md, u64 va)
--- /dev/null
+From 42a5477251f0e0f33ad5f6a95c48d685ec03191e Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Sat, 18 Jan 2014 12:48:16 +0100
+Subject: x86, pageattr: Export page unmapping interface
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 42a5477251f0e0f33ad5f6a95c48d685ec03191e upstream.
+
+We will use it in efi so expose it.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Tested-by: Toshi Kani <toshi.kani@hp.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pgtable_types.h | 2 +
+ arch/x86/mm/pageattr.c | 44 ++++++++++++++++++++++++-----------
+ 2 files changed, 33 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -385,6 +385,8 @@ extern pte_t *lookup_address(unsigned lo
+ extern phys_addr_t slow_virt_to_phys(void *__address);
+ extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
+ unsigned numpages, unsigned long page_flags);
++void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
++ unsigned numpages);
+ #endif /* !__ASSEMBLY__ */
+
+ #endif /* _ASM_X86_PGTABLE_DEFS_H */
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -692,6 +692,18 @@ static bool try_to_free_pmd_page(pmd_t *
+ return true;
+ }
+
++static bool try_to_free_pud_page(pud_t *pud)
++{
++ int i;
++
++ for (i = 0; i < PTRS_PER_PUD; i++)
++ if (!pud_none(pud[i]))
++ return false;
++
++ free_page((unsigned long)pud);
++ return true;
++}
++
+ static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
+ {
+ pte_t *pte = pte_offset_kernel(pmd, start);
+@@ -805,6 +817,16 @@ static void unmap_pud_range(pgd_t *pgd,
+ */
+ }
+
++static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
++{
++ pgd_t *pgd_entry = root + pgd_index(addr);
++
++ unmap_pud_range(pgd_entry, addr, end);
++
++ if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry)))
++ pgd_clear(pgd_entry);
++}
++
+ static int alloc_pte_page(pmd_t *pmd)
+ {
+ pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
+@@ -999,9 +1021,8 @@ static int populate_pud(struct cpa_data
+ static int populate_pgd(struct cpa_data *cpa, unsigned long addr)
+ {
+ pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
+- bool allocd_pgd = false;
+- pgd_t *pgd_entry;
+ pud_t *pud = NULL; /* shut up gcc */
++ pgd_t *pgd_entry;
+ int ret;
+
+ pgd_entry = cpa->pgd + pgd_index(addr);
+@@ -1015,7 +1036,6 @@ static int populate_pgd(struct cpa_data
+ return -1;
+
+ set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE));
+- allocd_pgd = true;
+ }
+
+ pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr);
+@@ -1023,19 +1043,11 @@ static int populate_pgd(struct cpa_data
+
+ ret = populate_pud(cpa, addr, pgd_entry, pgprot);
+ if (ret < 0) {
+- unmap_pud_range(pgd_entry, addr,
++ unmap_pgd_range(cpa->pgd, addr,
+ addr + (cpa->numpages << PAGE_SHIFT));
+-
+- if (allocd_pgd) {
+- /*
+- * If I allocated this PUD page, I can just as well
+- * free it in this error path.
+- */
+- pgd_clear(pgd_entry);
+- free_page((unsigned long)pud);
+- }
+ return ret;
+ }
++
+ cpa->numpages = ret;
+ return 0;
+ }
+@@ -1861,6 +1873,12 @@ out:
+ return retval;
+ }
+
++void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address,
++ unsigned numpages)
++{
++ unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT));
++}
++
+ /*
+ * The testcases use internal knowledge of the implementation that shouldn't
+ * be exposed to the rest of the kernel. Include these directly here.