From: Greg Kroah-Hartman Date: Wed, 22 Aug 2018 09:05:00 +0000 (+0200) Subject: 4.17-stable patches X-Git-Tag: v4.18.5~37 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ba52a7837bae02f97d1545f623dd3ce23f867e9d;p=thirdparty%2Fkernel%2Fstable-queue.git 4.17-stable patches added patches: mm-allow-non-direct-map-arguments-to-free_reserved_area.patch x86-mm-init-add-helper-for-freeing-kernel-image-pages.patch x86-mm-init-pass-unconverted-symbol-addresses-to-free_init_pages.patch x86-mm-init-remove-freed-kernel-image-areas-from-alias-mapping.patch --- diff --git a/queue-4.17/mm-allow-non-direct-map-arguments-to-free_reserved_area.patch b/queue-4.17/mm-allow-non-direct-map-arguments-to-free_reserved_area.patch new file mode 100644 index 00000000000..f4e307379c5 --- /dev/null +++ b/queue-4.17/mm-allow-non-direct-map-arguments-to-free_reserved_area.patch @@ -0,0 +1,88 @@ +From 0d83432811f26871295a9bc24d3c387924da6071 Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Thu, 2 Aug 2018 15:58:26 -0700 +Subject: mm: Allow non-direct-map arguments to free_reserved_area() + +From: Dave Hansen + +commit 0d83432811f26871295a9bc24d3c387924da6071 upstream. + +free_reserved_area() takes pointers as arguments to show which addresses +should be freed. However, it does this in a somewhat ambiguous way. If it +gets a kernel direct map address, it always works. However, if it gets an +address that is part of the kernel image alias mapping, it can fail. + +It fails if all of the following happen: + * The specified address is part of the kernel image alias + * Poisoning is requested (forcing a memset()) + * The address is in a read-only portion of the kernel image + +The memset() fails on the read-only mapping, of course. +free_reserved_area() *is* called both on the direct map and on kernel image +alias addresses. We've just lucked out thus far that the kernel image +alias areas it gets used on are read-write. I'm fairly sure this has been +just a happy accident. + +It is quite easy to make free_reserved_area() work for all cases: just +convert the address to a direct map address before doing the memset(), and +do this unconditionally. There is little chance of a regression here +because we previously did a virt_to_page() on the address for the memset, +so we know these are not highmem pages for which virt_to_page() would fail. 
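[ Editorial note: the snippet below is an illustrative sketch, not part of the patch. It restates the post-patch loop body from the mm/page_alloc.c hunk below in one piece: translate whatever alias the caller passed into its direct map address before poisoning, so the memset() always lands on a writable mapping. The helper name poison_and_free_one_page() is invented for the example and the snippet assumes the usual <linux/mm.h>/<linux/string.h> kernel context. ]

	/* Illustrative only: poison and free a single page given any alias of it. */
	static void poison_and_free_one_page(void *pos, int poison)
	{
		struct page *page = virt_to_page(pos);
		/* page_address() returns the direct map alias, which is always writable */
		void *direct_map_addr = page_address(page);

		if ((unsigned int)poison <= 0xFF)
			memset(direct_map_addr, poison, PAGE_SIZE);
		free_reserved_page(page);
	}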
+ +Signed-off-by: Dave Hansen +Signed-off-by: Thomas Gleixner +Cc: keescook@google.com +Cc: aarcange@redhat.com +Cc: jgross@suse.com +Cc: jpoimboe@redhat.com +Cc: gregkh@linuxfoundation.org +Cc: peterz@infradead.org +Cc: hughd@google.com +Cc: torvalds@linux-foundation.org +Cc: bp@alien8.de +Cc: luto@kernel.org +Cc: ak@linux.intel.com +Cc: Kees Cook +Cc: Andrea Arcangeli +Cc: Juergen Gross +Cc: Josh Poimboeuf +Cc: Greg Kroah-Hartman +Cc: Peter Zijlstra +Cc: Hugh Dickins +Cc: Linus Torvalds +Cc: Borislav Petkov +Cc: Andy Lutomirski +Cc: Andi Kleen +Link: https://lkml.kernel.org/r/20180802225826.1287AE3E@viggo.jf.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page_alloc.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -6933,9 +6933,21 @@ unsigned long free_reserved_area(void *s + start = (void *)PAGE_ALIGN((unsigned long)start); + end = (void *)((unsigned long)end & PAGE_MASK); + for (pos = start; pos < end; pos += PAGE_SIZE, pages++) { ++ struct page *page = virt_to_page(pos); ++ void *direct_map_addr; ++ ++ /* ++ * 'direct_map_addr' might be different from 'pos' ++ * because some architectures' virt_to_page() ++ * work with aliases. Getting the direct map ++ * address ensures that we get a _writeable_ ++ * alias for the memset(). ++ */ ++ direct_map_addr = page_address(page); + if ((unsigned int)poison <= 0xFF) +- memset(pos, poison, PAGE_SIZE); +- free_reserved_page(virt_to_page(pos)); ++ memset(direct_map_addr, poison, PAGE_SIZE); ++ ++ free_reserved_page(page); + } + + if (pages && s) diff --git a/queue-4.17/series b/queue-4.17/series index 04b64cebd95..c12868f851a 100644 --- a/queue-4.17/series +++ b/queue-4.17/series @@ -302,3 +302,7 @@ mm-make-vm_area_dup-actually-copy-the-old-vma-data.patch mm-make-vm_area_alloc-initialize-core-fields.patch edac-add-missing-mem_lrddr4-entry-in-edac_mem_types.patch pty-fix-o_cloexec-for-tiocgptpeer.patch +mm-allow-non-direct-map-arguments-to-free_reserved_area.patch +x86-mm-init-pass-unconverted-symbol-addresses-to-free_init_pages.patch +x86-mm-init-add-helper-for-freeing-kernel-image-pages.patch +x86-mm-init-remove-freed-kernel-image-areas-from-alias-mapping.patch diff --git a/queue-4.17/x86-mm-init-add-helper-for-freeing-kernel-image-pages.patch b/queue-4.17/x86-mm-init-add-helper-for-freeing-kernel-image-pages.patch new file mode 100644 index 00000000000..5a67e519be7 --- /dev/null +++ b/queue-4.17/x86-mm-init-add-helper-for-freeing-kernel-image-pages.patch @@ -0,0 +1,98 @@ +From 6ea2738e0ca0e626c75202fb051c1e88d7a950fa Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Thu, 2 Aug 2018 15:58:29 -0700 +Subject: x86/mm/init: Add helper for freeing kernel image pages + +From: Dave Hansen + +commit 6ea2738e0ca0e626c75202fb051c1e88d7a950fa upstream. + +When chunks of the kernel image are freed, free_init_pages() is used +directly. Consolidate the three sites that do this. Also update the +string to give an incrementally better description of that memory versus +what was there before. 
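[ Editorial note: illustrative sketch, not part of the patch. The consolidation is a thin wrapper whose body matches the arch/x86/mm/init.c hunk below; with it in place, each of the three call sites collapses to a one-liner such as the free_initmem() case shown here. ]

	void free_kernel_image_pages(void *begin, void *end)
	{
		free_init_pages("unused kernel image",
				(unsigned long)begin, (unsigned long)end);
	}

	/* e.g. in free_initmem(): */
	free_kernel_image_pages(&__init_begin, &__init_end);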
+ +Signed-off-by: Dave Hansen +Signed-off-by: Thomas Gleixner +Cc: keescook@google.com +Cc: aarcange@redhat.com +Cc: jgross@suse.com +Cc: jpoimboe@redhat.com +Cc: gregkh@linuxfoundation.org +Cc: peterz@infradead.org +Cc: hughd@google.com +Cc: torvalds@linux-foundation.org +Cc: bp@alien8.de +Cc: luto@kernel.org +Cc: ak@linux.intel.com +Cc: Kees Cook +Cc: Andrea Arcangeli +Cc: Juergen Gross +Cc: Josh Poimboeuf +Cc: Greg Kroah-Hartman +Cc: Peter Zijlstra +Cc: Hugh Dickins +Cc: Linus Torvalds +Cc: Borislav Petkov +Cc: Andy Lutomirski +Cc: Andi Kleen +Link: https://lkml.kernel.org/r/20180802225829.FE0E32EA@viggo.jf.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/processor.h | 1 + + arch/x86/mm/init.c | 15 ++++++++++++--- + arch/x86/mm/init_64.c | 4 ++-- + 3 files changed, 15 insertions(+), 5 deletions(-) + +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -980,6 +980,7 @@ static inline uint32_t hypervisor_cpuid_ + + extern unsigned long arch_align_stack(unsigned long sp); + extern void free_init_pages(char *what, unsigned long begin, unsigned long end); ++extern void free_kernel_image_pages(void *begin, void *end); + + void default_idle(void); + #ifdef CONFIG_XEN +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -775,13 +775,22 @@ void free_init_pages(char *what, unsigne + } + } + ++/* ++ * begin/end can be in the direct map or the "high kernel mapping" ++ * used for the kernel image only. free_init_pages() will do the ++ * right thing for either kind of address. ++ */ ++void free_kernel_image_pages(void *begin, void *end) ++{ ++ free_init_pages("unused kernel image", ++ (unsigned long)begin, (unsigned long)end); ++} ++ + void __ref free_initmem(void) + { + e820__reallocate_tables(); + +- free_init_pages("unused kernel", +- (unsigned long)(&__init_begin), +- (unsigned long)(&__init_end)); ++ free_kernel_image_pages(&__init_begin, &__init_end); + } + + #ifdef CONFIG_BLK_DEV_INITRD +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1283,8 +1283,8 @@ void mark_rodata_ro(void) + set_memory_ro(start, (end-start) >> PAGE_SHIFT); + #endif + +- free_init_pages("unused kernel", text_end, rodata_start); +- free_init_pages("unused kernel", rodata_end, _sdata); ++ free_kernel_image_pages((void *)text_end, (void *)rodata_start); ++ free_kernel_image_pages((void *)rodata_end, (void *)_sdata); + + debug_checkwx(); + diff --git a/queue-4.17/x86-mm-init-pass-unconverted-symbol-addresses-to-free_init_pages.patch b/queue-4.17/x86-mm-init-pass-unconverted-symbol-addresses-to-free_init_pages.patch new file mode 100644 index 00000000000..a302dff4939 --- /dev/null +++ b/queue-4.17/x86-mm-init-pass-unconverted-symbol-addresses-to-free_init_pages.patch @@ -0,0 +1,72 @@ +From 9f515cdb411ef34f1aaf4c40bb0c932cf6db5de1 Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Thu, 2 Aug 2018 15:58:28 -0700 +Subject: x86/mm/init: Pass unconverted symbol addresses to free_init_pages() + +From: Dave Hansen + +commit 9f515cdb411ef34f1aaf4c40bb0c932cf6db5de1 upstream. + +The x86 code has several places where it frees parts of kernel image: + + 1. Unused SMP alternative + 2. __init code + 3. The hole between text and rodata + 4. The hole between rodata and data + +We call free_init_pages() to do this. Strangely, we convert the symbol +addresses to kernel direct map addresses in some cases (#3, #4) but not +others (#1, #2). 
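[ Editorial note: illustrative sketch, not part of the patch. The two call styles referred to above look like this; the converted form is taken from the mark_rodata_ro() hunk below, and the unconverted form matches free_initmem(). ]

	/* Cases #3/#4: symbol addresses converted to direct map addresses first */
	free_init_pages("unused kernel",
			(unsigned long) __va(__pa_symbol(text_end)),
			(unsigned long) __va(__pa_symbol(rodata_start)));

	/* Cases #1/#2: kernel image (symbol) addresses passed straight through */
	free_init_pages("unused kernel",
			(unsigned long) &__init_begin,
			(unsigned long) &__init_end);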
+ +The virt_to_page() and the other code in free_reserved_area() now work +fine for symbol addresses on x86, so don't bother converting the +addresses to direct map addresses before freeing them. + +Signed-off-by: Dave Hansen +Signed-off-by: Thomas Gleixner +Cc: keescook@google.com +Cc: aarcange@redhat.com +Cc: jgross@suse.com +Cc: jpoimboe@redhat.com +Cc: gregkh@linuxfoundation.org +Cc: peterz@infradead.org +Cc: hughd@google.com +Cc: torvalds@linux-foundation.org +Cc: bp@alien8.de +Cc: luto@kernel.org +Cc: ak@linux.intel.com +Cc: Kees Cook +Cc: Andrea Arcangeli +Cc: Juergen Gross +Cc: Josh Poimboeuf +Cc: Greg Kroah-Hartman +Cc: Peter Zijlstra +Cc: Hugh Dickins +Cc: Linus Torvalds +Cc: Borislav Petkov +Cc: Andy Lutomirski +Cc: Andi Kleen +Link: https://lkml.kernel.org/r/20180802225828.89B2D0E2@viggo.jf.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/init_64.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1283,12 +1283,8 @@ void mark_rodata_ro(void) + set_memory_ro(start, (end-start) >> PAGE_SHIFT); + #endif + +- free_init_pages("unused kernel", +- (unsigned long) __va(__pa_symbol(text_end)), +- (unsigned long) __va(__pa_symbol(rodata_start))); +- free_init_pages("unused kernel", +- (unsigned long) __va(__pa_symbol(rodata_end)), +- (unsigned long) __va(__pa_symbol(_sdata))); ++ free_init_pages("unused kernel", text_end, rodata_start); ++ free_init_pages("unused kernel", rodata_end, _sdata); + + debug_checkwx(); + diff --git a/queue-4.17/x86-mm-init-remove-freed-kernel-image-areas-from-alias-mapping.patch b/queue-4.17/x86-mm-init-remove-freed-kernel-image-areas-from-alias-mapping.patch new file mode 100644 index 00000000000..1ecf92ec864 --- /dev/null +++ b/queue-4.17/x86-mm-init-remove-freed-kernel-image-areas-from-alias-mapping.patch @@ -0,0 +1,200 @@ +From c40a56a7818cfe735fc93a69e1875f8bba834483 Mon Sep 17 00:00:00 2001 +From: Dave Hansen +Date: Thu, 2 Aug 2018 15:58:31 -0700 +Subject: x86/mm/init: Remove freed kernel image areas from alias mapping + +From: Dave Hansen + +commit c40a56a7818cfe735fc93a69e1875f8bba834483 upstream. + +The kernel image is mapped into two places in the virtual address space +(addresses without KASLR, of course): + + 1. The kernel direct map (0xffff880000000000) + 2. The "high kernel map" (0xffffffff81000000) + +We actually execute out of #2. If we get the address of a kernel symbol, +it points to #2, but almost all physical-to-virtual translations point to +#1. + +Parts of the "high kernel map" alias are mapped in the userspace page tables with the Global bit for performance reasons. The parts that we map to userspace do not (er, should not) have secrets. When PTI is enabled then the global bit is usually not set in the high mapping and just used to compensate for poor performance on systems which lack PCID. + +This is fine, except that some areas in the kernel image that are adjacent to the non-secret-containing areas are unused holes. We free these holes back into the normal page allocator and reuse them as normal kernel memory. The memory will, of course, get *used* via the normal map, but the alias mapping is kept. + +This otherwise unused alias mapping of the holes will, by default, keep the Global bit, be mapped out to userspace, and be vulnerable to Meltdown. + +Remove the alias mapping of these pages entirely. This is likely to fracture the 2M page mapping the kernel image near these areas, but this should affect a minority of the area.
+ +The pageattr code changes *all* aliases mapping the physical pages that it +operates on (by default). We only want to modify a single alias, so we +need to tweak its behavior. + +This unmapping behavior is currently dependent on PTI being in place. +Going forward, we should at least consider doing this for all +configurations. Having an extra read-write alias for memory is not exactly +ideal for debugging things like random memory corruption and this does +undercut features like DEBUG_PAGEALLOC or future work like eXclusive Page +Frame Ownership (XPFO). + +Before this patch: + +current_kernel:---[ High Kernel Mapping ]--- +current_kernel-0xffffffff80000000-0xffffffff81000000 16M pmd +current_kernel-0xffffffff81000000-0xffffffff81e00000 14M ro PSE GLB x pmd +current_kernel-0xffffffff81e00000-0xffffffff81e11000 68K ro GLB x pte +current_kernel-0xffffffff81e11000-0xffffffff82000000 1980K RW NX pte +current_kernel-0xffffffff82000000-0xffffffff82600000 6M ro PSE GLB NX pmd +current_kernel-0xffffffff82600000-0xffffffff82c00000 6M RW PSE NX pmd +current_kernel-0xffffffff82c00000-0xffffffff82e00000 2M RW NX pte +current_kernel-0xffffffff82e00000-0xffffffff83200000 4M RW PSE NX pmd +current_kernel-0xffffffff83200000-0xffffffffa0000000 462M pmd + + current_user:---[ High Kernel Mapping ]--- + current_user-0xffffffff80000000-0xffffffff81000000 16M pmd + current_user-0xffffffff81000000-0xffffffff81e00000 14M ro PSE GLB x pmd + current_user-0xffffffff81e00000-0xffffffff81e11000 68K ro GLB x pte + current_user-0xffffffff81e11000-0xffffffff82000000 1980K RW NX pte + current_user-0xffffffff82000000-0xffffffff82600000 6M ro PSE GLB NX pmd + current_user-0xffffffff82600000-0xffffffffa0000000 474M pmd + +After this patch: + +current_kernel:---[ High Kernel Mapping ]--- +current_kernel-0xffffffff80000000-0xffffffff81000000 16M pmd +current_kernel-0xffffffff81000000-0xffffffff81e00000 14M ro PSE GLB x pmd +current_kernel-0xffffffff81e00000-0xffffffff81e11000 68K ro GLB x pte +current_kernel-0xffffffff81e11000-0xffffffff82000000 1980K pte +current_kernel-0xffffffff82000000-0xffffffff82400000 4M ro PSE GLB NX pmd +current_kernel-0xffffffff82400000-0xffffffff82488000 544K ro NX pte +current_kernel-0xffffffff82488000-0xffffffff82600000 1504K pte +current_kernel-0xffffffff82600000-0xffffffff82c00000 6M RW PSE NX pmd +current_kernel-0xffffffff82c00000-0xffffffff82c0d000 52K RW NX pte +current_kernel-0xffffffff82c0d000-0xffffffff82dc0000 1740K pte + + current_user:---[ High Kernel Mapping ]--- + current_user-0xffffffff80000000-0xffffffff81000000 16M pmd + current_user-0xffffffff81000000-0xffffffff81e00000 14M ro PSE GLB x pmd + current_user-0xffffffff81e00000-0xffffffff81e11000 68K ro GLB x pte + current_user-0xffffffff81e11000-0xffffffff82000000 1980K pte + current_user-0xffffffff82000000-0xffffffff82400000 4M ro PSE GLB NX pmd + current_user-0xffffffff82400000-0xffffffff82488000 544K ro NX pte + current_user-0xffffffff82488000-0xffffffff82600000 1504K pte + current_user-0xffffffff82600000-0xffffffffa0000000 474M pmd + +[ tglx: Do not unmap on 32bit as there is only one mapping ] + +Fixes: 0f561fce4d69 ("x86/pti: Enable global pages for shared areas") +Signed-off-by: Dave Hansen +Signed-off-by: Thomas Gleixner +Cc: Kees Cook +Cc: Andrea Arcangeli +Cc: Juergen Gross +Cc: Josh Poimboeuf +Cc: Greg Kroah-Hartman +Cc: Peter Zijlstra +Cc: Hugh Dickins +Cc: Linus Torvalds +Cc: Borislav Petkov +Cc: Andy Lutomirski +Cc: Andi Kleen +Cc: Joerg Roedel +Link: 
https://lkml.kernel.org/r/20180802225831.5F6A2BFC@viggo.jf.intel.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/set_memory.h | 1 + + arch/x86/mm/init.c | 26 ++++++++++++++++++++++++-- + arch/x86/mm/pageattr.c | 13 +++++++++++++ + 3 files changed, 38 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/set_memory.h ++++ b/arch/x86/include/asm/set_memory.h +@@ -46,6 +46,7 @@ int set_memory_np(unsigned long addr, in + int set_memory_4k(unsigned long addr, int numpages); + int set_memory_encrypted(unsigned long addr, int numpages); + int set_memory_decrypted(unsigned long addr, int numpages); ++int set_memory_np_noalias(unsigned long addr, int numpages); + + int set_memory_array_uc(unsigned long *addr, int addrinarray); + int set_memory_array_wc(unsigned long *addr, int addrinarray); +--- a/arch/x86/mm/init.c ++++ b/arch/x86/mm/init.c +@@ -782,8 +782,30 @@ void free_init_pages(char *what, unsigne + */ + void free_kernel_image_pages(void *begin, void *end) + { +- free_init_pages("unused kernel image", +- (unsigned long)begin, (unsigned long)end); ++ unsigned long begin_ul = (unsigned long)begin; ++ unsigned long end_ul = (unsigned long)end; ++ unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; ++ ++ ++ free_init_pages("unused kernel image", begin_ul, end_ul); ++ ++ /* ++ * PTI maps some of the kernel into userspace. For performance, ++ * this includes some kernel areas that do not contain secrets. ++ * Those areas might be adjacent to the parts of the kernel image ++ * being freed, which may contain secrets. Remove the "high kernel ++ * image mapping" for these freed areas, ensuring they are not even ++ * potentially vulnerable to Meltdown regardless of the specific ++ * optimizations PTI is currently using. ++ * ++ * The "noalias" prevents unmapping the direct map alias which is ++ * needed to access the freed pages. ++ * ++ * This is only valid for 64bit kernels. 32bit has only one mapping ++ * which can't be treated in this way for obvious reasons. ++ */ ++ if (IS_ENABLED(CONFIG_X86_64) && cpu_feature_enabled(X86_FEATURE_PTI)) ++ set_memory_np_noalias(begin_ul, len_pages); + } + + void __ref free_initmem(void) +--- a/arch/x86/mm/pageattr.c ++++ b/arch/x86/mm/pageattr.c +@@ -53,6 +53,7 @@ static DEFINE_SPINLOCK(cpa_lock); + #define CPA_FLUSHTLB 1 + #define CPA_ARRAY 2 + #define CPA_PAGES_ARRAY 4 ++#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */ + + #ifdef CONFIG_PROC_FS + static unsigned long direct_pages_count[PG_LEVEL_NUM]; +@@ -1486,6 +1487,9 @@ static int change_page_attr_set_clr(unsi + + /* No alias checking for _NX bit modifications */ + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; ++ /* Has caller explicitly disabled alias checking? */ ++ if (in_flag & CPA_NO_CHECK_ALIAS) ++ checkalias = 0; + + ret = __change_page_attr_set_clr(&cpa, checkalias); + +@@ -1772,6 +1776,15 @@ int set_memory_np(unsigned long addr, in + return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); + } + ++int set_memory_np_noalias(unsigned long addr, int numpages) ++{ ++ int cpa_flags = CPA_NO_CHECK_ALIAS; ++ ++ return change_page_attr_set_clr(&addr, numpages, __pgprot(0), ++ __pgprot(_PAGE_PRESENT), 0, ++ cpa_flags, NULL); ++} ++ + int set_memory_4k(unsigned long addr, int numpages) + { + return change_page_attr_set_clr(&addr, numpages, __pgprot(0),