From 9b40e1ee7ee5a4ac89b8b490fed6dc97c3ef7939 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 2 Oct 2014 20:11:23 -0700 Subject: [PATCH] 3.14-stable patches added patches: cgroup-fix-unbalanced-locking.patch kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch shmem-fix-nlink-for-rename-overwrite-directory.patch x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch --- .../cgroup-fix-unbalanced-locking.patch | 36 +++++ ...a-cow-on-read-only-pages-for-key-ops.patch | 45 ++++++ ...-type-in-memblock_find_in_range_node.patch | 48 +++++++ ...n-syncing-cache-on-single_rw-devices.patch | 33 +++++ ...ile-registers-for-format_write-chips.patch | 39 ++++++ queue-3.14/series | 10 ++ ...nlink-for-rename-overwrite-directory.patch | 77 ++++++++++ ...remap-increase-fix_btmaps_slots-to-8.patch | 109 +++++++++++++++ ...etup_data-area-when-picking-location.patch | 59 ++++++++ ...cate-entries-into-kernel-page-tables.patch | 131 ++++++++++++++++++ ...thaw-processes-when-suspend-resuming.patch | 59 ++++++++ 11 files changed, 646 insertions(+) create mode 100644 queue-3.14/cgroup-fix-unbalanced-locking.patch create mode 100644 queue-3.14/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch create mode 100644 queue-3.14/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch create mode 100644 queue-3.14/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch create mode 100644 queue-3.14/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch create mode 100644 
queue-3.14/shmem-fix-nlink-for-rename-overwrite-directory.patch create mode 100644 queue-3.14/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch create mode 100644 queue-3.14/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch create mode 100644 queue-3.14/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch create mode 100644 queue-3.14/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch diff --git a/queue-3.14/cgroup-fix-unbalanced-locking.patch b/queue-3.14/cgroup-fix-unbalanced-locking.patch new file mode 100644 index 00000000000..88e9b897097 --- /dev/null +++ b/queue-3.14/cgroup-fix-unbalanced-locking.patch @@ -0,0 +1,36 @@ +From eb4aec84d6bdf98d00cedb41c18000f7a31e648a Mon Sep 17 00:00:00 2001 +From: Zefan Li +Date: Thu, 18 Sep 2014 17:28:46 +0800 +Subject: cgroup: fix unbalanced locking + +From: Zefan Li + +commit eb4aec84d6bdf98d00cedb41c18000f7a31e648a upstream. + +cgroup_pidlist_start() holds cgrp->pidlist_mutex and then calls +pidlist_array_load(), and cgroup_pidlist_stop() releases the mutex. + +It is wrong that we release the mutex in the failure path in +pidlist_array_load(), because cgroup_pidlist_stop() will be called +no matter if cgroup_pidlist_start() returns errno or not. 
+ +Fixes: 4bac00d16a8760eae7205e41d2c246477d42a210 +Signed-off-by: Zefan Li +Signed-off-by: Tejun Heo +Acked-by: Cong Wang +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -3663,7 +3663,6 @@ static int pidlist_array_load(struct cgr + + l = cgroup_pidlist_find_create(cgrp, type); + if (!l) { +- mutex_unlock(&cgrp->pidlist_mutex); + pidlist_free(array); + return -ENOMEM; + } diff --git a/queue-3.14/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch b/queue-3.14/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch new file mode 100644 index 00000000000..822aeb56a8d --- /dev/null +++ b/queue-3.14/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch @@ -0,0 +1,45 @@ +From ab3f285f227fec62868037e9b1b1fd18294a83b8 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Tue, 19 Aug 2014 16:19:35 +0200 +Subject: KVM: s390/mm: try a cow on read only pages for key ops + +From: Christian Borntraeger + +commit ab3f285f227fec62868037e9b1b1fd18294a83b8 upstream. + +The PFMF instruction handler blindly wrote the storage key even if +the page was mapped R/O in the host. Lets try a COW before continuing +and bail out in case of errors. 
+ +Signed-off-by: Christian Borntraeger +Reviewed-by: Dominik Dingel +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/mm/pgtable.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/s390/mm/pgtable.c ++++ b/arch/s390/mm/pgtable.c +@@ -810,11 +810,21 @@ int set_guest_storage_key(struct mm_stru + pte_t *ptep; + + down_read(&mm->mmap_sem); ++retry: + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } ++ if (!(pte_val(*ptep) & _PAGE_INVALID) && ++ (pte_val(*ptep) & _PAGE_PROTECT)) { ++ pte_unmap_unlock(*ptep, ptl); ++ if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { ++ up_read(&mm->mmap_sem); ++ return -EFAULT; ++ } ++ goto retry; ++ } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | diff --git a/queue-3.14/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch b/queue-3.14/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch new file mode 100644 index 00000000000..d47b8357676 --- /dev/null +++ b/queue-3.14/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch @@ -0,0 +1,48 @@ +From 0cfb8f0c3e21e36d4a6e472e4c419d58ba848698 Mon Sep 17 00:00:00 2001 +From: Tang Chen +Date: Fri, 29 Aug 2014 15:18:31 -0700 +Subject: memblock, memhotplug: fix wrong type in memblock_find_in_range_node(). + +From: Tang Chen + +commit 0cfb8f0c3e21e36d4a6e472e4c419d58ba848698 upstream. + +In memblock_find_in_range_node(), we defined ret as int. But it should +be phys_addr_t because it is used to store the return value from +__memblock_find_range_bottom_up(). + +The bug has not been triggered because when allocating low memory near +the kernel end, the "int ret" won't turn out to be negative. When we +started to allocate memory on other nodes, and the "int ret" could be +minus. Then the kernel will panic. 
+ +A simple way to reproduce this: comment out the following code in +numa_init(), + + memblock_set_bottom_up(false); + +and the kernel won't boot. + +Reported-by: Xishi Qiu +Signed-off-by: Tang Chen +Tested-by: Xishi Qiu +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memblock.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -183,8 +183,7 @@ phys_addr_t __init_memblock memblock_fin + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid) + { +- int ret; +- phys_addr_t kernel_end; ++ phys_addr_t kernel_end, ret; + + /* pump up @end */ + if (end == MEMBLOCK_ALLOC_ACCESSIBLE) diff --git a/queue-3.14/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch b/queue-3.14/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch new file mode 100644 index 00000000000..515d5e4e0a3 --- /dev/null +++ b/queue-3.14/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch @@ -0,0 +1,33 @@ +From 5c1ebe7f73f9166893c3459915db8a09d6d1d715 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Wed, 27 Aug 2014 13:09:12 +0100 +Subject: regmap: Don't attempt block writes when syncing cache on single_rw devices + +From: Mark Brown + +commit 5c1ebe7f73f9166893c3459915db8a09d6d1d715 upstream. + +If the device can't support block writes then don't attempt to use raw +syncing which will automatically generate block writes for adjacent +registers, use the existing _single() block syncing implementation. 
+ +Reported-by: Jarkko Nikula +Tested-by: Jarkko Nikula +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/regmap/regcache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/regmap/regcache.c ++++ b/drivers/base/regmap/regcache.c +@@ -701,7 +701,7 @@ int regcache_sync_block(struct regmap *m + unsigned int block_base, unsigned int start, + unsigned int end) + { +- if (regmap_can_raw_write(map)) ++ if (regmap_can_raw_write(map) && !map->use_single_rw) + return regcache_sync_block_raw(map, block, cache_present, + block_base, start, end); + else diff --git a/queue-3.14/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch b/queue-3.14/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch new file mode 100644 index 00000000000..de062c13a89 --- /dev/null +++ b/queue-3.14/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch @@ -0,0 +1,39 @@ +From 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Tue, 26 Aug 2014 12:12:17 +0100 +Subject: regmap: Fix handling of volatile registers for format_write() chips + +From: Mark Brown + +commit 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 upstream. + +A previous over-zealous factorisation of code means that we only treat +registers as volatile if they are readable. For most devices this is fine +since normally most registers can be read and volatility implies +readability but for format_write() devices where there is no readback from +the hardware and we use volatility to mean simply uncacheability this means +that we end up treating all registers as cacheable. + +A bigger refactoring of the code to clarify this is in order but as a fix +make a minimal change and only check readability when checking volatility +if there is no format_write() operation defined for the device. 
+ +Signed-off-by: Mark Brown +Tested-by: Lars-Peter Clausen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/regmap/regmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/regmap/regmap.c ++++ b/drivers/base/regmap/regmap.c +@@ -105,7 +105,7 @@ bool regmap_readable(struct regmap *map, + + bool regmap_volatile(struct regmap *map, unsigned int reg) + { +- if (!regmap_readable(map, reg)) ++ if (!map->format.format_write && !regmap_readable(map, reg)) + return false; + + if (map->volatile_reg) diff --git a/queue-3.14/series b/queue-3.14/series index 09b9ffd90c1..e983b498381 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -120,3 +120,13 @@ mips-mcount-adjust-stack-pointer-for-static-trace-in-mips32.patch acpica-update-to-gpio-region-handler-interface.patch acpi-hotplug-generate-online-uevents-for-acpi-containers.patch acpi-scan-correct-error-return-value-of-create_modalias.patch +memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch +regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch +regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch +cgroup-fix-unbalanced-locking.patch +kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch +xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch +x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch +x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch +x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch +shmem-fix-nlink-for-rename-overwrite-directory.patch diff --git a/queue-3.14/shmem-fix-nlink-for-rename-overwrite-directory.patch b/queue-3.14/shmem-fix-nlink-for-rename-overwrite-directory.patch new file mode 100644 index 00000000000..b0c3ab0567f --- /dev/null +++ b/queue-3.14/shmem-fix-nlink-for-rename-overwrite-directory.patch @@ -0,0 +1,77 @@ +From b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 24 Sep 2014 17:56:17 
+0200 +Subject: shmem: fix nlink for rename overwrite directory + +From: Miklos Szeredi + +commit b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 upstream. + +If overwriting an empty directory with rename, then need to drop the extra +nlink. + +Test prog: + +#include +#include +#include +#include + +int main(void) +{ + const char *test_dir1 = "test-dir1"; + const char *test_dir2 = "test-dir2"; + int res; + int fd; + struct stat statbuf; + + res = mkdir(test_dir1, 0777); + if (res == -1) + err(1, "mkdir(\"%s\")", test_dir1); + + res = mkdir(test_dir2, 0777); + if (res == -1) + err(1, "mkdir(\"%s\")", test_dir2); + + fd = open(test_dir2, O_RDONLY); + if (fd == -1) + err(1, "open(\"%s\")", test_dir2); + + res = rename(test_dir1, test_dir2); + if (res == -1) + err(1, "rename(\"%s\", \"%s\")", test_dir1, test_dir2); + + res = fstat(fd, &statbuf); + if (res == -1) + err(1, "fstat(%i)", fd); + + if (statbuf.st_nlink != 0) { + fprintf(stderr, "nlink is %lu, should be 0\n", statbuf.st_nlink); + return 1; + } + + return 0; +} + +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + mm/shmem.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2143,8 +2143,10 @@ static int shmem_rename(struct inode *ol + + if (new_dentry->d_inode) { + (void) shmem_unlink(new_dir, new_dentry); +- if (they_are_dirs) ++ if (they_are_dirs) { ++ drop_nlink(new_dentry->d_inode); + drop_nlink(old_dir); ++ } + } else if (they_are_dirs) { + drop_nlink(old_dir); + inc_nlink(new_dir); diff --git a/queue-3.14/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch b/queue-3.14/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch new file mode 100644 index 00000000000..bffcd1d1d7b --- /dev/null +++ b/queue-3.14/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch @@ -0,0 +1,109 @@ +From 3eddc69ffeba092d288c386646bfa5ec0fce25fd Mon Sep 17 00:00:00 2001 +From: Dave Young +Date: Tue, 26 Aug 2014 17:06:41 +0800 +Subject: 
x86 early_ioremap: Increase FIX_BTMAPS_SLOTS to 8 + +From: Dave Young + +commit 3eddc69ffeba092d288c386646bfa5ec0fce25fd upstream. + +3.16 kernel boot fail with earlyprintk=efi, it keeps scrolling at the +bottom line of screen. + +Bisected, the first bad commit is below: +commit 86dfc6f339886559d80ee0d4bd20fe5ee90450f0 +Author: Lv Zheng +Date: Fri Apr 4 12:38:57 2014 +0800 + + ACPICA: Tables: Fix table checksums verification before installation. + +I did some debugging by enabling both serial and efi earlyprintk, below is +some debug dmesg, seems early_ioremap fails in scroll up function due to +no free slot, see below dmesg output: + + WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:116 __early_ioremap+0x90/0x1c4() + __early_ioremap(ed00c800, 00000c80) not found slot + Modules linked in: + CPU: 0 PID: 0 Comm: swapper Not tainted 3.17.0-rc1+ #204 + Hardware name: Hewlett-Packard HP Z420 Workstation/1589, BIOS J61 v03.15 05/09/2013 + Call Trace: + dump_stack+0x4e/0x7a + warn_slowpath_common+0x75/0x8e + ? __early_ioremap+0x90/0x1c4 + warn_slowpath_fmt+0x47/0x49 + __early_ioremap+0x90/0x1c4 + ? sprintf+0x46/0x48 + early_ioremap+0x13/0x15 + early_efi_map+0x24/0x26 + early_efi_scroll_up+0x6d/0xc0 + early_efi_write+0x1b0/0x214 + call_console_drivers.constprop.21+0x73/0x7e + console_unlock+0x151/0x3b2 + ? vprintk_emit+0x49f/0x532 + vprintk_emit+0x521/0x532 + ? console_unlock+0x383/0x3b2 + printk+0x4f/0x51 + acpi_os_vprintf+0x2b/0x2d + acpi_os_printf+0x43/0x45 + acpi_info+0x5c/0x63 + ? __acpi_map_table+0x13/0x18 + ? acpi_os_map_iomem+0x21/0x147 + acpi_tb_print_table_header+0x177/0x186 + acpi_tb_install_table_with_override+0x4b/0x62 + acpi_tb_install_standard_table+0xd9/0x215 + ? early_ioremap+0x13/0x15 + ? __acpi_map_table+0x13/0x18 + acpi_tb_parse_root_table+0x16e/0x1b4 + acpi_initialize_tables+0x57/0x59 + acpi_table_init+0x50/0xce + acpi_boot_table_init+0x1e/0x85 + setup_arch+0x9b7/0xcc4 + start_kernel+0x94/0x42d + ? 
early_idt_handlers+0x120/0x120 + x86_64_start_reservations+0x2a/0x2c + x86_64_start_kernel+0xf3/0x100 + +Quote reply from Lv.zheng about the early ioremap slot usage in this case: + +""" +In early_efi_scroll_up(), 2 mapping entries will be used for the src/dst screen buffer. +In drivers/acpi/acpica/tbutils.c, we've improved the early table loading code in acpi_tb_parse_root_table(). +We now need 2 mapping entries: +1. One mapping entry is used for RSDT table mapping. Each RSDT entry contains an address for another ACPI table. +2. For each entry in RSDP, we need another mapping entry to map the table to perform necessary check/override before installing it. + +When acpi_tb_parse_root_table() prints something through EFI earlyprintk console, we'll have 4 mapping entries used. +The current 4 slots setting of early_ioremap() seems to be too small for such a use case. +""" + +Thus increase the slot to 8 in this patch to fix this issue. +boot-time mappings become 512 page with this patch. + +Signed-off-by: Dave Young +Signed-off-by: Matt Fleming +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/fixmap.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -123,14 +123,14 @@ enum fixed_addresses { + __end_of_permanent_fixed_addresses, + + /* +- * 256 temporary boot-time mappings, used by early_ioremap(), ++ * 512 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. 
+ * +- * If necessary we round it up to the next 256 pages boundary so ++ * If necessary we round it up to the next 512 pages boundary so + * that we can have a single pgd entry and a single pte table: + */ + #define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_SLOTS 4 ++#define FIX_BTMAPS_SLOTS 8 + #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + FIX_BTMAP_END = + (__end_of_permanent_fixed_addresses ^ diff --git a/queue-3.14/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch b/queue-3.14/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch new file mode 100644 index 00000000000..bcc09ae56d8 --- /dev/null +++ b/queue-3.14/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch @@ -0,0 +1,59 @@ +From 0cacbfbeb5077b63d5d3cf6df88b14ac12ad584b Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 11 Sep 2014 09:19:31 -0700 +Subject: x86/kaslr: Avoid the setup_data area when picking location + +From: Kees Cook + +commit 0cacbfbeb5077b63d5d3cf6df88b14ac12ad584b upstream. + +The KASLR location-choosing logic needs to avoid the setup_data +list memory areas as well. Without this, it would be possible to +have the ASLR position stomp on the memory, ultimately causing +the boot to fail. + +Signed-off-by: Kees Cook +Tested-by: Baoquan He +Cc: Vivek Goyal +Cc: Rafael J. 
Wysocki +Cc: Wei Yongjun +Cc: Pavel Machek +Cc: Linus Torvalds +Link: http://lkml.kernel.org/r/20140911161931.GA12001@www.outflux.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/boot/compressed/aslr.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/arch/x86/boot/compressed/aslr.c ++++ b/arch/x86/boot/compressed/aslr.c +@@ -183,12 +183,27 @@ static void mem_avoid_init(unsigned long + static bool mem_avoid_overlap(struct mem_vector *img) + { + int i; ++ struct setup_data *ptr; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + ++ /* Avoid all entries in the setup_data linked list. */ ++ ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; ++ while (ptr) { ++ struct mem_vector avoid; ++ ++ avoid.start = (u64)ptr; ++ avoid.size = sizeof(*ptr) + ptr->len; ++ ++ if (mem_overlaps(img, &avoid)) ++ return true; ++ ++ ptr = (struct setup_data *)(unsigned long)ptr->next; ++ } ++ + return false; + } + diff --git a/queue-3.14/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch b/queue-3.14/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch new file mode 100644 index 00000000000..807056d313b --- /dev/null +++ b/queue-3.14/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch @@ -0,0 +1,131 @@ +From 0b5a50635fc916cf46e3de0b819a61fc3f17e7ee Mon Sep 17 00:00:00 2001 +From: Stefan Bader +Date: Tue, 2 Sep 2014 11:16:01 +0100 +Subject: x86/xen: don't copy bogus duplicate entries into kernel page tables + +From: Stefan Bader + +commit 0b5a50635fc916cf46e3de0b819a61fc3f17e7ee upstream. + +When RANDOMIZE_BASE (KASLR) is enabled; or the sum of all loaded +modules exceeds 512 MiB, then loading modules fails with a warning +(and hence a vmalloc allocation failure) because the PTEs for the +newly-allocated vmalloc address space are not zero. 
+ + WARNING: CPU: 0 PID: 494 at linux/mm/vmalloc.c:128 + vmap_page_range_noflush+0x2a1/0x360() + +This is caused by xen_setup_kernel_pagetables() copying +level2_kernel_pgt into level2_fixmap_pgt, overwriting many non-present +entries. + +Without KASLR, the normal kernel image size only covers the first half +of level2_kernel_pgt and module space starts after that. + +L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[ 0..255]->kernel + [256..511]->module + [511]->level2_fixmap_pgt[ 0..505]->module + +This allows 512 MiB of module vmalloc space to be used before +having to use the corrupted level2_fixmap_pgt entries. + +With KASLR enabled, the kernel image uses the full PUD range of 1G and +module space starts in the level2_fixmap_pgt. So basically: + +L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[0..511]->kernel + [511]->level2_fixmap_pgt[0..505]->module + +And now no module vmalloc space can be used without using the corrupt +level2_fixmap_pgt entries. + +Fix this by properly converting the level2_fixmap_pgt entries to MFNs, +and setting level1_fixmap_pgt as read-only. + +A number of comments were also using the wrong L3 offset for +level2_kernel_pgt. These have been corrected. 
+ +Signed-off-by: Stefan Bader +Signed-off-by: David Vrabel +Reviewed-by: Boris Ostrovsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/pgtable_64.h | 1 + + arch/x86/xen/mmu.c | 27 ++++++++++++--------------- + 2 files changed, 13 insertions(+), 15 deletions(-) + +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512]; + extern pmd_t level2_kernel_pgt[512]; + extern pmd_t level2_fixmap_pgt[512]; + extern pmd_t level2_ident_pgt[512]; ++extern pte_t level1_fixmap_pgt[512]; + extern pgd_t init_level4_pgt[]; + + #define swapper_pg_dir init_level4_pgt +--- a/arch/x86/xen/mmu.c ++++ b/arch/x86/xen/mmu.c +@@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigne + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into +- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This +- * means that only the kernel has a physical mapping to start with - +- * but that's enough to get __va working. We need to fill in the rest +- * of the physical mapping once some sort of allocator has been set +- * up. +- * NOTE: for PVH, the page tables are native. ++ * level2_ident_pgt, and level2_kernel_pgt. This means that only the ++ * kernel has a physical mapping to start with - but that's enough to ++ * get __va working. We need to fill in the rest of the physical ++ * mapping once some sort of allocator has been set up. NOTE: for ++ * PVH, the page tables are native. 
+ */ + void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) + { +@@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(p + /* L3_i[0] -> level2_ident_pgt */ + convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt +- * L3_i[511] -> level2_fixmap_pgt */ ++ * L3_k[511] -> level2_fixmap_pgt */ + convert_pfn_mfn(level3_kernel_pgt); ++ ++ /* L3_k[511][506] -> level1_fixmap_pgt */ ++ convert_pfn_mfn(level2_fixmap_pgt); + } + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); +@@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(p + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: +- * Both L4[272][0] and L4[511][511] have entries that point to the same ++ * Both L4[272][0] and L4[511][510] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __ka space! (But if you just + * modify the PMD table to point to other PTE's or none, then you + * are OK - which is what cleanup_highmap does) */ + copy_page(level2_ident_pgt, l2); +- /* Graft it onto L4[511][511] */ ++ /* Graft it onto L4[511][510] */ + copy_page(level2_kernel_pgt, l2); + +- /* Get [511][510] and graft that in level2_fixmap_pgt */ +- l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); +- l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); +- copy_page(level2_fixmap_pgt, l2); +- /* Note that we don't do anything with level1_fixmap_pgt which +- * we don't need. 
*/ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); +@@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(p + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, diff --git a/queue-3.14/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch b/queue-3.14/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch new file mode 100644 index 00000000000..b680231876e --- /dev/null +++ b/queue-3.14/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch @@ -0,0 +1,59 @@ +From 61a734d305e16944b42730ef582a7171dc733321 Mon Sep 17 00:00:00 2001 +From: Ross Lagerwall +Date: Mon, 18 Aug 2014 10:41:36 +0100 +Subject: xen/manage: Always freeze/thaw processes when suspend/resuming + +From: Ross Lagerwall + +commit 61a734d305e16944b42730ef582a7171dc733321 upstream. + +Always freeze processes when suspending and thaw processes when resuming +to prevent a race noticeable with HVM guests. + +This prevents a deadlock where the khubd kthread (which is designed to +be freezable) acquires a usb device lock and then tries to allocate +memory which requires the disk which hasn't been resumed yet. +Meanwhile, the xenwatch thread deadlocks waiting for the usb device +lock. + +Freezing processes fixes this because the khubd thread is only thawed +after the xenwatch thread finishes resuming all the devices. 
+ +Signed-off-by: Ross Lagerwall +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/manage.c | 7 ------- + 1 file changed, 7 deletions(-) + +--- a/drivers/xen/manage.c ++++ b/drivers/xen/manage.c +@@ -111,16 +111,11 @@ static void do_suspend(void) + + shutting_down = SHUTDOWN_SUSPEND; + +-#ifdef CONFIG_PREEMPT +- /* If the kernel is preemptible, we need to freeze all the processes +- to prevent them from being in the middle of a pagetable update +- during suspend. */ + err = freeze_processes(); + if (err) { + pr_err("%s: freeze failed %d\n", __func__, err); + goto out; + } +-#endif + + err = dpm_suspend_start(PMSG_FREEZE); + if (err) { +@@ -169,10 +164,8 @@ out_resume: + dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); + + out_thaw: +-#ifdef CONFIG_PREEMPT + thaw_processes(); + out: +-#endif + shutting_down = SHUTDOWN_INVALID; + } + #endif /* CONFIG_HIBERNATE_CALLBACKS */ -- 2.47.3