From: Greg Kroah-Hartman Date: Fri, 3 Oct 2014 03:11:55 +0000 (-0700) Subject: 3.16-stable patches X-Git-Tag: v3.16.4~44 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=14778d34a5f5f826c3db00b3595205c73687df2e;p=thirdparty%2Fkernel%2Fstable-queue.git 3.16-stable patches added patches: arm64-add-brackets-around-user_stack_pointer.patch cgroup-delay-the-clearing-of-cgrp-kn-priv.patch cgroup-fix-unbalanced-locking.patch cgroup-reject-cgroup-names-with-n.patch kvm-s390-fix-user-triggerable-bug-in-dead-code.patch kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch regmap-fix-regcache-debugfs-initialization.patch shmem-fix-nlink-for-rename-overwrite-directory.patch x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch --- diff --git a/queue-3.16/arm64-add-brackets-around-user_stack_pointer.patch b/queue-3.16/arm64-add-brackets-around-user_stack_pointer.patch new file mode 100644 index 00000000000..7bb9c062594 --- /dev/null +++ b/queue-3.16/arm64-add-brackets-around-user_stack_pointer.patch @@ -0,0 +1,35 @@ +From 2520d039728b2a3c5ae7f79fe2a0e9d182855b12 Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Fri, 29 Aug 2014 16:08:02 +0100 +Subject: arm64: Add brackets around user_stack_pointer() + +From: Catalin Marinas + +commit 2520d039728b2a3c5ae7f79fe2a0e9d182855b12 upstream. 
+ +Commit 5f888a1d33 (ARM64: perf: support dwarf unwinding in compat mode) +changes user_stack_pointer() to return the compat SP for 32-bit tasks +but without brackets around the whole definition, with possible issues +on the call sites (noticed with a subsequent fix for KSTK_ESP). + +Fixes: 5f888a1d33c4 (ARM64: perf: support dwarf unwinding in compat mode) +Reported-by: Sudeep Holla +Signed-off-by: Catalin Marinas +Signed-off-by: Will Deacon +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/ptrace.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm64/include/asm/ptrace.h ++++ b/arch/arm64/include/asm/ptrace.h +@@ -137,7 +137,7 @@ struct pt_regs { + (!((regs)->pstate & PSR_F_BIT)) + + #define user_stack_pointer(regs) \ +- (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) ++ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) + + static inline unsigned long regs_return_value(struct pt_regs *regs) + { diff --git a/queue-3.16/cgroup-delay-the-clearing-of-cgrp-kn-priv.patch b/queue-3.16/cgroup-delay-the-clearing-of-cgrp-kn-priv.patch new file mode 100644 index 00000000000..f2ba8989180 --- /dev/null +++ b/queue-3.16/cgroup-delay-the-clearing-of-cgrp-kn-priv.patch @@ -0,0 +1,107 @@ +From a4189487da1b4f8260c6006b9dc47c3c4107a5ae Mon Sep 17 00:00:00 2001 +From: Li Zefan +Date: Thu, 4 Sep 2014 14:43:07 +0800 +Subject: cgroup: delay the clearing of cgrp->kn->priv +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Li Zefan + +commit a4189487da1b4f8260c6006b9dc47c3c4107a5ae upstream. + +Run these two scripts concurrently: + + for ((; ;)) + { + mkdir /cgroup/sub + rmdir /cgroup/sub + } + + for ((; ;)) + { + echo $$ > /cgroup/sub/cgroup.procs + echo $$ > /cgroup/cgroup.procs + } + +A kernel bug will be triggered: + +BUG: unable to handle kernel NULL pointer dereference at 00000038 +IP: [] cgroup_put+0x9/0x80 +... 
+Call Trace: + [] cgroup_kn_unlock+0x39/0x50 + [] cgroup_kn_lock_live+0x61/0x70 + [] __cgroup_procs_write.isra.26+0x51/0x230 + [] cgroup_tasks_write+0x12/0x20 + [] cgroup_file_write+0x40/0x130 + [] kernfs_fop_write+0xd1/0x160 + [] vfs_write+0x98/0x1e0 + [] SyS_write+0x4d/0xa0 + [] sysenter_do_call+0x12/0x12 + +We clear cgrp->kn->priv in the end of cgroup_rmdir(), but another +concurrent thread can access kn->priv after the clearing. + +We should move the clearing to css_release_work_fn(). At that time +no one is holding reference to the cgroup and no one can gain a new +reference to access it. + +v2: +- move RCU_INIT_POINTER() into the else block. (Tejun) +- remove the cgroup_parent() check. (Tejun) +- update the comment in css_tryget_online_from_dir(). + +Reported-by: Toralf Förster +Signed-off-by: Zefan Li +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -4242,6 +4242,15 @@ static void css_release_work_fn(struct w + /* cgroup release path */ + cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id); + cgrp->id = -1; ++ ++ /* ++ * There are two control paths which try to determine ++ * cgroup from dentry without going through kernfs - ++ * cgroupstats_build() and css_tryget_online_from_dir(). ++ * Those are supported by RCU protecting clearing of ++ * cgrp->kn->priv backpointer. ++ */ ++ RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL); + } + + mutex_unlock(&cgroup_mutex); +@@ -4667,16 +4676,6 @@ static int cgroup_rmdir(struct kernfs_no + + cgroup_kn_unlock(kn); + +- /* +- * There are two control paths which try to determine cgroup from +- * dentry without going through kernfs - cgroupstats_build() and +- * css_tryget_online_from_dir(). Those are supported by RCU +- * protecting clearing of cgrp->kn->priv backpointer, which should +- * happen after all files under it have been removed. 
+- */ +- if (!ret) +- RCU_INIT_POINTER(*(void __rcu __force **)&kn->priv, NULL); +- + cgroup_put(cgrp); + return ret; + } +@@ -5242,7 +5241,7 @@ struct cgroup_subsys_state *css_tryget_o + /* + * This path doesn't originate from kernfs and @kn could already + * have been or be removed at any point. @kn->priv is RCU +- * protected for this access. See cgroup_rmdir() for details. ++ * protected for this access. See css_release_work_fn() for details. + */ + cgrp = rcu_dereference(kn->priv); + if (cgrp) diff --git a/queue-3.16/cgroup-fix-unbalanced-locking.patch b/queue-3.16/cgroup-fix-unbalanced-locking.patch new file mode 100644 index 00000000000..f6d9f6d6676 --- /dev/null +++ b/queue-3.16/cgroup-fix-unbalanced-locking.patch @@ -0,0 +1,36 @@ +From eb4aec84d6bdf98d00cedb41c18000f7a31e648a Mon Sep 17 00:00:00 2001 +From: Zefan Li +Date: Thu, 18 Sep 2014 17:28:46 +0800 +Subject: cgroup: fix unbalanced locking + +From: Zefan Li + +commit eb4aec84d6bdf98d00cedb41c18000f7a31e648a upstream. + +cgroup_pidlist_start() holds cgrp->pidlist_mutex and then calls +pidlist_array_load(), and cgroup_pidlist_stop() releases the mutex. + +It is wrong that we release the mutex in the failure path in +pidlist_array_load(), because cgroup_pidlist_stop() will be called +no matter if cgroup_pidlist_start() returns errno or not. 
+ +Fixes: 4bac00d16a8760eae7205e41d2c246477d42a210 +Signed-off-by: Zefan Li +Signed-off-by: Tejun Heo +Acked-by: Cong Wang +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -3833,7 +3833,6 @@ static int pidlist_array_load(struct cgr + + l = cgroup_pidlist_find_create(cgrp, type); + if (!l) { +- mutex_unlock(&cgrp->pidlist_mutex); + pidlist_free(array); + return -ENOMEM; + } diff --git a/queue-3.16/cgroup-reject-cgroup-names-with-n.patch b/queue-3.16/cgroup-reject-cgroup-names-with-n.patch new file mode 100644 index 00000000000..348a203ed51 --- /dev/null +++ b/queue-3.16/cgroup-reject-cgroup-names-with-n.patch @@ -0,0 +1,34 @@ +From 71b1fb5c4473a5b1e601d41b109bdfe001ec82e0 Mon Sep 17 00:00:00 2001 +From: Alban Crequy +Date: Mon, 18 Aug 2014 12:20:20 +0100 +Subject: cgroup: reject cgroup names with '\n' + +From: Alban Crequy + +commit 71b1fb5c4473a5b1e601d41b109bdfe001ec82e0 upstream. + +/proc//cgroup contains one cgroup path on each line. If cgroup names are +allowed to contain "\n", applications cannot parse /proc//cgroup safely. + +Signed-off-by: Alban Crequy +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -4393,6 +4393,11 @@ static int cgroup_mkdir(struct kernfs_no + struct kernfs_node *kn; + int ssid, ret; + ++ /* Do not accept '\n' to prevent making /proc//cgroup unparsable. 
++ */ ++ if (strchr(name, '\n')) ++ return -EINVAL; ++ + parent = cgroup_kn_lock_live(parent_kn); + if (!parent) + return -ENODEV; diff --git a/queue-3.16/kvm-s390-fix-user-triggerable-bug-in-dead-code.patch b/queue-3.16/kvm-s390-fix-user-triggerable-bug-in-dead-code.patch new file mode 100644 index 00000000000..6b66248a7cb --- /dev/null +++ b/queue-3.16/kvm-s390-fix-user-triggerable-bug-in-dead-code.patch @@ -0,0 +1,50 @@ +From 614a80e474b227cace52fd6e3c790554db8a396e Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Wed, 6 Aug 2014 16:17:58 +0200 +Subject: KVM: s390: Fix user triggerable bug in dead code + +From: Christian Borntraeger + +commit 614a80e474b227cace52fd6e3c790554db8a396e upstream. + +In the early days, we had some special handling for the +KVM_EXIT_S390_SIEIC exit, but this was gone in 2009 with commit +d7b0b5eb3000 (KVM: s390: Make psw available on all exits, not +just a subset). + +Now this switch statement is just a sanity check for userspace +not messing with the kvm_run structure. Unfortunately, this +allows userspace to trigger a kernel BUG. Let's just remove +this switch statement. 
+ +Signed-off-by: Christian Borntraeger +Reviewed-by: Cornelia Huck +Reviewed-by: David Hildenbrand +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/kvm/kvm-s390.c | 13 ------------- + 1 file changed, 13 deletions(-) + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -1286,19 +1286,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + + kvm_s390_vcpu_start(vcpu); + +- switch (kvm_run->exit_reason) { +- case KVM_EXIT_S390_SIEIC: +- case KVM_EXIT_UNKNOWN: +- case KVM_EXIT_INTR: +- case KVM_EXIT_S390_RESET: +- case KVM_EXIT_S390_UCONTROL: +- case KVM_EXIT_S390_TSCH: +- case KVM_EXIT_DEBUG: +- break; +- default: +- BUG(); +- } +- + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { diff --git a/queue-3.16/kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch b/queue-3.16/kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch new file mode 100644 index 00000000000..89f945e36a9 --- /dev/null +++ b/queue-3.16/kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch @@ -0,0 +1,36 @@ +From 1951497d90d6754201af3e65241a06f9ef6755cd Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Thu, 28 Aug 2014 23:44:57 +0200 +Subject: KVM: s390/mm: Fix guest storage key corruption in ptep_set_access_flags + +From: Christian Borntraeger + +commit 1951497d90d6754201af3e65241a06f9ef6755cd upstream. + +commit 0944fe3f4a32 ("s390/mm: implement software referenced bits") +triggered another paging/storage key corruption. There is an +unhandled invalid->valid pte change where we have to set the real +storage key from the pgste. +When doing paging a guest page might be swapcache or swap and when +faulted in it might be read-only and due to a parallel scan old. +An do_wp_page will make it writeable and young. Due to software +reference tracking this page was invalid and now becomes valid. 
+ +Signed-off-by: Christian Borntraeger +Acked-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/include/asm/pgtable.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/s390/include/asm/pgtable.h ++++ b/arch/s390/include/asm/pgtable.h +@@ -1319,6 +1319,7 @@ static inline int ptep_set_access_flags( + ptep_flush_direct(vma->vm_mm, address, ptep); + + if (mm_has_pgste(vma->vm_mm)) { ++ pgste_set_key(ptep, pgste, entry, vma->vm_mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + } else diff --git a/queue-3.16/kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch b/queue-3.16/kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch new file mode 100644 index 00000000000..698b6f5420c --- /dev/null +++ b/queue-3.16/kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch @@ -0,0 +1,49 @@ +From 3e03d4c46daa849880837d802e41c14132a03ef9 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Thu, 28 Aug 2014 21:21:41 +0200 +Subject: KVM: s390/mm: Fix storage key corruption during swapping + +From: Christian Borntraeger + +commit 3e03d4c46daa849880837d802e41c14132a03ef9 upstream. + +Since 3.12 or more precisely commit 0944fe3f4a32 ("s390/mm: +implement software referenced bits") guest storage keys get +corrupted during paging. This commit added another valid->invalid +translation for page tables - namely ptep_test_and_clear_young. +We have to transfer the storage key into the pgste in that case. 
+ +Signed-off-by: Christian Borntraeger +Acked-by: Martin Schwidefsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/include/asm/pgtable.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/arch/s390/include/asm/pgtable.h ++++ b/arch/s390/include/asm/pgtable.h +@@ -1115,7 +1115,7 @@ static inline int ptep_test_and_clear_yo + unsigned long addr, pte_t *ptep) + { + pgste_t pgste; +- pte_t pte; ++ pte_t pte, oldpte; + int young; + + if (mm_has_pgste(vma->vm_mm)) { +@@ -1123,12 +1123,13 @@ static inline int ptep_test_and_clear_yo + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + } + +- pte = *ptep; ++ oldpte = pte = *ptep; + ptep_flush_direct(vma->vm_mm, addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { ++ pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else diff --git a/queue-3.16/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch b/queue-3.16/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch new file mode 100644 index 00000000000..bee73007110 --- /dev/null +++ b/queue-3.16/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch @@ -0,0 +1,45 @@ +From ab3f285f227fec62868037e9b1b1fd18294a83b8 Mon Sep 17 00:00:00 2001 +From: Christian Borntraeger +Date: Tue, 19 Aug 2014 16:19:35 +0200 +Subject: KVM: s390/mm: try a cow on read only pages for key ops + +From: Christian Borntraeger + +commit ab3f285f227fec62868037e9b1b1fd18294a83b8 upstream. + +The PFMF instruction handler blindly wrote the storage key even if +the page was mapped R/O in the host. Lets try a COW before continuing +and bail out in case of errors. 
+ +Signed-off-by: Christian Borntraeger +Reviewed-by: Dominik Dingel +Signed-off-by: Greg Kroah-Hartman + +--- + arch/s390/mm/pgtable.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/arch/s390/mm/pgtable.c ++++ b/arch/s390/mm/pgtable.c +@@ -986,11 +986,21 @@ int set_guest_storage_key(struct mm_stru + pte_t *ptep; + + down_read(&mm->mmap_sem); ++retry: + ptep = get_locked_pte(current->mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } ++ if (!(pte_val(*ptep) & _PAGE_INVALID) && ++ (pte_val(*ptep) & _PAGE_PROTECT)) { ++ pte_unmap_unlock(*ptep, ptl); ++ if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { ++ up_read(&mm->mmap_sem); ++ return -EFAULT; ++ } ++ goto retry; ++ } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | diff --git a/queue-3.16/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch b/queue-3.16/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch new file mode 100644 index 00000000000..9c9d2fa3d25 --- /dev/null +++ b/queue-3.16/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch @@ -0,0 +1,48 @@ +From 0cfb8f0c3e21e36d4a6e472e4c419d58ba848698 Mon Sep 17 00:00:00 2001 +From: Tang Chen +Date: Fri, 29 Aug 2014 15:18:31 -0700 +Subject: memblock, memhotplug: fix wrong type in memblock_find_in_range_node(). + +From: Tang Chen + +commit 0cfb8f0c3e21e36d4a6e472e4c419d58ba848698 upstream. + +In memblock_find_in_range_node(), we defined ret as int. But it should +be phys_addr_t because it is used to store the return value from +__memblock_find_range_bottom_up(). + +The bug has not been triggered because when allocating low memory near +the kernel end, the "int ret" won't turn out to be negative. When we +started to allocate memory on other nodes, and the "int ret" could be +minus. Then the kernel will panic. 
+ +A simple way to reproduce this: comment out the following code in +numa_init(), + + memblock_set_bottom_up(false); + +and the kernel won't boot. + +Reported-by: Xishi Qiu +Signed-off-by: Tang Chen +Tested-by: Xishi Qiu +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memblock.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/mm/memblock.c ++++ b/mm/memblock.c +@@ -192,8 +192,7 @@ phys_addr_t __init_memblock memblock_fin + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid) + { +- int ret; +- phys_addr_t kernel_end; ++ phys_addr_t kernel_end, ret; + + /* pump up @end */ + if (end == MEMBLOCK_ALLOC_ACCESSIBLE) diff --git a/queue-3.16/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch b/queue-3.16/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch new file mode 100644 index 00000000000..79435aeb0b8 --- /dev/null +++ b/queue-3.16/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch @@ -0,0 +1,33 @@ +From 5c1ebe7f73f9166893c3459915db8a09d6d1d715 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Wed, 27 Aug 2014 13:09:12 +0100 +Subject: regmap: Don't attempt block writes when syncing cache on single_rw devices + +From: Mark Brown + +commit 5c1ebe7f73f9166893c3459915db8a09d6d1d715 upstream. + +If the device can't support block writes then don't attempt to use raw +syncing which will automatically generate block writes for adjacent +registers, use the existing _single() block syncing implementation. 
+ +Reported-by: Jarkko Nikula +Tested-by: Jarkko Nikula +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/regmap/regcache.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/regmap/regcache.c ++++ b/drivers/base/regmap/regcache.c +@@ -698,7 +698,7 @@ int regcache_sync_block(struct regmap *m + unsigned int block_base, unsigned int start, + unsigned int end) + { +- if (regmap_can_raw_write(map)) ++ if (regmap_can_raw_write(map) && !map->use_single_rw) + return regcache_sync_block_raw(map, block, cache_present, + block_base, start, end); + else diff --git a/queue-3.16/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch b/queue-3.16/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch new file mode 100644 index 00000000000..61d351cf35e --- /dev/null +++ b/queue-3.16/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch @@ -0,0 +1,39 @@ +From 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 Mon Sep 17 00:00:00 2001 +From: Mark Brown +Date: Tue, 26 Aug 2014 12:12:17 +0100 +Subject: regmap: Fix handling of volatile registers for format_write() chips + +From: Mark Brown + +commit 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 upstream. + +A previous over-zealous factorisation of code means that we only treat +registers as volatile if they are readable. For most devices this is fine +since normally most registers can be read and volatility implies +readability but for format_write() devices where there is no readback from +the hardware and we use volatility to mean simply uncacheability this means +that we end up treating all registers as cacheble. + +A bigger refactoring of the code to clarify this is in order but as a fix +make a minimal change and only check readability when checking volatility +if there is no format_write() operation defined for the device. 
+ +Signed-off-by: Mark Brown +Tested-by: Lars-Peter Clausen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/regmap/regmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/base/regmap/regmap.c ++++ b/drivers/base/regmap/regmap.c +@@ -109,7 +109,7 @@ bool regmap_readable(struct regmap *map, + + bool regmap_volatile(struct regmap *map, unsigned int reg) + { +- if (!regmap_readable(map, reg)) ++ if (!map->format.format_write && !regmap_readable(map, reg)) + return false; + + if (map->volatile_reg) diff --git a/queue-3.16/regmap-fix-regcache-debugfs-initialization.patch b/queue-3.16/regmap-fix-regcache-debugfs-initialization.patch new file mode 100644 index 00000000000..15c303e1b86 --- /dev/null +++ b/queue-3.16/regmap-fix-regcache-debugfs-initialization.patch @@ -0,0 +1,85 @@ +From 5e0cbe78762b5f02986bf9e59a188dad2f6e0be1 Mon Sep 17 00:00:00 2001 +From: Lars-Peter Clausen +Date: Sun, 24 Aug 2014 15:32:27 +0200 +Subject: regmap: Fix regcache debugfs initialization + +From: Lars-Peter Clausen + +commit 5e0cbe78762b5f02986bf9e59a188dad2f6e0be1 upstream. + +Commit 6cfec04bcc05 ("regmap: Separate regmap dev initialization") moved the +regmap debugfs initialization after regcache initialization. This means +that the regmap debugfs directory is not created yet when the cache +initialization runs and so any debugfs files registered by the regcache are +created in the debugfs root directory rather than the debugfs directory of +the regmap instance. Fix this by adding a separate callback for the +regcache debugfs initialization which will be called after the parent +debugfs entry has been created. 
+ +Fixes: 6cfec04bcc05 (regmap: Separate regmap dev initialization) +Signed-off-by: Lars-Peter Clausen +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/base/regmap/internal.h | 3 +++ + drivers/base/regmap/regcache-rbtree.c | 9 +++------ + drivers/base/regmap/regmap-debugfs.c | 3 +++ + 3 files changed, 9 insertions(+), 6 deletions(-) + +--- a/drivers/base/regmap/internal.h ++++ b/drivers/base/regmap/internal.h +@@ -146,6 +146,9 @@ struct regcache_ops { + enum regcache_type type; + int (*init)(struct regmap *map); + int (*exit)(struct regmap *map); ++#ifdef CONFIG_DEBUG_FS ++ void (*debugfs_init)(struct regmap *map); ++#endif + int (*read)(struct regmap *map, unsigned int reg, unsigned int *value); + int (*write)(struct regmap *map, unsigned int reg, unsigned int value); + int (*sync)(struct regmap *map, unsigned int min, unsigned int max); +--- a/drivers/base/regmap/regcache-rbtree.c ++++ b/drivers/base/regmap/regcache-rbtree.c +@@ -194,10 +194,6 @@ static void rbtree_debugfs_init(struct r + { + debugfs_create_file("rbtree", 0400, map->debugfs, map, &rbtree_fops); + } +-#else +-static void rbtree_debugfs_init(struct regmap *map) +-{ +-} + #endif + + static int regcache_rbtree_init(struct regmap *map) +@@ -222,8 +218,6 @@ static int regcache_rbtree_init(struct r + goto err; + } + +- rbtree_debugfs_init(map); +- + return 0; + + err: +@@ -532,6 +526,9 @@ struct regcache_ops regcache_rbtree_ops + .name = "rbtree", + .init = regcache_rbtree_init, + .exit = regcache_rbtree_exit, ++#ifdef CONFIG_DEBUG_FS ++ .debugfs_init = rbtree_debugfs_init, ++#endif + .read = regcache_rbtree_read, + .write = regcache_rbtree_write, + .sync = regcache_rbtree_sync, +--- a/drivers/base/regmap/regmap-debugfs.c ++++ b/drivers/base/regmap/regmap-debugfs.c +@@ -538,6 +538,9 @@ void regmap_debugfs_init(struct regmap * + + next = rb_next(&range_node->node); + } ++ ++ if (map->cache_ops && map->cache_ops->debugfs_init) ++ map->cache_ops->debugfs_init(map); + } + + 
void regmap_debugfs_exit(struct regmap *map) diff --git a/queue-3.16/series b/queue-3.16/series index 75e7f578bf0..d5c7aab81c1 100644 --- a/queue-3.16/series +++ b/queue-3.16/series @@ -198,3 +198,20 @@ acpi-platform-lpss-disable-async-suspend-resume-of-lpss-devices.patch acpi-hotplug-generate-online-uevents-for-acpi-containers.patch acpi-scan-correct-error-return-value-of-create_modalias.patch acpi-video-disable-native-backlight-for-thinkpad-x201s.patch +arm64-add-brackets-around-user_stack_pointer.patch +memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch +regmap-fix-regcache-debugfs-initialization.patch +regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch +regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch +cgroup-reject-cgroup-names-with-n.patch +cgroup-delay-the-clearing-of-cgrp-kn-priv.patch +cgroup-fix-unbalanced-locking.patch +kvm-s390-fix-user-triggerable-bug-in-dead-code.patch +kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch +kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch +kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch +xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch +x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch +x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch +x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch +shmem-fix-nlink-for-rename-overwrite-directory.patch diff --git a/queue-3.16/shmem-fix-nlink-for-rename-overwrite-directory.patch b/queue-3.16/shmem-fix-nlink-for-rename-overwrite-directory.patch new file mode 100644 index 00000000000..b15a5cca263 --- /dev/null +++ b/queue-3.16/shmem-fix-nlink-for-rename-overwrite-directory.patch @@ -0,0 +1,77 @@ +From b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Wed, 24 Sep 2014 17:56:17 +0200 +Subject: shmem: fix nlink for rename overwrite directory + +From: Miklos Szeredi + 
+commit b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 upstream. + +If overwriting an empty directory with rename, then need to drop the extra +nlink. + +Test prog: + +#include +#include +#include +#include + +int main(void) +{ + const char *test_dir1 = "test-dir1"; + const char *test_dir2 = "test-dir2"; + int res; + int fd; + struct stat statbuf; + + res = mkdir(test_dir1, 0777); + if (res == -1) + err(1, "mkdir(\"%s\")", test_dir1); + + res = mkdir(test_dir2, 0777); + if (res == -1) + err(1, "mkdir(\"%s\")", test_dir2); + + fd = open(test_dir2, O_RDONLY); + if (fd == -1) + err(1, "open(\"%s\")", test_dir2); + + res = rename(test_dir1, test_dir2); + if (res == -1) + err(1, "rename(\"%s\", \"%s\")", test_dir1, test_dir2); + + res = fstat(fd, &statbuf); + if (res == -1) + err(1, "fstat(%i)", fd); + + if (statbuf.st_nlink != 0) { + fprintf(stderr, "nlink is %lu, should be 0\n", statbuf.st_nlink); + return 1; + } + + return 0; +} + +Signed-off-by: Miklos Szeredi +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + mm/shmem.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2064,8 +2064,10 @@ static int shmem_rename(struct inode *ol + + if (new_dentry->d_inode) { + (void) shmem_unlink(new_dir, new_dentry); +- if (they_are_dirs) ++ if (they_are_dirs) { ++ drop_nlink(new_dentry->d_inode); + drop_nlink(old_dir); ++ } + } else if (they_are_dirs) { + drop_nlink(old_dir); + inc_nlink(new_dir); diff --git a/queue-3.16/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch b/queue-3.16/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch new file mode 100644 index 00000000000..d7aa1f3a88d --- /dev/null +++ b/queue-3.16/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch @@ -0,0 +1,109 @@ +From 3eddc69ffeba092d288c386646bfa5ec0fce25fd Mon Sep 17 00:00:00 2001 +From: Dave Young +Date: Tue, 26 Aug 2014 17:06:41 +0800 +Subject: x86 early_ioremap: Increase FIX_BTMAPS_SLOTS to 8 + +From: Dave Young + +commit 
3eddc69ffeba092d288c386646bfa5ec0fce25fd upstream. + +3.16 kernel boot fail with earlyprintk=efi, it keeps scrolling at the +bottom line of screen. + +Bisected, the first bad commit is below: +commit 86dfc6f339886559d80ee0d4bd20fe5ee90450f0 +Author: Lv Zheng +Date: Fri Apr 4 12:38:57 2014 +0800 + + ACPICA: Tables: Fix table checksums verification before installation. + +I did some debugging by enabling both serial and efi earlyprintk, below is +some debug dmesg, seems early_ioremap fails in scroll up function due to +no free slot, see below dmesg output: + + WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:116 __early_ioremap+0x90/0x1c4() + __early_ioremap(ed00c800, 00000c80) not found slot + Modules linked in: + CPU: 0 PID: 0 Comm: swapper Not tainted 3.17.0-rc1+ #204 + Hardware name: Hewlett-Packard HP Z420 Workstation/1589, BIOS J61 v03.15 05/09/2013 + Call Trace: + dump_stack+0x4e/0x7a + warn_slowpath_common+0x75/0x8e + ? __early_ioremap+0x90/0x1c4 + warn_slowpath_fmt+0x47/0x49 + __early_ioremap+0x90/0x1c4 + ? sprintf+0x46/0x48 + early_ioremap+0x13/0x15 + early_efi_map+0x24/0x26 + early_efi_scroll_up+0x6d/0xc0 + early_efi_write+0x1b0/0x214 + call_console_drivers.constprop.21+0x73/0x7e + console_unlock+0x151/0x3b2 + ? vprintk_emit+0x49f/0x532 + vprintk_emit+0x521/0x532 + ? console_unlock+0x383/0x3b2 + printk+0x4f/0x51 + acpi_os_vprintf+0x2b/0x2d + acpi_os_printf+0x43/0x45 + acpi_info+0x5c/0x63 + ? __acpi_map_table+0x13/0x18 + ? acpi_os_map_iomem+0x21/0x147 + acpi_tb_print_table_header+0x177/0x186 + acpi_tb_install_table_with_override+0x4b/0x62 + acpi_tb_install_standard_table+0xd9/0x215 + ? early_ioremap+0x13/0x15 + ? __acpi_map_table+0x13/0x18 + acpi_tb_parse_root_table+0x16e/0x1b4 + acpi_initialize_tables+0x57/0x59 + acpi_table_init+0x50/0xce + acpi_boot_table_init+0x1e/0x85 + setup_arch+0x9b7/0xcc4 + start_kernel+0x94/0x42d + ? 
early_idt_handlers+0x120/0x120 + x86_64_start_reservations+0x2a/0x2c + x86_64_start_kernel+0xf3/0x100 + +Quote reply from Lv.zheng about the early ioremap slot usage in this case: + +""" +In early_efi_scroll_up(), 2 mapping entries will be used for the src/dst screen buffer. +In drivers/acpi/acpica/tbutils.c, we've improved the early table loading code in acpi_tb_parse_root_table(). +We now need 2 mapping entries: +1. One mapping entry is used for RSDT table mapping. Each RSDT entry contains an address for another ACPI table. +2. For each entry in RSDP, we need another mapping entry to map the table to perform necessary check/override before installing it. + +When acpi_tb_parse_root_table() prints something through EFI earlyprintk console, we'll have 4 mapping entries used. +The current 4 slots setting of early_ioremap() seems to be too small for such a use case. +""" + +Thus increase the slot to 8 in this patch to fix this issue. +boot-time mappings become 512 page with this patch. + +Signed-off-by: Dave Young +Signed-off-by: Matt Fleming +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/fixmap.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/fixmap.h ++++ b/arch/x86/include/asm/fixmap.h +@@ -106,14 +106,14 @@ enum fixed_addresses { + __end_of_permanent_fixed_addresses, + + /* +- * 256 temporary boot-time mappings, used by early_ioremap(), ++ * 512 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. 
+ * +- * If necessary we round it up to the next 256 pages boundary so ++ * If necessary we round it up to the next 512 pages boundary so + * that we can have a single pgd entry and a single pte table: + */ + #define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_SLOTS 4 ++#define FIX_BTMAPS_SLOTS 8 + #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + FIX_BTMAP_END = + (__end_of_permanent_fixed_addresses ^ diff --git a/queue-3.16/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch b/queue-3.16/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch new file mode 100644 index 00000000000..bcc09ae56d8 --- /dev/null +++ b/queue-3.16/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch @@ -0,0 +1,59 @@ +From 0cacbfbeb5077b63d5d3cf6df88b14ac12ad584b Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 11 Sep 2014 09:19:31 -0700 +Subject: x86/kaslr: Avoid the setup_data area when picking location + +From: Kees Cook + +commit 0cacbfbeb5077b63d5d3cf6df88b14ac12ad584b upstream. + +The KASLR location-choosing logic needs to avoid the setup_data +list memory areas as well. Without this, it would be possible to +have the ASLR position stomp on the memory, ultimately causing +the boot to fail. + +Signed-off-by: Kees Cook +Tested-by: Baoquan He +Cc: Vivek Goyal +Cc: Rafael J. 
Wysocki +Cc: Wei Yongjun +Cc: Pavel Machek +Cc: Linus Torvalds +Link: http://lkml.kernel.org/r/20140911161931.GA12001@www.outflux.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/boot/compressed/aslr.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/arch/x86/boot/compressed/aslr.c ++++ b/arch/x86/boot/compressed/aslr.c +@@ -183,12 +183,27 @@ static void mem_avoid_init(unsigned long + static bool mem_avoid_overlap(struct mem_vector *img) + { + int i; ++ struct setup_data *ptr; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + ++ /* Avoid all entries in the setup_data linked list. */ ++ ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; ++ while (ptr) { ++ struct mem_vector avoid; ++ ++ avoid.start = (u64)ptr; ++ avoid.size = sizeof(*ptr) + ptr->len; ++ ++ if (mem_overlaps(img, &avoid)) ++ return true; ++ ++ ptr = (struct setup_data *)(unsigned long)ptr->next; ++ } ++ + return false; + } + diff --git a/queue-3.16/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch b/queue-3.16/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch new file mode 100644 index 00000000000..807056d313b --- /dev/null +++ b/queue-3.16/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch @@ -0,0 +1,131 @@ +From 0b5a50635fc916cf46e3de0b819a61fc3f17e7ee Mon Sep 17 00:00:00 2001 +From: Stefan Bader +Date: Tue, 2 Sep 2014 11:16:01 +0100 +Subject: x86/xen: don't copy bogus duplicate entries into kernel page tables + +From: Stefan Bader + +commit 0b5a50635fc916cf46e3de0b819a61fc3f17e7ee upstream. + +When RANDOMIZE_BASE (KASLR) is enabled; or the sum of all loaded +modules exceeds 512 MiB, then loading modules fails with a warning +(and hence a vmalloc allocation failure) because the PTEs for the +newly-allocated vmalloc address space are not zero. 
+ + WARNING: CPU: 0 PID: 494 at linux/mm/vmalloc.c:128 + vmap_page_range_noflush+0x2a1/0x360() + +This is caused by xen_setup_kernel_pagetable() copying +level2_kernel_pgt into level2_fixmap_pgt, overwriting many non-present +entries. + +Without KASLR, the normal kernel image size only covers the first half +of level2_kernel_pgt and module space starts after that. + +L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[ 0..255]->kernel + [256..511]->module + [511]->level2_fixmap_pgt[ 0..505]->module + +This allows 512 MiB of module vmalloc space to be used before +having to use the corrupted level2_fixmap_pgt entries. + +With KASLR enabled, the kernel image uses the full PUD range of 1G and +module space starts in the level2_fixmap_pgt. So basically: + +L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[0..511]->kernel + [511]->level2_fixmap_pgt[0..505]->module + +And now no module vmalloc space can be used without using the corrupt +level2_fixmap_pgt entries. + +Fix this by properly converting the level2_fixmap_pgt entries to MFNs, +and setting level1_fixmap_pgt as read-only. + +A number of comments were also using the wrong L3 offset for +level2_kernel_pgt. These have been corrected. 
+ +Signed-off-by: Stefan Bader +Signed-off-by: David Vrabel +Reviewed-by: Boris Ostrovsky +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/pgtable_64.h | 1 + + arch/x86/xen/mmu.c | 27 ++++++++++++--------------- + 2 files changed, 13 insertions(+), 15 deletions(-) + +--- a/arch/x86/include/asm/pgtable_64.h ++++ b/arch/x86/include/asm/pgtable_64.h +@@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512]; + extern pmd_t level2_kernel_pgt[512]; + extern pmd_t level2_fixmap_pgt[512]; + extern pmd_t level2_ident_pgt[512]; ++extern pte_t level1_fixmap_pgt[512]; + extern pgd_t init_level4_pgt[]; + + #define swapper_pg_dir init_level4_pgt +--- a/arch/x86/xen/mmu.c ++++ b/arch/x86/xen/mmu.c +@@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigne + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into +- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This +- * means that only the kernel has a physical mapping to start with - +- * but that's enough to get __va working. We need to fill in the rest +- * of the physical mapping once some sort of allocator has been set +- * up. +- * NOTE: for PVH, the page tables are native. ++ * level2_ident_pgt, and level2_kernel_pgt. This means that only the ++ * kernel has a physical mapping to start with - but that's enough to ++ * get __va working. We need to fill in the rest of the physical ++ * mapping once some sort of allocator has been set up. NOTE: for ++ * PVH, the page tables are native. 
+ */ + void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) + { +@@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(p + /* L3_i[0] -> level2_ident_pgt */ + convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt +- * L3_i[511] -> level2_fixmap_pgt */ ++ * L3_k[511] -> level2_fixmap_pgt */ + convert_pfn_mfn(level3_kernel_pgt); ++ ++ /* L3_k[511][506] -> level1_fixmap_pgt */ ++ convert_pfn_mfn(level2_fixmap_pgt); + } + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); +@@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(p + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: +- * Both L4[272][0] and L4[511][511] have entries that point to the same ++ * Both L4[272][0] and L4[511][510] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __ka space! (But if you just + * modify the PMD table to point to other PTE's or none, then you + * are OK - which is what cleanup_highmap does) */ + copy_page(level2_ident_pgt, l2); +- /* Graft it onto L4[511][511] */ ++ /* Graft it onto L4[511][510] */ + copy_page(level2_kernel_pgt, l2); + +- /* Get [511][510] and graft that in level2_fixmap_pgt */ +- l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); +- l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); +- copy_page(level2_fixmap_pgt, l2); +- /* Note that we don't do anything with level1_fixmap_pgt which +- * we don't need. 
*/ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); +@@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(p + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, diff --git a/queue-3.16/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch b/queue-3.16/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch new file mode 100644 index 00000000000..60a5ae00daf --- /dev/null +++ b/queue-3.16/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch @@ -0,0 +1,59 @@ +From 61a734d305e16944b42730ef582a7171dc733321 Mon Sep 17 00:00:00 2001 +From: Ross Lagerwall +Date: Mon, 18 Aug 2014 10:41:36 +0100 +Subject: xen/manage: Always freeze/thaw processes when suspend/resuming + +From: Ross Lagerwall + +commit 61a734d305e16944b42730ef582a7171dc733321 upstream. + +Always freeze processes when suspending and thaw processes when resuming +to prevent a race noticeable with HVM guests. + +This prevents a deadlock where the khubd kthread (which is designed to +be freezable) acquires a usb device lock and then tries to allocate +memory which requires the disk which hasn't been resumed yet. +Meanwhile, the xenwatch thread deadlocks waiting for the usb device +lock. + +Freezing processes fixes this because the khubd thread is only thawed +after the xenwatch thread finishes resuming all the devices. 
+ +Signed-off-by: Ross Lagerwall +Signed-off-by: David Vrabel +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/xen/manage.c | 7 ------- + 1 file changed, 7 deletions(-) + +--- a/drivers/xen/manage.c ++++ b/drivers/xen/manage.c +@@ -103,16 +103,11 @@ static void do_suspend(void) + + shutting_down = SHUTDOWN_SUSPEND; + +-#ifdef CONFIG_PREEMPT +- /* If the kernel is preemptible, we need to freeze all the processes +- to prevent them from being in the middle of a pagetable update +- during suspend. */ + err = freeze_processes(); + if (err) { + pr_err("%s: freeze failed %d\n", __func__, err); + goto out; + } +-#endif + + err = dpm_suspend_start(PMSG_FREEZE); + if (err) { +@@ -157,10 +152,8 @@ out_resume: + dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE); + + out_thaw: +-#ifdef CONFIG_PREEMPT + thaw_processes(); + out: +-#endif + shutting_down = SHUTDOWN_INVALID; + } + #endif /* CONFIG_HIBERNATE_CALLBACKS */