]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.16-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Oct 2014 03:11:55 +0000 (20:11 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Oct 2014 03:11:55 +0000 (20:11 -0700)
added patches:
arm64-add-brackets-around-user_stack_pointer.patch
cgroup-delay-the-clearing-of-cgrp-kn-priv.patch
cgroup-fix-unbalanced-locking.patch
cgroup-reject-cgroup-names-with-n.patch
kvm-s390-fix-user-triggerable-bug-in-dead-code.patch
kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch
kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch
kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch
memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch
regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch
regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch
regmap-fix-regcache-debugfs-initialization.patch
shmem-fix-nlink-for-rename-overwrite-directory.patch
x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch
x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch
x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch
xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch

18 files changed:
queue-3.16/arm64-add-brackets-around-user_stack_pointer.patch [new file with mode: 0644]
queue-3.16/cgroup-delay-the-clearing-of-cgrp-kn-priv.patch [new file with mode: 0644]
queue-3.16/cgroup-fix-unbalanced-locking.patch [new file with mode: 0644]
queue-3.16/cgroup-reject-cgroup-names-with-n.patch [new file with mode: 0644]
queue-3.16/kvm-s390-fix-user-triggerable-bug-in-dead-code.patch [new file with mode: 0644]
queue-3.16/kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch [new file with mode: 0644]
queue-3.16/kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch [new file with mode: 0644]
queue-3.16/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch [new file with mode: 0644]
queue-3.16/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch [new file with mode: 0644]
queue-3.16/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch [new file with mode: 0644]
queue-3.16/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch [new file with mode: 0644]
queue-3.16/regmap-fix-regcache-debugfs-initialization.patch [new file with mode: 0644]
queue-3.16/series
queue-3.16/shmem-fix-nlink-for-rename-overwrite-directory.patch [new file with mode: 0644]
queue-3.16/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch [new file with mode: 0644]
queue-3.16/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch [new file with mode: 0644]
queue-3.16/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch [new file with mode: 0644]
queue-3.16/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch [new file with mode: 0644]

diff --git a/queue-3.16/arm64-add-brackets-around-user_stack_pointer.patch b/queue-3.16/arm64-add-brackets-around-user_stack_pointer.patch
new file mode 100644 (file)
index 0000000..7bb9c06
--- /dev/null
@@ -0,0 +1,35 @@
+From 2520d039728b2a3c5ae7f79fe2a0e9d182855b12 Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Fri, 29 Aug 2014 16:08:02 +0100
+Subject: arm64: Add brackets around user_stack_pointer()
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit 2520d039728b2a3c5ae7f79fe2a0e9d182855b12 upstream.
+
+Commit 5f888a1d33 (ARM64: perf: support dwarf unwinding in compat mode)
+changes user_stack_pointer() to return the compat SP for 32-bit tasks
+but without brackets around the whole definition, with possible issues
+on the call sites (noticed with a subsequent fix for KSTK_ESP).
+
+Fixes: 5f888a1d33c4 (ARM64: perf: support dwarf unwinding in compat mode)
+Reported-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm64/include/asm/ptrace.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/include/asm/ptrace.h
++++ b/arch/arm64/include/asm/ptrace.h
+@@ -137,7 +137,7 @@ struct pt_regs {
+       (!((regs)->pstate & PSR_F_BIT))
+ #define user_stack_pointer(regs) \
+-      (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp)
++      (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp)
+ static inline unsigned long regs_return_value(struct pt_regs *regs)
+ {
diff --git a/queue-3.16/cgroup-delay-the-clearing-of-cgrp-kn-priv.patch b/queue-3.16/cgroup-delay-the-clearing-of-cgrp-kn-priv.patch
new file mode 100644 (file)
index 0000000..f2ba898
--- /dev/null
@@ -0,0 +1,107 @@
+From a4189487da1b4f8260c6006b9dc47c3c4107a5ae Mon Sep 17 00:00:00 2001
+From: Li Zefan <lizefan@huawei.com>
+Date: Thu, 4 Sep 2014 14:43:07 +0800
+Subject: cgroup: delay the clearing of cgrp->kn->priv
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Li Zefan <lizefan@huawei.com>
+
+commit a4189487da1b4f8260c6006b9dc47c3c4107a5ae upstream.
+
+Run these two scripts concurrently:
+
+    for ((; ;))
+    {
+        mkdir /cgroup/sub
+        rmdir /cgroup/sub
+    }
+
+    for ((; ;))
+    {
+        echo $$ > /cgroup/sub/cgroup.procs
+        echo $$ > /cgroup/cgroup.procs
+    }
+
+A kernel bug will be triggered:
+
+BUG: unable to handle kernel NULL pointer dereference at 00000038
+IP: [<c10bbd69>] cgroup_put+0x9/0x80
+...
+Call Trace:
+ [<c10bbe19>] cgroup_kn_unlock+0x39/0x50
+ [<c10bbe91>] cgroup_kn_lock_live+0x61/0x70
+ [<c10be3c1>] __cgroup_procs_write.isra.26+0x51/0x230
+ [<c10be5b2>] cgroup_tasks_write+0x12/0x20
+ [<c10bb7b0>] cgroup_file_write+0x40/0x130
+ [<c11aee71>] kernfs_fop_write+0xd1/0x160
+ [<c1148e58>] vfs_write+0x98/0x1e0
+ [<c114934d>] SyS_write+0x4d/0xa0
+ [<c16f656b>] sysenter_do_call+0x12/0x12
+
+We clear cgrp->kn->priv at the end of cgroup_rmdir(), but another
+concurrent thread can access kn->priv after the clearing.
+
+We should move the clearing to css_release_work_fn(). At that time
+no one is holding reference to the cgroup and no one can gain a new
+reference to access it.
+
+v2:
+- move RCU_INIT_POINTER() into the else block. (Tejun)
+- remove the cgroup_parent() check. (Tejun)
+- update the comment in css_tryget_online_from_dir().
+
+Reported-by: Toralf Förster <toralf.foerster@gmx.de>
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup.c |   21 ++++++++++-----------
+ 1 file changed, 10 insertions(+), 11 deletions(-)
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -4242,6 +4242,15 @@ static void css_release_work_fn(struct w
+               /* cgroup release path */
+               cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+               cgrp->id = -1;
++
++              /*
++               * There are two control paths which try to determine
++               * cgroup from dentry without going through kernfs -
++               * cgroupstats_build() and css_tryget_online_from_dir().
++               * Those are supported by RCU protecting clearing of
++               * cgrp->kn->priv backpointer.
++               */
++              RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
+       }
+       mutex_unlock(&cgroup_mutex);
+@@ -4667,16 +4676,6 @@ static int cgroup_rmdir(struct kernfs_no
+       cgroup_kn_unlock(kn);
+-      /*
+-       * There are two control paths which try to determine cgroup from
+-       * dentry without going through kernfs - cgroupstats_build() and
+-       * css_tryget_online_from_dir().  Those are supported by RCU
+-       * protecting clearing of cgrp->kn->priv backpointer, which should
+-       * happen after all files under it have been removed.
+-       */
+-      if (!ret)
+-              RCU_INIT_POINTER(*(void __rcu __force **)&kn->priv, NULL);
+-
+       cgroup_put(cgrp);
+       return ret;
+ }
+@@ -5242,7 +5241,7 @@ struct cgroup_subsys_state *css_tryget_o
+       /*
+        * This path doesn't originate from kernfs and @kn could already
+        * have been or be removed at any point.  @kn->priv is RCU
+-       * protected for this access.  See cgroup_rmdir() for details.
++       * protected for this access.  See css_release_work_fn() for details.
+        */
+       cgrp = rcu_dereference(kn->priv);
+       if (cgrp)
diff --git a/queue-3.16/cgroup-fix-unbalanced-locking.patch b/queue-3.16/cgroup-fix-unbalanced-locking.patch
new file mode 100644 (file)
index 0000000..f6d9f6d
--- /dev/null
@@ -0,0 +1,36 @@
+From eb4aec84d6bdf98d00cedb41c18000f7a31e648a Mon Sep 17 00:00:00 2001
+From: Zefan Li <lizefan@huawei.com>
+Date: Thu, 18 Sep 2014 17:28:46 +0800
+Subject: cgroup: fix unbalanced locking
+
+From: Zefan Li <lizefan@huawei.com>
+
+commit eb4aec84d6bdf98d00cedb41c18000f7a31e648a upstream.
+
+cgroup_pidlist_start() holds cgrp->pidlist_mutex and then calls
+pidlist_array_load(), and cgroup_pidlist_stop() releases the mutex.
+
+It is wrong that we release the mutex in the failure path in
+pidlist_array_load(), because cgroup_pidlist_stop() will be called
+no matter if cgroup_pidlist_start() returns errno or not.
+
+Fixes: 4bac00d16a8760eae7205e41d2c246477d42a210
+Signed-off-by: Zefan Li <lizefan@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -3833,7 +3833,6 @@ static int pidlist_array_load(struct cgr
+       l = cgroup_pidlist_find_create(cgrp, type);
+       if (!l) {
+-              mutex_unlock(&cgrp->pidlist_mutex);
+               pidlist_free(array);
+               return -ENOMEM;
+       }
diff --git a/queue-3.16/cgroup-reject-cgroup-names-with-n.patch b/queue-3.16/cgroup-reject-cgroup-names-with-n.patch
new file mode 100644 (file)
index 0000000..348a203
--- /dev/null
@@ -0,0 +1,34 @@
+From 71b1fb5c4473a5b1e601d41b109bdfe001ec82e0 Mon Sep 17 00:00:00 2001
+From: Alban Crequy <alban.crequy@collabora.co.uk>
+Date: Mon, 18 Aug 2014 12:20:20 +0100
+Subject: cgroup: reject cgroup names with '\n'
+
+From: Alban Crequy <alban.crequy@collabora.co.uk>
+
+commit 71b1fb5c4473a5b1e601d41b109bdfe001ec82e0 upstream.
+
+/proc/<pid>/cgroup contains one cgroup path on each line. If cgroup names are
+allowed to contain "\n", applications cannot parse /proc/<pid>/cgroup safely.
+
+Signed-off-by: Alban Crequy <alban.crequy@collabora.co.uk>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/cgroup.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -4393,6 +4393,11 @@ static int cgroup_mkdir(struct kernfs_no
+       struct kernfs_node *kn;
+       int ssid, ret;
++      /* Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable.
++       */
++      if (strchr(name, '\n'))
++              return -EINVAL;
++
+       parent = cgroup_kn_lock_live(parent_kn);
+       if (!parent)
+               return -ENODEV;
diff --git a/queue-3.16/kvm-s390-fix-user-triggerable-bug-in-dead-code.patch b/queue-3.16/kvm-s390-fix-user-triggerable-bug-in-dead-code.patch
new file mode 100644 (file)
index 0000000..6b66248
--- /dev/null
@@ -0,0 +1,50 @@
+From 614a80e474b227cace52fd6e3c790554db8a396e Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Wed, 6 Aug 2014 16:17:58 +0200
+Subject: KVM: s390: Fix user triggerable bug in dead code
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit 614a80e474b227cace52fd6e3c790554db8a396e upstream.
+
+In the early days, we had some special handling for the
+KVM_EXIT_S390_SIEIC exit, but this was gone in 2009 with commit
+d7b0b5eb3000 (KVM: s390: Make psw available on all exits, not
+just a subset).
+
+Now this switch statement is just a sanity check for userspace
+not messing with the kvm_run structure. Unfortunately, this
+allows userspace to trigger a kernel BUG. Let's just remove
+this switch statement.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
+Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kvm/kvm-s390.c |   13 -------------
+ 1 file changed, 13 deletions(-)
+
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -1286,19 +1286,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v
+       kvm_s390_vcpu_start(vcpu);
+-      switch (kvm_run->exit_reason) {
+-      case KVM_EXIT_S390_SIEIC:
+-      case KVM_EXIT_UNKNOWN:
+-      case KVM_EXIT_INTR:
+-      case KVM_EXIT_S390_RESET:
+-      case KVM_EXIT_S390_UCONTROL:
+-      case KVM_EXIT_S390_TSCH:
+-      case KVM_EXIT_DEBUG:
+-              break;
+-      default:
+-              BUG();
+-      }
+-
+       vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+       vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+       if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
diff --git a/queue-3.16/kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch b/queue-3.16/kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch
new file mode 100644 (file)
index 0000000..89f945e
--- /dev/null
@@ -0,0 +1,36 @@
+From 1951497d90d6754201af3e65241a06f9ef6755cd Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 28 Aug 2014 23:44:57 +0200
+Subject: KVM: s390/mm: Fix guest storage key corruption in ptep_set_access_flags
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit 1951497d90d6754201af3e65241a06f9ef6755cd upstream.
+
+commit 0944fe3f4a32 ("s390/mm: implement software referenced bits")
+triggered another paging/storage key corruption. There is an
+unhandled invalid->valid pte change where we have to set the real
+storage key from the pgste.
+When doing paging a guest page might be swapcache or swap and when
+faulted in it might be read-only and due to a parallel scan old.
+An do_wp_page will make it writeable and young. Due to software
+reference tracking this page was invalid and now becomes valid.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/pgtable.h |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -1319,6 +1319,7 @@ static inline int ptep_set_access_flags(
+       ptep_flush_direct(vma->vm_mm, address, ptep);
+       if (mm_has_pgste(vma->vm_mm)) {
++              pgste_set_key(ptep, pgste, entry, vma->vm_mm);
+               pgste = pgste_set_pte(ptep, pgste, entry);
+               pgste_set_unlock(ptep, pgste);
+       } else
diff --git a/queue-3.16/kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch b/queue-3.16/kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch
new file mode 100644 (file)
index 0000000..698b6f5
--- /dev/null
@@ -0,0 +1,49 @@
+From 3e03d4c46daa849880837d802e41c14132a03ef9 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Thu, 28 Aug 2014 21:21:41 +0200
+Subject: KVM: s390/mm: Fix storage key corruption during swapping
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit 3e03d4c46daa849880837d802e41c14132a03ef9 upstream.
+
+Since 3.12, or more precisely commit 0944fe3f4a32 ("s390/mm:
+implement software referenced bits") guest storage keys get
+corrupted during paging. This commit added another valid->invalid
+translation for page tables - namely ptep_test_and_clear_young.
+We have to transfer the storage key into the pgste in that case.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/include/asm/pgtable.h |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -1115,7 +1115,7 @@ static inline int ptep_test_and_clear_yo
+                                           unsigned long addr, pte_t *ptep)
+ {
+       pgste_t pgste;
+-      pte_t pte;
++      pte_t pte, oldpte;
+       int young;
+       if (mm_has_pgste(vma->vm_mm)) {
+@@ -1123,12 +1123,13 @@ static inline int ptep_test_and_clear_yo
+               pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+       }
+-      pte = *ptep;
++      oldpte = pte = *ptep;
+       ptep_flush_direct(vma->vm_mm, addr, ptep);
+       young = pte_young(pte);
+       pte = pte_mkold(pte);
+       if (mm_has_pgste(vma->vm_mm)) {
++              pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
+               pgste = pgste_set_pte(ptep, pgste, pte);
+               pgste_set_unlock(ptep, pgste);
+       } else
diff --git a/queue-3.16/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch b/queue-3.16/kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch
new file mode 100644 (file)
index 0000000..bee7300
--- /dev/null
@@ -0,0 +1,45 @@
+From ab3f285f227fec62868037e9b1b1fd18294a83b8 Mon Sep 17 00:00:00 2001
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+Date: Tue, 19 Aug 2014 16:19:35 +0200
+Subject: KVM: s390/mm: try a cow on read only pages for key ops
+
+From: Christian Borntraeger <borntraeger@de.ibm.com>
+
+commit ab3f285f227fec62868037e9b1b1fd18294a83b8 upstream.
+
+The PFMF instruction handler blindly wrote the storage key even if
+the page was mapped R/O in the host. Let's try a COW before continuing
+and bail out in case of errors.
+
+Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
+Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/mm/pgtable.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/arch/s390/mm/pgtable.c
++++ b/arch/s390/mm/pgtable.c
+@@ -986,11 +986,21 @@ int set_guest_storage_key(struct mm_stru
+       pte_t *ptep;
+       down_read(&mm->mmap_sem);
++retry:
+       ptep = get_locked_pte(current->mm, addr, &ptl);
+       if (unlikely(!ptep)) {
+               up_read(&mm->mmap_sem);
+               return -EFAULT;
+       }
++      if (!(pte_val(*ptep) & _PAGE_INVALID) &&
++           (pte_val(*ptep) & _PAGE_PROTECT)) {
++                      pte_unmap_unlock(*ptep, ptl);
++                      if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
++                              up_read(&mm->mmap_sem);
++                              return -EFAULT;
++                      }
++                      goto retry;
++              }
+       new = old = pgste_get_lock(ptep);
+       pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
diff --git a/queue-3.16/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch b/queue-3.16/memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch
new file mode 100644 (file)
index 0000000..9c9d2fa
--- /dev/null
@@ -0,0 +1,48 @@
+From 0cfb8f0c3e21e36d4a6e472e4c419d58ba848698 Mon Sep 17 00:00:00 2001
+From: Tang Chen <tangchen@cn.fujitsu.com>
+Date: Fri, 29 Aug 2014 15:18:31 -0700
+Subject: memblock, memhotplug: fix wrong type in memblock_find_in_range_node().
+
+From: Tang Chen <tangchen@cn.fujitsu.com>
+
+commit 0cfb8f0c3e21e36d4a6e472e4c419d58ba848698 upstream.
+
+In memblock_find_in_range_node(), we defined ret as int.  But it should
+be phys_addr_t because it is used to store the return value from
+__memblock_find_range_bottom_up().
+
+The bug has not been triggered because when allocating low memory near
+the kernel end, the "int ret" won't turn out to be negative.  Once we
+start to allocate memory on other nodes, the "int ret" can become
+negative, and the kernel will then panic.
+
+A simple way to reproduce this: comment out the following code in
+numa_init(),
+
+        memblock_set_bottom_up(false);
+
+and the kernel won't boot.
+
+Reported-by: Xishi Qiu <qiuxishi@huawei.com>
+Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
+Tested-by: Xishi Qiu <qiuxishi@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/memblock.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -192,8 +192,7 @@ phys_addr_t __init_memblock memblock_fin
+                                       phys_addr_t align, phys_addr_t start,
+                                       phys_addr_t end, int nid)
+ {
+-      int ret;
+-      phys_addr_t kernel_end;
++      phys_addr_t kernel_end, ret;
+       /* pump up @end */
+       if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
diff --git a/queue-3.16/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch b/queue-3.16/regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch
new file mode 100644 (file)
index 0000000..79435ae
--- /dev/null
@@ -0,0 +1,33 @@
+From 5c1ebe7f73f9166893c3459915db8a09d6d1d715 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@linaro.org>
+Date: Wed, 27 Aug 2014 13:09:12 +0100
+Subject: regmap: Don't attempt block writes when syncing cache on single_rw devices
+
+From: Mark Brown <broonie@linaro.org>
+
+commit 5c1ebe7f73f9166893c3459915db8a09d6d1d715 upstream.
+
+If the device can't support block writes then don't attempt to use raw
+syncing which will automatically generate block writes for adjacent
+registers, use the existing _single() block syncing implementation.
+
+Reported-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
+Signed-off-by: Mark Brown <broonie@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/regmap/regcache.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/base/regmap/regcache.c
++++ b/drivers/base/regmap/regcache.c
+@@ -698,7 +698,7 @@ int regcache_sync_block(struct regmap *m
+                       unsigned int block_base, unsigned int start,
+                       unsigned int end)
+ {
+-      if (regmap_can_raw_write(map))
++      if (regmap_can_raw_write(map) && !map->use_single_rw)
+               return regcache_sync_block_raw(map, block, cache_present,
+                                              block_base, start, end);
+       else
diff --git a/queue-3.16/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch b/queue-3.16/regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch
new file mode 100644 (file)
index 0000000..61d351c
--- /dev/null
@@ -0,0 +1,39 @@
+From 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@linaro.org>
+Date: Tue, 26 Aug 2014 12:12:17 +0100
+Subject: regmap: Fix handling of volatile registers for format_write() chips
+
+From: Mark Brown <broonie@linaro.org>
+
+commit 5844a8b9d98ec11ce1d77610daacf3f0a0e14715 upstream.
+
+A previous over-zealous factorisation of code means that we only treat
+registers as volatile if they are readable. For most devices this is fine
+since normally most registers can be read and volatility implies
+readability but for format_write() devices where there is no readback from
+the hardware and we use volatility to mean simply uncacheability this means
+that we end up treating all registers as cacheable.
+
+A bigger refactoring of the code to clarify this is in order but as a fix
+make a minimal change and only check readability when checking volatility
+if there is no format_write() operation defined for the device.
+
+Signed-off-by: Mark Brown <broonie@linaro.org>
+Tested-by: Lars-Peter Clausen <lars@metafoo.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/regmap/regmap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -109,7 +109,7 @@ bool regmap_readable(struct regmap *map,
+ bool regmap_volatile(struct regmap *map, unsigned int reg)
+ {
+-      if (!regmap_readable(map, reg))
++      if (!map->format.format_write && !regmap_readable(map, reg))
+               return false;
+       if (map->volatile_reg)
diff --git a/queue-3.16/regmap-fix-regcache-debugfs-initialization.patch b/queue-3.16/regmap-fix-regcache-debugfs-initialization.patch
new file mode 100644 (file)
index 0000000..15c303e
--- /dev/null
@@ -0,0 +1,85 @@
+From 5e0cbe78762b5f02986bf9e59a188dad2f6e0be1 Mon Sep 17 00:00:00 2001
+From: Lars-Peter Clausen <lars@metafoo.de>
+Date: Sun, 24 Aug 2014 15:32:27 +0200
+Subject: regmap: Fix regcache debugfs initialization
+
+From: Lars-Peter Clausen <lars@metafoo.de>
+
+commit 5e0cbe78762b5f02986bf9e59a188dad2f6e0be1 upstream.
+
+Commit 6cfec04bcc05 ("regmap: Separate regmap dev initialization") moved the
+regmap debugfs initialization after regcache initialization. This means
+that the regmap debugfs directory is not created yet when the cache
+initialization runs and so any debugfs files registered by the regcache are
+created in the debugfs root directory rather than the debugfs directory of
+the regmap instance. Fix this by adding a separate callback for the
+regcache debugfs initialization which will be called after the parent
+debugfs entry has been created.
+
+Fixes: 6cfec04bcc05 (regmap: Separate regmap dev initialization)
+Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
+Signed-off-by: Mark Brown <broonie@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/base/regmap/internal.h        |    3 +++
+ drivers/base/regmap/regcache-rbtree.c |    9 +++------
+ drivers/base/regmap/regmap-debugfs.c  |    3 +++
+ 3 files changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/base/regmap/internal.h
++++ b/drivers/base/regmap/internal.h
+@@ -146,6 +146,9 @@ struct regcache_ops {
+       enum regcache_type type;
+       int (*init)(struct regmap *map);
+       int (*exit)(struct regmap *map);
++#ifdef CONFIG_DEBUG_FS
++      void (*debugfs_init)(struct regmap *map);
++#endif
+       int (*read)(struct regmap *map, unsigned int reg, unsigned int *value);
+       int (*write)(struct regmap *map, unsigned int reg, unsigned int value);
+       int (*sync)(struct regmap *map, unsigned int min, unsigned int max);
+--- a/drivers/base/regmap/regcache-rbtree.c
++++ b/drivers/base/regmap/regcache-rbtree.c
+@@ -194,10 +194,6 @@ static void rbtree_debugfs_init(struct r
+ {
+       debugfs_create_file("rbtree", 0400, map->debugfs, map, &rbtree_fops);
+ }
+-#else
+-static void rbtree_debugfs_init(struct regmap *map)
+-{
+-}
+ #endif
+ static int regcache_rbtree_init(struct regmap *map)
+@@ -222,8 +218,6 @@ static int regcache_rbtree_init(struct r
+                       goto err;
+       }
+-      rbtree_debugfs_init(map);
+-
+       return 0;
+ err:
+@@ -532,6 +526,9 @@ struct regcache_ops regcache_rbtree_ops
+       .name = "rbtree",
+       .init = regcache_rbtree_init,
+       .exit = regcache_rbtree_exit,
++#ifdef CONFIG_DEBUG_FS
++      .debugfs_init = rbtree_debugfs_init,
++#endif
+       .read = regcache_rbtree_read,
+       .write = regcache_rbtree_write,
+       .sync = regcache_rbtree_sync,
+--- a/drivers/base/regmap/regmap-debugfs.c
++++ b/drivers/base/regmap/regmap-debugfs.c
+@@ -538,6 +538,9 @@ void regmap_debugfs_init(struct regmap *
+               next = rb_next(&range_node->node);
+       }
++
++      if (map->cache_ops && map->cache_ops->debugfs_init)
++              map->cache_ops->debugfs_init(map);
+ }
+ void regmap_debugfs_exit(struct regmap *map)
index 75e7f578bf0191105985a888d0b7d9a94f1cad7b..d5c7aab81c13324177dc3e9ccb55136d7e2bdeae 100644 (file)
@@ -198,3 +198,20 @@ acpi-platform-lpss-disable-async-suspend-resume-of-lpss-devices.patch
 acpi-hotplug-generate-online-uevents-for-acpi-containers.patch
 acpi-scan-correct-error-return-value-of-create_modalias.patch
 acpi-video-disable-native-backlight-for-thinkpad-x201s.patch
+arm64-add-brackets-around-user_stack_pointer.patch
+memblock-memhotplug-fix-wrong-type-in-memblock_find_in_range_node.patch
+regmap-fix-regcache-debugfs-initialization.patch
+regmap-fix-handling-of-volatile-registers-for-format_write-chips.patch
+regmap-don-t-attempt-block-writes-when-syncing-cache-on-single_rw-devices.patch
+cgroup-reject-cgroup-names-with-n.patch
+cgroup-delay-the-clearing-of-cgrp-kn-priv.patch
+cgroup-fix-unbalanced-locking.patch
+kvm-s390-fix-user-triggerable-bug-in-dead-code.patch
+kvm-s390-mm-try-a-cow-on-read-only-pages-for-key-ops.patch
+kvm-s390-mm-fix-storage-key-corruption-during-swapping.patch
+kvm-s390-mm-fix-guest-storage-key-corruption-in-ptep_set_access_flags.patch
+xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch
+x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch
+x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch
+x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch
+shmem-fix-nlink-for-rename-overwrite-directory.patch
diff --git a/queue-3.16/shmem-fix-nlink-for-rename-overwrite-directory.patch b/queue-3.16/shmem-fix-nlink-for-rename-overwrite-directory.patch
new file mode 100644 (file)
index 0000000..b15a5cc
--- /dev/null
@@ -0,0 +1,77 @@
+From b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@suse.cz>
+Date: Wed, 24 Sep 2014 17:56:17 +0200
+Subject: shmem: fix nlink for rename overwrite directory
+
+From: Miklos Szeredi <mszeredi@suse.cz>
+
+commit b928095b0a7cff7fb9fcf4c706348ceb8ab2c295 upstream.
+
+If overwriting an empty directory with rename, then need to drop the extra
+nlink.
+
+Test prog:
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <err.h>
+#include <sys/stat.h>
+
+int main(void)
+{
+       const char *test_dir1 = "test-dir1";
+       const char *test_dir2 = "test-dir2";
+       int res;
+       int fd;
+       struct stat statbuf;
+
+       res = mkdir(test_dir1, 0777);
+       if (res == -1)
+               err(1, "mkdir(\"%s\")", test_dir1);
+
+       res = mkdir(test_dir2, 0777);
+       if (res == -1)
+               err(1, "mkdir(\"%s\")", test_dir2);
+
+       fd = open(test_dir2, O_RDONLY);
+       if (fd == -1)
+               err(1, "open(\"%s\")", test_dir2);
+
+       res = rename(test_dir1, test_dir2);
+       if (res == -1)
+               err(1, "rename(\"%s\", \"%s\")", test_dir1, test_dir2);
+
+       res = fstat(fd, &statbuf);
+       if (res == -1)
+               err(1, "fstat(%i)", fd);
+
+       if (statbuf.st_nlink != 0) {
+               fprintf(stderr, "nlink is %lu, should be 0\n", statbuf.st_nlink);
+               return 1;
+       }
+
+       return 0;
+}
+
+Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ mm/shmem.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2064,8 +2064,10 @@ static int shmem_rename(struct inode *ol
+       if (new_dentry->d_inode) {
+               (void) shmem_unlink(new_dir, new_dentry);
+-              if (they_are_dirs)
++              if (they_are_dirs) {
++                      drop_nlink(new_dentry->d_inode);
+                       drop_nlink(old_dir);
++              }
+       } else if (they_are_dirs) {
+               drop_nlink(old_dir);
+               inc_nlink(new_dir);
diff --git a/queue-3.16/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch b/queue-3.16/x86-early_ioremap-increase-fix_btmaps_slots-to-8.patch
new file mode 100644 (file)
index 0000000..d7aa1f3
--- /dev/null
@@ -0,0 +1,109 @@
+From 3eddc69ffeba092d288c386646bfa5ec0fce25fd Mon Sep 17 00:00:00 2001
+From: Dave Young <dyoung@redhat.com>
+Date: Tue, 26 Aug 2014 17:06:41 +0800
+Subject: x86 early_ioremap: Increase FIX_BTMAPS_SLOTS to 8
+
+From: Dave Young <dyoung@redhat.com>
+
+commit 3eddc69ffeba092d288c386646bfa5ec0fce25fd upstream.
+
+The 3.16 kernel fails to boot with earlyprintk=efi; it keeps scrolling at the
+bottom line of the screen.
+
+Bisected, the first bad commit is below:
+commit 86dfc6f339886559d80ee0d4bd20fe5ee90450f0
+Author: Lv Zheng <lv.zheng@intel.com>
+Date:   Fri Apr 4 12:38:57 2014 +0800
+
+    ACPICA: Tables: Fix table checksums verification before installation.
+
+I did some debugging by enabling both serial and efi earlyprintk. It seems
+early_ioremap fails in the scroll-up function because there is no free slot;
+see the dmesg output below:
+
+  WARNING: CPU: 0 PID: 0 at mm/early_ioremap.c:116 __early_ioremap+0x90/0x1c4()
+  __early_ioremap(ed00c800, 00000c80) not found slot
+  Modules linked in:
+  CPU: 0 PID: 0 Comm: swapper Not tainted 3.17.0-rc1+ #204
+  Hardware name: Hewlett-Packard HP Z420 Workstation/1589, BIOS J61 v03.15 05/09/2013
+  Call Trace:
+    dump_stack+0x4e/0x7a
+    warn_slowpath_common+0x75/0x8e
+    ? __early_ioremap+0x90/0x1c4
+    warn_slowpath_fmt+0x47/0x49
+    __early_ioremap+0x90/0x1c4
+    ? sprintf+0x46/0x48
+    early_ioremap+0x13/0x15
+    early_efi_map+0x24/0x26
+    early_efi_scroll_up+0x6d/0xc0
+    early_efi_write+0x1b0/0x214
+    call_console_drivers.constprop.21+0x73/0x7e
+    console_unlock+0x151/0x3b2
+    ? vprintk_emit+0x49f/0x532
+    vprintk_emit+0x521/0x532
+    ? console_unlock+0x383/0x3b2
+    printk+0x4f/0x51
+    acpi_os_vprintf+0x2b/0x2d
+    acpi_os_printf+0x43/0x45
+    acpi_info+0x5c/0x63
+    ? __acpi_map_table+0x13/0x18
+    ? acpi_os_map_iomem+0x21/0x147
+    acpi_tb_print_table_header+0x177/0x186
+    acpi_tb_install_table_with_override+0x4b/0x62
+    acpi_tb_install_standard_table+0xd9/0x215
+    ? early_ioremap+0x13/0x15
+    ? __acpi_map_table+0x13/0x18
+    acpi_tb_parse_root_table+0x16e/0x1b4
+    acpi_initialize_tables+0x57/0x59
+    acpi_table_init+0x50/0xce
+    acpi_boot_table_init+0x1e/0x85
+    setup_arch+0x9b7/0xcc4
+    start_kernel+0x94/0x42d
+    ? early_idt_handlers+0x120/0x120
+    x86_64_start_reservations+0x2a/0x2c
+    x86_64_start_kernel+0xf3/0x100
+
+Quote reply from Lv.zheng about the early ioremap slot usage in this case:
+
+"""
+In early_efi_scroll_up(), 2 mapping entries will be used for the src/dst screen buffer.
+In drivers/acpi/acpica/tbutils.c, we've improved the early table loading code in acpi_tb_parse_root_table().
+We now need 2 mapping entries:
+1. One mapping entry is used for RSDT table mapping. Each RSDT entry contains an address for another ACPI table.
+2. For each entry in RSDP, we need another mapping entry to map the table to perform necessary check/override before installing it.
+
+When acpi_tb_parse_root_table() prints something through EFI earlyprintk console, we'll have 4 mapping entries used.
+The current 4 slots setting of early_ioremap() seems to be too small for such a use case.
+"""
+
+Thus increase the number of slots to 8 in this patch to fix this issue.
+The boot-time mappings become 512 pages with this patch.
+
+Signed-off-by: Dave Young <dyoung@redhat.com>
+Signed-off-by: Matt Fleming <matt.fleming@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/fixmap.h |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/include/asm/fixmap.h
++++ b/arch/x86/include/asm/fixmap.h
+@@ -106,14 +106,14 @@ enum fixed_addresses {
+       __end_of_permanent_fixed_addresses,
+       /*
+-       * 256 temporary boot-time mappings, used by early_ioremap(),
++       * 512 temporary boot-time mappings, used by early_ioremap(),
+        * before ioremap() is functional.
+        *
+-       * If necessary we round it up to the next 256 pages boundary so
++       * If necessary we round it up to the next 512 pages boundary so
+        * that we can have a single pgd entry and a single pte table:
+        */
+ #define NR_FIX_BTMAPS         64
+-#define FIX_BTMAPS_SLOTS      4
++#define FIX_BTMAPS_SLOTS      8
+ #define TOTAL_FIX_BTMAPS      (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+       FIX_BTMAP_END =
+        (__end_of_permanent_fixed_addresses ^
diff --git a/queue-3.16/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch b/queue-3.16/x86-kaslr-avoid-the-setup_data-area-when-picking-location.patch
new file mode 100644 (file)
index 0000000..bcc09ae
--- /dev/null
@@ -0,0 +1,59 @@
+From 0cacbfbeb5077b63d5d3cf6df88b14ac12ad584b Mon Sep 17 00:00:00 2001
+From: Kees Cook <keescook@chromium.org>
+Date: Thu, 11 Sep 2014 09:19:31 -0700
+Subject: x86/kaslr: Avoid the setup_data area when picking location
+
+From: Kees Cook <keescook@chromium.org>
+
+commit 0cacbfbeb5077b63d5d3cf6df88b14ac12ad584b upstream.
+
+The KASLR location-choosing logic needs to avoid the setup_data
+list memory areas as well. Without this, it would be possible to
+have the ASLR position stomp on the memory, ultimately causing
+the boot to fail.
+
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Tested-by: Baoquan He <bhe@redhat.com>
+Cc: Vivek Goyal <vgoyal@redhat.com>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
+Cc: Pavel Machek <pavel@ucw.cz>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20140911161931.GA12001@www.outflux.net
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/boot/compressed/aslr.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+--- a/arch/x86/boot/compressed/aslr.c
++++ b/arch/x86/boot/compressed/aslr.c
+@@ -183,12 +183,27 @@ static void mem_avoid_init(unsigned long
+ static bool mem_avoid_overlap(struct mem_vector *img)
+ {
+       int i;
++      struct setup_data *ptr;
+       for (i = 0; i < MEM_AVOID_MAX; i++) {
+               if (mem_overlaps(img, &mem_avoid[i]))
+                       return true;
+       }
++      /* Avoid all entries in the setup_data linked list. */
++      ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
++      while (ptr) {
++              struct mem_vector avoid;
++
++              avoid.start = (u64)ptr;
++              avoid.size = sizeof(*ptr) + ptr->len;
++
++              if (mem_overlaps(img, &avoid))
++                      return true;
++
++              ptr = (struct setup_data *)(unsigned long)ptr->next;
++      }
++
+       return false;
+ }
diff --git a/queue-3.16/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch b/queue-3.16/x86-xen-don-t-copy-bogus-duplicate-entries-into-kernel-page-tables.patch
new file mode 100644 (file)
index 0000000..807056d
--- /dev/null
@@ -0,0 +1,131 @@
+From 0b5a50635fc916cf46e3de0b819a61fc3f17e7ee Mon Sep 17 00:00:00 2001
+From: Stefan Bader <stefan.bader@canonical.com>
+Date: Tue, 2 Sep 2014 11:16:01 +0100
+Subject: x86/xen: don't copy bogus duplicate entries into kernel page tables
+
+From: Stefan Bader <stefan.bader@canonical.com>
+
+commit 0b5a50635fc916cf46e3de0b819a61fc3f17e7ee upstream.
+
+When RANDOMIZE_BASE (KASLR) is enabled; or the sum of all loaded
+modules exceeds 512 MiB, then loading modules fails with a warning
+(and hence a vmalloc allocation failure) because the PTEs for the
+newly-allocated vmalloc address space are not zero.
+
+  WARNING: CPU: 0 PID: 494 at linux/mm/vmalloc.c:128
+           vmap_page_range_noflush+0x2a1/0x360()
+
+This is caused by xen_setup_kernel_pagetables() copying
+level2_kernel_pgt into level2_fixmap_pgt, overwriting many non-present
+entries.
+
+Without KASLR, the normal kernel image size only covers the first half
+of level2_kernel_pgt and module space starts after that.
+
+L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[  0..255]->kernel
+                                                  [256..511]->module
+                          [511]->level2_fixmap_pgt[  0..505]->module
+
+This allows 512 MiB of module vmalloc space to be used before
+having to use the corrupted level2_fixmap_pgt entries.
+
+With KASLR enabled, the kernel image uses the full PUD range of 1G and
+module space starts in the level2_fixmap_pgt. So basically:
+
+L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[0..511]->kernel
+                          [511]->level2_fixmap_pgt[0..505]->module
+
+And now no module vmalloc space can be used without using the corrupt
+level2_fixmap_pgt entries.
+
+Fix this by properly converting the level2_fixmap_pgt entries to MFNs,
+and setting level1_fixmap_pgt as read-only.
+
+A number of comments were also using the wrong L3 offset for
+level2_kernel_pgt.  These have been corrected.
+
+Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/include/asm/pgtable_64.h |    1 +
+ arch/x86/xen/mmu.c                |   27 ++++++++++++---------------
+ 2 files changed, 13 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512];
+ extern pmd_t level2_kernel_pgt[512];
+ extern pmd_t level2_fixmap_pgt[512];
+ extern pmd_t level2_ident_pgt[512];
++extern pte_t level1_fixmap_pgt[512];
+ extern pgd_t init_level4_pgt[];
+ #define swapper_pg_dir init_level4_pgt
+--- a/arch/x86/xen/mmu.c
++++ b/arch/x86/xen/mmu.c
+@@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigne
+  *
+  * We can construct this by grafting the Xen provided pagetable into
+  * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+- * means that only the kernel has a physical mapping to start with -
+- * but that's enough to get __va working.  We need to fill in the rest
+- * of the physical mapping once some sort of allocator has been set
+- * up.
+- * NOTE: for PVH, the page tables are native.
++ * level2_ident_pgt, and level2_kernel_pgt.  This means that only the
++ * kernel has a physical mapping to start with - but that's enough to
++ * get __va working.  We need to fill in the rest of the physical
++ * mapping once some sort of allocator has been set up.  NOTE: for
++ * PVH, the page tables are native.
+  */
+ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+ {
+@@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(p
+               /* L3_i[0] -> level2_ident_pgt */
+               convert_pfn_mfn(level3_ident_pgt);
+               /* L3_k[510] -> level2_kernel_pgt
+-               * L3_i[511] -> level2_fixmap_pgt */
++               * L3_k[511] -> level2_fixmap_pgt */
+               convert_pfn_mfn(level3_kernel_pgt);
++
++              /* L3_k[511][506] -> level1_fixmap_pgt */
++              convert_pfn_mfn(level2_fixmap_pgt);
+       }
+       /* We get [511][511] and have Xen's version of level2_kernel_pgt */
+       l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+@@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(p
+       addr[1] = (unsigned long)l3;
+       addr[2] = (unsigned long)l2;
+       /* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
+-       * Both L4[272][0] and L4[511][511] have entries that point to the same
++       * Both L4[272][0] and L4[511][510] have entries that point to the same
+        * L2 (PMD) tables. Meaning that if you modify it in __va space
+        * it will be also modified in the __ka space! (But if you just
+        * modify the PMD table to point to other PTE's or none, then you
+        * are OK - which is what cleanup_highmap does) */
+       copy_page(level2_ident_pgt, l2);
+-      /* Graft it onto L4[511][511] */
++      /* Graft it onto L4[511][510] */
+       copy_page(level2_kernel_pgt, l2);
+-      /* Get [511][510] and graft that in level2_fixmap_pgt */
+-      l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+-      l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+-      copy_page(level2_fixmap_pgt, l2);
+-      /* Note that we don't do anything with level1_fixmap_pgt which
+-       * we don't need. */
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               /* Make pagetable pieces RO */
+               set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+@@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(p
+               set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
+               set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+               set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
++              set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+               /* Pin down new L4 */
+               pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
diff --git a/queue-3.16/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch b/queue-3.16/xen-manage-always-freeze-thaw-processes-when-suspend-resuming.patch
new file mode 100644 (file)
index 0000000..60a5ae0
--- /dev/null
@@ -0,0 +1,59 @@
+From 61a734d305e16944b42730ef582a7171dc733321 Mon Sep 17 00:00:00 2001
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+Date: Mon, 18 Aug 2014 10:41:36 +0100
+Subject: xen/manage: Always freeze/thaw processes when suspend/resuming
+
+From: Ross Lagerwall <ross.lagerwall@citrix.com>
+
+commit 61a734d305e16944b42730ef582a7171dc733321 upstream.
+
+Always freeze processes when suspending and thaw processes when resuming
+to prevent a race noticeable with HVM guests.
+
+This prevents a deadlock where the khubd kthread (which is designed to
+be freezable) acquires a usb device lock and then tries to allocate
+memory which requires the disk which hasn't been resumed yet.
+Meanwhile, the xenwatch thread deadlocks waiting for the usb device
+lock.
+
+Freezing processes fixes this because the khubd thread is only thawed
+after the xenwatch thread finishes resuming all the devices.
+
+Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/xen/manage.c |    7 -------
+ 1 file changed, 7 deletions(-)
+
+--- a/drivers/xen/manage.c
++++ b/drivers/xen/manage.c
+@@ -103,16 +103,11 @@ static void do_suspend(void)
+       shutting_down = SHUTDOWN_SUSPEND;
+-#ifdef CONFIG_PREEMPT
+-      /* If the kernel is preemptible, we need to freeze all the processes
+-         to prevent them from being in the middle of a pagetable update
+-         during suspend. */
+       err = freeze_processes();
+       if (err) {
+               pr_err("%s: freeze failed %d\n", __func__, err);
+               goto out;
+       }
+-#endif
+       err = dpm_suspend_start(PMSG_FREEZE);
+       if (err) {
+@@ -157,10 +152,8 @@ out_resume:
+       dpm_resume_end(si.cancelled ? PMSG_THAW : PMSG_RESTORE);
+ out_thaw:
+-#ifdef CONFIG_PREEMPT
+       thaw_processes();
+ out:
+-#endif
+       shutting_down = SHUTDOWN_INVALID;
+ }
+ #endif        /* CONFIG_HIBERNATE_CALLBACKS */