From: Greg Kroah-Hartman
Date: Fri, 6 Nov 2015 17:18:01 +0000 (-0800)
Subject: 4.1-stable patches
X-Git-Tag: v3.10.93~11
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fe2d473897a09536568b464c9b3e395991af072d;p=thirdparty%2Fkernel%2Fstable-queue.git

4.1-stable patches

added patches:
	arm64-compat-fix-stxr-failure-case-in-swp-emulation.patch
	arm64-kernel-fix-tcr_el1.t0sz-restore-on-systems-with-extended-idmap.patch
	arm64-kernel-rename-__cpu_suspend-to-keep-it-aligned-with-arm.patch
	drm-vmwgfx-fix-up-user_dmabuf-refcounting.patch
	nvme-fix-memory-leak-on-retried-commands.patch
	thp-use-is_zero_pfn-only-after-pte_present-check.patch
---

diff --git a/queue-4.1/arm64-compat-fix-stxr-failure-case-in-swp-emulation.patch b/queue-4.1/arm64-compat-fix-stxr-failure-case-in-swp-emulation.patch
new file mode 100644
index 00000000000..bd64518bbed
--- /dev/null
+++ b/queue-4.1/arm64-compat-fix-stxr-failure-case-in-swp-emulation.patch
@@ -0,0 +1,61 @@
+From 589cb22bbedacf325951014c07a35a2b01ca57f6 Mon Sep 17 00:00:00 2001
+From: Will Deacon
+Date: Thu, 15 Oct 2015 13:55:53 +0100
+Subject: arm64: compat: fix stxr failure case in SWP emulation
+
+From: Will Deacon
+
+commit 589cb22bbedacf325951014c07a35a2b01ca57f6 upstream.
+
+If the STXR instruction fails in the SWP emulation code, we leave *data
+overwritten with the loaded value, therefore corrupting the data written
+by a subsequent, successful attempt.
+
+This patch re-jigs the code so that we only write back to *data once we
+know that the update has happened.
+
+Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm")
+Reported-by: Shengjiu Wang
+Reported-by: Vladimir Murzin
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm64/kernel/armv8_deprecated.c |   18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+--- a/arch/arm64/kernel/armv8_deprecated.c
++++ b/arch/arm64/kernel/armv8_deprecated.c
+@@ -279,22 +279,24 @@ static void register_insn_emulation_sysc
+  */
+ #define __user_swpX_asm(data, addr, res, temp, B)	\
+ 	__asm__ __volatile__(				\
+-	"	mov		%w2, %w1\n"		\
+-	"0:	ldxr"B"		%w1, [%3]\n"		\
+-	"1:	stxr"B"		%w0, %w2, [%3]\n"	\
++	"0:	ldxr"B"		%w2, [%3]\n"		\
++	"1:	stxr"B"		%w0, %w1, [%3]\n"	\
+ 	"	cbz		%w0, 2f\n"		\
+ 	"	mov		%w0, %w4\n"		\
++	"	b		3f\n"			\
+ 	"2:\n"						\
++	"	mov		%w1, %w2\n"		\
++	"3:\n"						\
+ 	"	.pushsection	 .fixup,\"ax\"\n"	\
+ 	"	.align		2\n"			\
+-	"3:	mov		%w0, %w5\n"		\
+-	"	b		2b\n"			\
++	"4:	mov		%w0, %w5\n"		\
++	"	b		3b\n"			\
+ 	"	.popsection"				\
+ 	"	.pushsection	 __ex_table,\"a\"\n"	\
+ 	"	.align		3\n"			\
+-	"	.quad		0b, 3b\n"		\
+-	"	.quad		1b, 3b\n"		\
+-	"	.popsection"				\
++	"	.quad		0b, 4b\n"		\
++	"	.quad		1b, 4b\n"		\
++	"	.popsection\n"				\
+ 	: "=&r" (res), "+r" (data), "=&r" (temp)	\
+ 	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)	\
+ 	: "memory")
diff --git a/queue-4.1/arm64-kernel-fix-tcr_el1.t0sz-restore-on-systems-with-extended-idmap.patch b/queue-4.1/arm64-kernel-fix-tcr_el1.t0sz-restore-on-systems-with-extended-idmap.patch
new file mode 100644
index 00000000000..8c4ebf47f22
--- /dev/null
+++ b/queue-4.1/arm64-kernel-fix-tcr_el1.t0sz-restore-on-systems-with-extended-idmap.patch
@@ -0,0 +1,85 @@
+From e13d918a19a7b6cba62b32884f5e336e764c2cc6 Mon Sep 17 00:00:00 2001
+From: Lorenzo Pieralisi
+Date: Tue, 27 Oct 2015 17:29:10 +0000
+Subject: arm64: kernel: fix tcr_el1.t0sz restore on systems with extended idmap
+
+From: Lorenzo Pieralisi
+
+commit e13d918a19a7b6cba62b32884f5e336e764c2cc6 upstream.
+
+Commit dd006da21646 ("arm64: mm: increase VA range of identity map")
+introduced a mechanism to extend the virtual memory map range
+to support arm64 systems with system RAM located at very high offset,
+where the identity mapping used to enable/disable the MMU requires
+additional translation levels to map the physical memory at an equal
+virtual offset.
+
+The kernel detects at boot time the tcr_el1.t0sz value required by the
+identity mapping and sets up the tcr_el1.t0sz register field accordingly,
+any time the identity map is required in the kernel (ie when enabling the
+MMU).
+
+After enabling the MMU, in the cold boot path the kernel resets the
+tcr_el1.t0sz to its default value (ie the actual configuration value for
+the system virtual address space) so that after enabling the MMU the
+memory space translated by ttbr0_el1 is restored as expected.
+
+Commit dd006da21646 ("arm64: mm: increase VA range of identity map")
+also added code to set up the tcr_el1.t0sz value when the kernel resumes
+from low-power states with the MMU off through cpu_resume() in order to
+effectively use the identity mapping to enable the MMU, but failed to add
+the code required to restore the tcr_el1.t0sz to its default value when
+the core returns to the kernel with the MMU enabled, so that the kernel
+might end up running with a tcr_el1.t0sz value set up for the identity
+mapping which can be lower than the value required by the actual virtual
+address space, resulting in an erroneous set-up.
+
+This patch adds code in the resume path that restores the tcr_el1.t0sz
+default value upon core resume, mirroring this way the cold boot path
+behaviour, therefore fixing the issue.
+
+Cc: Catalin Marinas
+Fixes: dd006da21646 ("arm64: mm: increase VA range of identity map")
+Acked-by: Ard Biesheuvel
+Signed-off-by: Lorenzo Pieralisi
+Signed-off-by: James Morse
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/arm64/kernel/suspend.c |   22 +++++++++++++---------
+ 1 file changed, 13 insertions(+), 9 deletions(-)
+
+--- a/arch/arm64/kernel/suspend.c
++++ b/arch/arm64/kernel/suspend.c
+@@ -80,17 +80,21 @@ int cpu_suspend(unsigned long arg, int (
+ 	if (ret == 0) {
+ 		/*
+ 		 * We are resuming from reset with TTBR0_EL1 set to the
+-		 * idmap to enable the MMU; restore the active_mm mappings in
+-		 * TTBR0_EL1 unless the active_mm == &init_mm, in which case
+-		 * the thread entered cpu_suspend with TTBR0_EL1 set to
+-		 * reserved TTBR0 page tables and should be restored as such.
++		 * idmap to enable the MMU; set the TTBR0 to the reserved
++		 * page tables to prevent speculative TLB allocations, flush
++		 * the local tlb and set the default tcr_el1.t0sz so that
++		 * the TTBR0 address space set-up is properly restored.
++		 * If the current active_mm != &init_mm we entered cpu_suspend
++		 * with mappings in TTBR0 that must be restored, so we switch
++		 * them back to complete the address space configuration
++		 * restoration before returning.
+ */ +- if (mm == &init_mm) +- cpu_set_reserved_ttbr0(); +- else +- cpu_switch_mm(mm->pgd, mm); +- ++ cpu_set_reserved_ttbr0(); + flush_tlb_all(); ++ cpu_set_default_tcr_t0sz(); ++ ++ if (mm != &init_mm) ++ cpu_switch_mm(mm->pgd, mm); + + /* + * Restore per-cpu offset before any kernel diff --git a/queue-4.1/arm64-kernel-rename-__cpu_suspend-to-keep-it-aligned-with-arm.patch b/queue-4.1/arm64-kernel-rename-__cpu_suspend-to-keep-it-aligned-with-arm.patch new file mode 100644 index 00000000000..18ac1b05bae --- /dev/null +++ b/queue-4.1/arm64-kernel-rename-__cpu_suspend-to-keep-it-aligned-with-arm.patch @@ -0,0 +1,122 @@ +From af391b15f7b56ce19f52862d36595637dd42b575 Mon Sep 17 00:00:00 2001 +From: Sudeep Holla +Date: Thu, 18 Jun 2015 15:41:32 +0100 +Subject: arm64: kernel: rename __cpu_suspend to keep it aligned with arm + +From: Sudeep Holla + +commit af391b15f7b56ce19f52862d36595637dd42b575 upstream. + +This patch renames __cpu_suspend to cpu_suspend so that it's aligned +with ARM32. It also removes the redundant wrapper created. + +This is in preparation to implement generic PSCI system suspend using +the cpu_{suspend,resume} which now has the same interface on both ARM +and ARM64. + +Cc: Mark Rutland +Reviewed-by: Lorenzo Pieralisi +Reviewed-by: Ashwin Chaugule +Signed-off-by: Sudeep Holla +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm64/include/asm/cpuidle.h | 8 ++------ + arch/arm64/include/asm/suspend.h | 2 +- + arch/arm64/kernel/cpuidle.c | 4 ++-- + arch/arm64/kernel/psci.c | 2 +- + arch/arm64/kernel/suspend.c | 6 +++--- + 5 files changed, 9 insertions(+), 13 deletions(-) + +--- a/arch/arm64/include/asm/cpuidle.h ++++ b/arch/arm64/include/asm/cpuidle.h +@@ -5,20 +5,16 @@ + + #ifdef CONFIG_CPU_IDLE + extern int arm_cpuidle_init(unsigned int cpu); +-extern int cpu_suspend(unsigned long arg); ++extern int arm_cpuidle_suspend(int index); + #else + static inline int arm_cpuidle_init(unsigned int cpu) + { + return -EOPNOTSUPP; + } + +-static inline int cpu_suspend(unsigned long arg) ++static inline int arm_cpuidle_suspend(int index) + { + return -EOPNOTSUPP; + } + #endif +-static inline int arm_cpuidle_suspend(int index) +-{ +- return cpu_suspend(index); +-} + #endif +--- a/arch/arm64/include/asm/suspend.h ++++ b/arch/arm64/include/asm/suspend.h +@@ -21,6 +21,6 @@ struct sleep_save_sp { + phys_addr_t save_ptr_stash_phys; + }; + +-extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); ++extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); + extern void cpu_resume(void); + #endif +--- a/arch/arm64/kernel/cpuidle.c ++++ b/arch/arm64/kernel/cpuidle.c +@@ -37,7 +37,7 @@ int arm_cpuidle_init(unsigned int cpu) + * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU + * operations back-end error code otherwise. 
+ */ +-int cpu_suspend(unsigned long arg) ++int arm_cpuidle_suspend(int index) + { + int cpu = smp_processor_id(); + +@@ -47,5 +47,5 @@ int cpu_suspend(unsigned long arg) + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; +- return cpu_ops[cpu]->cpu_suspend(arg); ++ return cpu_ops[cpu]->cpu_suspend(index); + } +--- a/arch/arm64/kernel/psci.c ++++ b/arch/arm64/kernel/psci.c +@@ -546,7 +546,7 @@ static int __maybe_unused cpu_psci_cpu_s + if (state[index - 1].type == PSCI_POWER_STATE_TYPE_STANDBY) + ret = psci_ops.cpu_suspend(state[index - 1], 0); + else +- ret = __cpu_suspend(index, psci_suspend_finisher); ++ ret = cpu_suspend(index, psci_suspend_finisher); + + return ret; + } +--- a/arch/arm64/kernel/suspend.c ++++ b/arch/arm64/kernel/suspend.c +@@ -51,13 +51,13 @@ void __init cpu_suspend_set_dbg_restorer + } + + /* +- * __cpu_suspend ++ * cpu_suspend + * + * arg: argument to pass to the finisher function + * fn: finisher function pointer + * + */ +-int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) ++int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) + { + struct mm_struct *mm = current->active_mm; + int ret; +@@ -82,7 +82,7 @@ int __cpu_suspend(unsigned long arg, int + * We are resuming from reset with TTBR0_EL1 set to the + * idmap to enable the MMU; restore the active_mm mappings in + * TTBR0_EL1 unless the active_mm == &init_mm, in which case +- * the thread entered __cpu_suspend with TTBR0_EL1 set to ++ * the thread entered cpu_suspend with TTBR0_EL1 set to + * reserved TTBR0 page tables and should be restored as such. + */ + if (mm == &init_mm) diff --git a/queue-4.1/drm-vmwgfx-fix-up-user_dmabuf-refcounting.patch b/queue-4.1/drm-vmwgfx-fix-up-user_dmabuf-refcounting.patch new file mode 100644 index 00000000000..77d37b87a3b --- /dev/null +++ b/queue-4.1/drm-vmwgfx-fix-up-user_dmabuf-refcounting.patch @@ -0,0 +1,271 @@ +From 54c12bc374408faddbff75dbf1a6167c19af39c4 Mon Sep 17 00:00:00 2001 +From: Thomas Hellstrom +Date: Mon, 14 Sep 2015 01:13:11 -0700 +Subject: drm/vmwgfx: Fix up user_dmabuf refcounting + +From: Thomas Hellstrom + +commit 54c12bc374408faddbff75dbf1a6167c19af39c4 upstream. + +If user space calls unreference on a user_dmabuf it will typically +kill the struct ttm_base_object member which is responsible for the +user-space visibility. However the dmabuf part may still be alive and +refcounted. In some situations, like for shared guest-backed surface +referencing/opening, the driver may try to reference the +struct ttm_base_object member again, causing an immediate kernel warning +and a later kernel NULL pointer dereference. + +Fix this by always maintaining a reference on the struct +ttm_base_object member, in situations where it might subsequently be +referenced. 
+
+Signed-off-by: Thomas Hellstrom
+Reviewed-by: Brian Paul
+Reviewed-by: Sinclair Yeh
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/gpu/drm/vmwgfx/vmwgfx_drv.c      |    3 +++
+ drivers/gpu/drm/vmwgfx/vmwgfx_drv.h      |    6 ++++--
+ drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c  |    6 ++++--
+ drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c  |    2 +-
+ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |   29 +++++++++++++++++++++--------
+ drivers/gpu/drm/vmwgfx/vmwgfx_shader.c   |    2 +-
+ drivers/gpu/drm/vmwgfx/vmwgfx_surface.c  |   12 +++++++++---
+ 7 files changed, 43 insertions(+), 17 deletions(-)
+
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+@@ -1458,6 +1458,9 @@ static void __exit vmwgfx_exit(void)
+ 	drm_pci_exit(&driver, &vmw_pci_driver);
+ }
+ 
++MODULE_INFO(vmw_patch, "ed7d78b2");
++MODULE_INFO(vmw_patch, "54c12bc3");
++
+ module_init(vmwgfx_init);
+ module_exit(vmwgfx_exit);
+ 
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+@@ -636,7 +636,8 @@ extern int vmw_user_dmabuf_alloc(struct
+ 				 uint32_t size,
+ 				 bool shareable,
+ 				 uint32_t *handle,
+-				 struct vmw_dma_buffer **p_dma_buf);
++				 struct vmw_dma_buffer **p_dma_buf,
++				 struct ttm_base_object **p_base);
+ extern int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
+ 				     struct vmw_dma_buffer *dma_buf,
+ 				     uint32_t *handle);
+@@ -650,7 +651,8 @@ extern uint32_t vmw_dmabuf_validate_node
+ 					 uint32_t cur_validate_node);
+ extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo);
+ extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
+-				  uint32_t id, struct vmw_dma_buffer **out);
++				  uint32_t id, struct vmw_dma_buffer **out,
++				  struct ttm_base_object **base);
+ extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
+ 				  struct drm_file *file_priv);
+ extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+@@ -887,7 +887,8 @@ static int vmw_translate_mob_ptr(struct
+ 	struct vmw_relocation *reloc;
+ 	int ret;
+ 
+-	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo);
++	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo,
++				     NULL);
+ 	if (unlikely(ret != 0)) {
+ 		DRM_ERROR("Could not find or use MOB buffer.\n");
+ 		ret = -EINVAL;
+@@ -949,7 +950,8 @@ static int vmw_translate_guest_ptr(struc
+ 	struct vmw_relocation *reloc;
+ 	int ret;
+ 
+-	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo);
++	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo,
++				     NULL);
+ 	if (unlikely(ret != 0)) {
+ 		DRM_ERROR("Could not find or use GMR region.\n");
+ 		ret = -EINVAL;
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+@@ -484,7 +484,7 @@ int vmw_overlay_ioctl(struct drm_device
+ 		goto out_unlock;
+ 	}
+ 
+-	ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf);
++	ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf, NULL);
+ 	if (ret)
+ 		goto out_unlock;
+ 
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+@@ -356,7 +356,7 @@ int vmw_user_lookup_handle(struct vmw_pr
+ 	}
+ 
+ 	*out_surf = NULL;
+-	ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf);
++	ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf, NULL);
+ 	return ret;
+ }
+ 
+@@ -483,7 +483,8 @@ int vmw_user_dmabuf_alloc(struct vmw_pri
+ 			  uint32_t size,
+ 			  bool shareable,
+ 			  uint32_t *handle,
+-			  struct vmw_dma_buffer **p_dma_buf)
++			  struct vmw_dma_buffer **p_dma_buf,
++			  struct ttm_base_object **p_base)
+ {
+ 	struct vmw_user_dma_buffer *user_bo;
+ 	struct ttm_buffer_object *tmp;
+@@ -517,6 +518,10 @@ int vmw_user_dmabuf_alloc(struct vmw_pri
+ 	}
+ 
+ 	*p_dma_buf = &user_bo->dma;
++	if (p_base) {
++		*p_base = &user_bo->prime.base;
++		kref_get(&(*p_base)->refcount);
++	}
+ 	*handle = user_bo->prime.base.hash.key;
+ 
+ out_no_base_object:
+@@ -633,6 +638,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct
+ 	struct vmw_dma_buffer *dma_buf;
+ 	struct vmw_user_dma_buffer *user_bo;
+ 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
++	struct ttm_base_object *buffer_base;
+ 	int ret;
+ 
+ 	if ((arg->flags & (drm_vmw_synccpu_read | drm_vmw_synccpu_write)) == 0
+@@ -645,7 +651,8 @@ int vmw_user_dmabuf_synccpu_ioctl(struct
+ 
+ 	switch (arg->op) {
+ 	case drm_vmw_synccpu_grab:
+-		ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf);
++		ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf,
++					     &buffer_base);
+ 		if (unlikely(ret != 0))
+ 			return ret;
+ 
+@@ -653,6 +660,7 @@ int vmw_user_dmabuf_synccpu_ioctl(struct
+ 					       dma);
+ 		ret = vmw_user_dmabuf_synccpu_grab(user_bo, tfile, arg->flags);
+ 		vmw_dmabuf_unreference(&dma_buf);
++		ttm_base_object_unref(&buffer_base);
+ 		if (unlikely(ret != 0 && ret != -ERESTARTSYS &&
+ 			     ret != -EBUSY)) {
+ 			DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n",
+@@ -694,7 +702,8 @@ int vmw_dmabuf_alloc_ioctl(struct drm_de
+ 		return ret;
+ 
+ 	ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
+-				    req->size, false, &handle, &dma_buf);
++				    req->size, false, &handle, &dma_buf,
++				    NULL);
+ 	if (unlikely(ret != 0))
+ 		goto out_no_dmabuf;
+ 
+@@ -723,7 +732,8 @@ int vmw_dmabuf_unref_ioctl(struct drm_de
+ }
+ 
+ int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
+-			   uint32_t handle, struct vmw_dma_buffer **out)
++			   uint32_t handle, struct vmw_dma_buffer **out,
++			   struct ttm_base_object **p_base)
+ {
+ 	struct vmw_user_dma_buffer *vmw_user_bo;
+ 	struct ttm_base_object *base;
+@@ -745,7 +755,10 @@ int vmw_user_dmabuf_lookup(struct ttm_ob
+ 	vmw_user_bo = container_of(base, struct vmw_user_dma_buffer,
+ 				   prime.base);
+ 	(void)ttm_bo_reference(&vmw_user_bo->dma.base);
+-	ttm_base_object_unref(&base);
++	if (p_base)
++		*p_base = base;
++	else
++		ttm_base_object_unref(&base);
+ 	*out = &vmw_user_bo->dma;
+ 
+ 	return 0;
+@@ -1006,7 +1019,7 @@ int vmw_dumb_create(struct drm_file *fil
+ 
+ 	ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
+ 				    args->size, false, &args->handle,
+-				    &dma_buf);
++				    &dma_buf, NULL);
+ 	if (unlikely(ret != 0))
+ 		goto out_no_dmabuf;
+ 
+@@ -1034,7 +1047,7 @@ int vmw_dumb_map_offset(struct drm_file
+ 	struct vmw_dma_buffer *out_buf;
+ 	int ret;
+ 
+-	ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf);
++	ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf, NULL);
+ 	if (ret != 0)
+ 		return -EINVAL;
+ 
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+@@ -470,7 +470,7 @@ int vmw_shader_define_ioctl(struct drm_d
+ 
+ 	if (arg->buffer_handle != SVGA3D_INVALID_ID) {
+ 		ret = vmw_user_dmabuf_lookup(tfile, arg->buffer_handle,
+-					     &buffer);
++					     &buffer, NULL);
+ 		if (unlikely(ret != 0)) {
+ 			DRM_ERROR("Could not find buffer for shader "
+ 				  "creation.\n");
+--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+@@ -43,6 +43,7 @@ struct vmw_user_surface {
+ 	struct vmw_surface srf;
+ 	uint32_t size;
+ 	struct drm_master *master;
++	struct ttm_base_object *backup_base;
+ };
+ 
+ /**
+@@ -652,6 +653,8 @@ static void vmw_user_surface_base_releas
+ 	struct vmw_resource *res = &user_srf->srf.res;
+ 
+ 	*p_base = NULL;
++	if (user_srf->backup_base)
++		ttm_base_object_unref(&user_srf->backup_base);
+ 	vmw_resource_unreference(&res);
+ }
+ 
+@@ -846,7 +849,8 @@ int vmw_surface_define_ioctl(struct drm_
+ 					    res->backup_size,
+ 					    true,
+ 					    &backup_handle,
+-					    &res->backup);
++					    &res->backup,
++					    &user_srf->backup_base);
+ 		if (unlikely(ret != 0)) {
+ 			vmw_resource_unreference(&res);
+ 			goto out_unlock;
+ 		}
+@@ -1309,7 +1313,8 @@ int vmw_gb_surface_define_ioctl(struct d
+ 
+ 	if (req->buffer_handle != SVGA3D_INVALID_ID) {
+ 		ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle,
+-					     &res->backup);
++					     &res->backup,
++					     &user_srf->backup_base);
+ 	} else if (req->drm_surface_flags &
+ 		   drm_vmw_surface_flag_create_buffer)
+ 		ret = vmw_user_dmabuf_alloc(dev_priv, tfile,
+@@ -1317,7 +1322,8 @@ int vmw_gb_surface_define_ioctl(struct d
+ 					    req->drm_surface_flags &
+ 					    drm_vmw_surface_flag_shareable,
+ 					    &backup_handle,
+-					    &res->backup);
++					    &res->backup,
++					    &user_srf->backup_base);
+ 
+ 	if (unlikely(ret != 0)) {
+ 		vmw_resource_unreference(&res);
diff --git a/queue-4.1/nvme-fix-memory-leak-on-retried-commands.patch b/queue-4.1/nvme-fix-memory-leak-on-retried-commands.patch
new file mode 100644
index 00000000000..5f3300e8f1c
--- /dev/null
+++ b/queue-4.1/nvme-fix-memory-leak-on-retried-commands.patch
@@ -0,0 +1,74 @@
+From 0dfc70c33409afc232ef0b9ec210535dfbf9bc61 Mon Sep 17 00:00:00 2001
+From: Keith Busch
+Date: Thu, 15 Oct 2015 13:38:48 -0600
+Subject: NVMe: Fix memory leak on retried commands
+
+From: Keith Busch
+
+commit 0dfc70c33409afc232ef0b9ec210535dfbf9bc61 upstream.
+
+Resources are reallocated for requeued commands, so unmap and release
+the iod for the failed command.
+
+It's a pretty bad memory leak and causes a kernel hang if you remove a
+drive because of a busy dma pool. You'll get messages spewing like this:
+
+  nvme 0000:xx:xx.x: dma_pool_destroy prp list 256, ffff880420dec000 busy
+
+and lock up pci and the driver since removal never completes while
+holding a lock.
+
+Signed-off-by: Keith Busch
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+
+
+---
+ drivers/block/nvme-core.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/drivers/block/nvme-core.c
++++ b/drivers/block/nvme-core.c
+@@ -590,6 +590,7 @@ static void req_completion(struct nvme_q
+ 	struct nvme_iod *iod = ctx;
+ 	struct request *req = iod_get_private(iod);
+ 	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
++	bool requeue = false;
+ 
+ 	u16 status = le16_to_cpup(&cqe->status) >> 1;
+ 
+@@ -598,12 +599,13 @@ static void req_completion(struct nvme_q
+ 		    && (jiffies - req->start_time) < req->timeout) {
+ 			unsigned long flags;
+ 
++			requeue = true;
+ 			blk_mq_requeue_request(req);
+ 			spin_lock_irqsave(req->q->queue_lock, flags);
+ 			if (!blk_queue_stopped(req->q))
+ 				blk_mq_kick_requeue_list(req->q);
+ 			spin_unlock_irqrestore(req->q->queue_lock, flags);
+-			return;
++			goto release_iod;
+ 		}
+ 		req->errors = nvme_error_status(status);
+ 	} else
+@@ -613,7 +615,7 @@ static void req_completion(struct nvme_q
+ 		dev_warn(&nvmeq->dev->pci_dev->dev,
+ 			"completing aborted command with status:%04x\n",
+ 			status);
+-
++ release_iod:
+ 	if (iod->nents) {
+ 		dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
+ 			rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+@@ -626,7 +628,8 @@ static void req_completion(struct nvme_q
+ 	}
+ 	nvme_free_iod(nvmeq->dev, iod);
+ 
+-	blk_mq_complete_request(req);
++	if (likely(!requeue))
++		blk_mq_complete_request(req);
+ }
+ 
+ /* length is in bytes.  gfp flags indicates whether we may sleep. */
diff --git a/queue-4.1/series b/queue-4.1/series
index 7965c37a49c..3f09456a96f 100644
--- a/queue-4.1/series
+++ b/queue-4.1/series
@@ -73,3 +73,9 @@ mfd-of-document-mfd-devices-and-handle-simple-mfd.patch
 btrfs-fix-possible-leak-in-btrfs_ioctl_balance.patch
 ib-cm-fix-rb-tree-duplicate-free-and-use-after-free.patch
 cpufreq-intel_pstate-fix-divide-by-zero-on-knights-landing-knl.patch
+arm64-kernel-rename-__cpu_suspend-to-keep-it-aligned-with-arm.patch
+arm64-kernel-fix-tcr_el1.t0sz-restore-on-systems-with-extended-idmap.patch
+arm64-compat-fix-stxr-failure-case-in-swp-emulation.patch
+nvme-fix-memory-leak-on-retried-commands.patch
+drm-vmwgfx-fix-up-user_dmabuf-refcounting.patch
+thp-use-is_zero_pfn-only-after-pte_present-check.patch
diff --git a/queue-4.1/thp-use-is_zero_pfn-only-after-pte_present-check.patch b/queue-4.1/thp-use-is_zero_pfn-only-after-pte_present-check.patch
new file mode 100644
index 00000000000..b76d3cf6653
--- /dev/null
+++ b/queue-4.1/thp-use-is_zero_pfn-only-after-pte_present-check.patch
@@ -0,0 +1,49 @@
+From 47aee4d8e314384807e98b67ade07f6da476aa75 Mon Sep 17 00:00:00 2001
+From: Minchan Kim
+Date: Thu, 22 Oct 2015 13:32:19 -0700
+Subject: thp: use is_zero_pfn() only after pte_present() check
+
+From: Minchan Kim
+
+commit 47aee4d8e314384807e98b67ade07f6da476aa75 upstream.
+
+Use is_zero_pfn() on pteval only after pte_present() check on pteval
+(It might be a better idea to introduce is_zero_pte() which checks
+pte_present() first).
+
+Otherwise when working on a swap or migration entry and if pte_pfn's
+result is equal to zero_pfn by chance, we lose user's data in
+__collapse_huge_page_copy(). So if you're unlucky, the application
+segfaults and finally you could see the below message on exit:
+
+BUG: Bad rss-counter state mm:ffff88007f099300 idx:2 val:3
+
+Fixes: ca0984caa823 ("mm: incorporate zero pages into transparent huge pages")
+Signed-off-by: Minchan Kim
+Reviewed-by: Andrea Arcangeli
+Acked-by: Kirill A. Shutemov
+Cc: Mel Gorman
+Acked-by: Vlastimil Babka
+Cc: Hugh Dickins
+Cc: Rik van Riel
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+
+---
+ mm/huge_memory.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2137,7 +2137,8 @@ static int __collapse_huge_page_isolate(
+ 	for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
+ 	     _pte++, address += PAGE_SIZE) {
+ 		pte_t pteval = *_pte;
+-		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
++		if (pte_none(pteval) || (pte_present(pteval) &&
++		     is_zero_pfn(pte_pfn(pteval)))) {
+ 			if (++none_or_zero <= khugepaged_max_ptes_none)
+ 				continue;
+ 			else
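
For context, a minimal sketch (not part of the upstream patch) of why the
check order matters: for a swap or migration entry pte_present() is false
and pte_pfn() does not yield a real page frame number, so its value can
collide with zero_pfn by chance. pte_none(), pte_present(), pte_pfn() and
is_zero_pfn() are the real kernel helpers; the surrounding walker is
hypothetical.

	pte_t pteval = *_pte;

	if (pte_none(pteval) ||
	    (pte_present(pteval) && is_zero_pfn(pte_pfn(pteval)))) {
		/* genuinely empty or zero-page pte: no user data to keep */
	} else {
		/*
		 * A present data page, or a swap/migration entry that must
		 * not be mistaken for the zero page and discarded.
		 */
	}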