From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 12 Dec 2017 08:32:49 +0000 (+0100)
Subject: 4.4-stable patches
X-Git-Tag: v4.9.69~9
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7570806f70057bdc7e091738560819dfb63f921c;p=thirdparty%2Fkernel%2Fstable-queue.git

4.4-stable patches

added patches:
	arm-avoid-faulting-on-qemu.patch
	arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
	mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
	scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
	thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
	thp-reduce-indentation-level-in-change_huge_pmd.patch
---

diff --git a/queue-4.4/arm-avoid-faulting-on-qemu.patch b/queue-4.4/arm-avoid-faulting-on-qemu.patch
new file mode 100644
index 00000000000..526c9e42d4f
--- /dev/null
+++ b/queue-4.4/arm-avoid-faulting-on-qemu.patch
@@ -0,0 +1,48 @@
+From 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Mon, 27 Nov 2017 11:22:42 +0000
+Subject: ARM: avoid faulting on qemu
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 upstream.
+
+When qemu starts a kernel in a bare environment, the default SCR has
+the AW and FW bits clear, which means that the kernel can't modify
+the PSR A or PSR F bits, and means that FIQs and imprecise aborts are
+always masked.
+
+When running uboot under qemu, the AW and FW SCR bits are set, and the
+kernel functions normally - and this is how real hardware behaves.
+
+Fix this for qemu by ignoring the FIQ bit.
+
+Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Cc: Alex Shi <alex.shi@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kernel/entry-header.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/kernel/entry-header.S
++++ b/arch/arm/kernel/entry-header.S
+@@ -295,7 +295,7 @@
+ 	mov	r2, sp
+ 	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
+ 	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
+-	tst	r1, #0xcf
++	tst	r1, #PSR_I_BIT | 0x0f
+ 	bne	1f
+ 	msr	spsr_cxsf, r1			@ save in spsr_svc
+ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
+@@ -327,7 +327,7 @@
+ 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
+ 	ldr	lr, [sp, #\offset + S_PC]	@ get pc
+ 	add	sp, sp, #\offset + S_SP
+-	tst	r1, #0xcf
++	tst	r1, #PSR_I_BIT | 0x0f
+ 	bne	1f
+ 	msr	spsr_cxsf, r1			@ save in spsr_svc
+ 
diff --git a/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch b/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
new file mode 100644
index 00000000000..437bb4daa1a
--- /dev/null
+++ b/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
@@ -0,0 +1,86 @@
+From 8bafae202c82dc257f649ea3c275a0f35ee15113 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Fri, 24 Nov 2017 23:49:34 +0000
+Subject: ARM: BUG if jumping to usermode address in kernel mode
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 8bafae202c82dc257f649ea3c275a0f35ee15113 upstream.
+
+Detect if we are returning to usermode via the normal kernel exit paths
+but the saved PSR value indicates that we are in kernel mode.  This
+could occur due to corrupted stack state, which has been observed with
+"ftracetest".
+
+This ensures that we catch the problem case before we get to user code.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Cc: Alex Shi <alex.shi@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/asm/assembler.h |   18 ++++++++++++++++++
+ arch/arm/kernel/entry-header.S   |    6 ++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/arch/arm/include/asm/assembler.h
++++ b/arch/arm/include/asm/assembler.h
+@@ -512,4 +512,22 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
+ #endif
+ 	.endm
+ 
++	.macro	bug, msg, line
++#ifdef CONFIG_THUMB2_KERNEL
++1:	.inst	0xde02
++#else
++1:	.inst	0xe7f001f2
++#endif
++#ifdef CONFIG_DEBUG_BUGVERBOSE
++	.pushsection .rodata.str, "aMS", %progbits, 1
++2:	.asciz	"\msg"
++	.popsection
++	.pushsection __bug_table, "aw"
++	.align	2
++	.word	1b, 2b
++	.hword	\line
++	.popsection
++#endif
++	.endm
++
+ #endif /* __ASM_ASSEMBLER_H__ */
+--- a/arch/arm/kernel/entry-header.S
++++ b/arch/arm/kernel/entry-header.S
+@@ -295,6 +295,8 @@
+ 	mov	r2, sp
+ 	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
+ 	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
++	tst	r1, #0xcf
++	bne	1f
+ 	msr	spsr_cxsf, r1			@ save in spsr_svc
+ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
+ 	@ We must avoid clrex due to Cortex-A15 erratum #830321
+@@ -309,6 +311,7 @@
+ 						@ after ldm {}^
+ 	add	sp, sp, #\offset + S_FRAME_SIZE
+ 	movs	pc, lr				@ return & move spsr_svc into cpsr
++1:	bug	"Returning to usermode but unexpected PSR bits set?", \@
+ #elif defined(CONFIG_CPU_V7M)
+ 	@ V7M restore.
+ 	@ Note that we don't need to do clrex here as clearing the local
+@@ -324,6 +327,8 @@
+ 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
+ 	ldr	lr, [sp, #\offset + S_PC]	@ get pc
+ 	add	sp, sp, #\offset + S_SP
++	tst	r1, #0xcf
++	bne	1f
+ 	msr	spsr_cxsf, r1			@ save in spsr_svc
+ 
+ 	@ We must avoid clrex due to Cortex-A15 erratum #830321
+@@ -336,6 +341,7 @@
+ 	.endif
+ 	add	sp, sp, #S_FRAME_SIZE - S_SP
+ 	movs	pc, lr				@ return & move spsr_svc into cpsr
++1:	bug	"Returning to usermode but unexpected PSR bits set?", \@
+ #endif	/* !CONFIG_THUMB2_KERNEL */
+ 	.endm
+ 
diff --git a/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch b/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
new file mode 100644
index 00000000000..7830fbf0354
--- /dev/null
+++ b/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
@@ -0,0 +1,56 @@
+From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 13 Apr 2017 14:56:23 -0700
+Subject: mm: drop unused pmdp_huge_get_and_clear_notify()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit c0c379e2931b05facef538e53bf3b21f283d9a0b upstream.
+
+Dave noticed that after fixing the MADV_DONTNEED vs. numa balancing race,
+the last pmdp_huge_get_and_clear_notify() user is gone.
+
+Let's drop the helper.
+
+Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[jwang: adjust context for 4.4]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mmu_notifier.h |   13 -------------
+ 1 file changed, 13 deletions(-)
+
+--- a/include/linux/mmu_notifier.h
++++ b/include/linux/mmu_notifier.h
+@@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destr
+ 	___pmd;								\
+ })
+ 
+-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
+-({									\
+-	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
+-	pmd_t ___pmd;							\
+-									\
+-	___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);		\
+-	mmu_notifier_invalidate_range(__mm, ___haddr,			\
+-				      ___haddr + HPAGE_PMD_SIZE);	\
+-									\
+-	___pmd;								\
+-})
+-
+ /*
+  * set_pte_at_notify() sets the pte _after_ running the notifier.
+  * This is safe to start by updating the secondary MMUs, because the primary MMU
+@@ -475,7 +463,6 @@ static inline void mmu_notifier_mm_destr
+ #define pmdp_clear_young_notify pmdp_test_and_clear_young
+ #define	ptep_clear_flush_notify ptep_clear_flush
+ #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
+ #define set_pte_at_notify set_pte_at
+ 
+ #endif /* CONFIG_MMU_NOTIFIER */
diff --git a/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch b/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
new file mode 100644
index 00000000000..162f86cdab2
--- /dev/null
+++ b/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
@@ -0,0 +1,94 @@
+From f1c635b439a5c01776fe3a25b1e2dc546ea82e6f Mon Sep 17 00:00:00 2001
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Tue, 7 Mar 2017 09:15:53 -0800
+Subject: scsi: storvsc: Workaround for virtual DVD SCSI version
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+commit f1c635b439a5c01776fe3a25b1e2dc546ea82e6f upstream.
+
+Hyper-V host emulation of SCSI for virtual DVD device reports SCSI
+version 0 (UNKNOWN) but is still capable of supporting REPORTLUN.
+
+Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11
+successfully with a virtual DVD ROM device. What happens is that the SCSI
+scan process falls back to doing sequential probing by INQUIRY.  But the
+storvsc driver has a previous workaround that masks/blocks all error
+reports from INQUIRY (or MODE_SENSE) commands.  This workaround causes
+the scan to then populate a full set of bogus LUNs on the target and
+then sends the kernel spinning off into a death spiral doing block reads
+on the non-existent LUNs.
+
+By setting the correct blacklist flags, the target with the DVD device
+is scanned with REPORTLUN and that works correctly.
+
+This patch needs to go into current 4.11; it is safe but not necessary in
+older kernels.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/storvsc_drv.c |   27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -379,8 +379,6 @@ MODULE_PARM_DESC(vcpus_per_sub_channel,
+  */
+ static int storvsc_timeout = 180;
+ 
+-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
+-
+ 
+ static void storvsc_on_channel_callback(void *context);
+ 
+@@ -1241,6 +1239,22 @@ static int storvsc_do_io(struct hv_devic
+ 	return ret;
+ }
+ 
++static int storvsc_device_alloc(struct scsi_device *sdevice)
++{
++	/*
++	 * Set blist flag to permit the reading of the VPD pages even when
++	 * the target may claim SPC-2 compliance. MSFT targets currently
++	 * claim SPC-2 compliance while they implement post SPC-2 features.
++	 * With this flag we can correctly handle WRITE_SAME_16 issues.
++	 *
++	 * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
++	 * still supports REPORT LUN.
++	 */
++	sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
++
++	return 0;
++}
++
+ static int storvsc_device_configure(struct scsi_device *sdevice)
+ {
+ 
+@@ -1256,14 +1270,6 @@ static int storvsc_device_configure(stru
+ 	sdevice->no_write_same = 1;
+ 
+ 	/*
+-	 * Add blist flags to permit the reading of the VPD pages even when
+-	 * the target may claim SPC-2 compliance. MSFT targets currently
+-	 * claim SPC-2 compliance while they implement post SPC-2 features.
+-	 * With this patch we can correctly handle WRITE_SAME_16 issues.
+-	 */
+-	sdevice->sdev_bflags |= msft_blist_flags;
+-
+-	/*
+ 	 * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
+ 	 * if the device is a MSFT virtual device.  If the host is
+ 	 * WIN10 or newer, allow write_same.
+@@ -1529,6 +1535,7 @@ static struct scsi_host_template scsi_dr
+ 	.eh_host_reset_handler =	storvsc_host_reset_handler,
+ 	.proc_name =		"storvsc_host",
+ 	.eh_timed_out =		storvsc_eh_timed_out,
++	.slave_alloc =		storvsc_device_alloc,
+ 	.slave_configure =	storvsc_device_configure,
+ 	.cmd_per_lun =		255,
+ 	.this_id =		-1,
diff --git a/queue-4.4/series b/queue-4.4/series
index d231f991fb6..f19b2bf41bc 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -30,3 +30,9 @@ media-dvb-i2c-transfers-over-usb-cannot-be-done-from-stack.patch
 arm64-kvm-fix-vttbr_baddr_mask-bug_on-off-by-one.patch
 kvm-vmx-remove-i-o-port-0x80-bypass-on-intel-hosts.patch
 arm64-fpsimd-prevent-registers-leaking-from-dead-tasks.patch
+arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
+arm-avoid-faulting-on-qemu.patch
+scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
+thp-reduce-indentation-level-in-change_huge_pmd.patch
+thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
+mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
diff --git a/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch b/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
new file mode 100644
index 00000000000..11c40e3a0dd
--- /dev/null
+++ b/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
@@ -0,0 +1,85 @@
+From ced108037c2aa542b3ed8b7afd1576064ad1362a Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 13 Apr 2017 14:56:20 -0700
+Subject: thp: fix MADV_DONTNEED vs. numa balancing race
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit ced108037c2aa542b3ed8b7afd1576064ad1362a upstream.
+
+In the prot_numa case, we are under down_read(mmap_sem).  It's critical
+not to clear the pmd temporarily, to avoid a race with MADV_DONTNEED,
+which is also under down_read(mmap_sem):
+
+	CPU0:				CPU1:
+				change_huge_pmd(prot_numa=1)
+				 pmdp_huge_get_and_clear_notify()
+madvise_dontneed()
+ zap_pmd_range()
+  pmd_trans_huge(*pmd) == 0 (without ptl)
+  // skip the pmd
+				 set_pmd_at();
+				 // pmd is re-established
+
+The race makes MADV_DONTNEED miss the huge pmd and not clear it,
+which may break userspace.
+
+Found by code analysis; never seen triggered in practice.
+
+Link: http://lkml.kernel.org/r/20170302151034.27829-3-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[jwang: adjust context for 4.4]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   34 +++++++++++++++++++++++++++++++++-
+ 1 file changed, 33 insertions(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1588,7 +1588,39 @@ int change_huge_pmd(struct vm_area_struc
+ 	if (prot_numa && pmd_protnone(*pmd))
+ 		goto unlock;
+ 
+-	entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
++	/*
++	 * In case prot_numa, we are under down_read(mmap_sem). It's critical
++	 * to not clear pmd intermittently to avoid race with MADV_DONTNEED
++	 * which is also under down_read(mmap_sem):
++	 *
++	 *	CPU0:				CPU1:
++	 *				change_huge_pmd(prot_numa=1)
++	 *				 pmdp_huge_get_and_clear_notify()
++	 * madvise_dontneed()
++	 *  zap_pmd_range()
++	 *   pmd_trans_huge(*pmd) == 0 (without ptl)
++	 *   // skip the pmd
++	 *				 set_pmd_at();
++	 *				 // pmd is re-established
++	 *
++	 * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
++	 * which may break userspace.
++	 *
++	 * pmdp_invalidate() is required to make sure we don't miss
++	 * dirty/young flags set by hardware.
++	 */
++	entry = *pmd;
++	pmdp_invalidate(vma, addr, pmd);
++
++	/*
++	 * Recover dirty/young flags.  It relies on pmdp_invalidate to not
++	 * corrupt them.
++	 */
++	if (pmd_dirty(*pmd))
++		entry = pmd_mkdirty(entry);
++	if (pmd_young(*pmd))
++		entry = pmd_mkyoung(entry);
++
+ 	entry = pmd_modify(entry, newprot);
+ 	if (preserve_write)
+ 		entry = pmd_mkwrite(entry);
diff --git a/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch b/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch
new file mode 100644
index 00000000000..5ceb52458c0
--- /dev/null
+++ b/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch
@@ -0,0 +1,107 @@
+From 0a85e51d37645e9ce57e5e1a30859e07810ed07c Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 13 Apr 2017 14:56:17 -0700
+Subject: thp: reduce indentation level in change_huge_pmd()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 0a85e51d37645e9ce57e5e1a30859e07810ed07c upstream.
+
+Patch series "thp: fix few MADV_DONTNEED races"
+
+For MADV_DONTNEED to work properly with huge pages, it's critical not to
+clear the pmd temporarily unless you hold down_write(mmap_sem).
+
+Otherwise MADV_DONTNEED can miss the THP, which can lead to userspace
+breakage.
+
+See example of such race in commit message of patch 2/4.
+
+All these races were found by code inspection.  I haven't seen them
+triggered.  I don't think it's worth applying them to stable@.
+
+This patch (of 4):
+
+Restructure code in preparation for a fix.
+
+Link: http://lkml.kernel.org/r/20170302151034.27829-2-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[jwang: adjust context for 4.4 kernel]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   54 ++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 28 insertions(+), 26 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1566,35 +1566,37 @@ int change_huge_pmd(struct vm_area_struc
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	spinlock_t *ptl;
++	pmd_t entry;
++	bool preserve_write;
++
+ 	int ret = 0;
+ 
+-	if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+-		pmd_t entry;
+-		bool preserve_write = prot_numa && pmd_write(*pmd);
+-		ret = 1;
+-
+-		/*
+-		 * Avoid trapping faults against the zero page. The read-only
+-		 * data is likely to be read-cached on the local CPU and
+-		 * local/remote hits to the zero page are not interesting.
+-		 */
+-		if (prot_numa && is_huge_zero_pmd(*pmd)) {
+-			spin_unlock(ptl);
+-			return ret;
+-		}
+-
+-		if (!prot_numa || !pmd_protnone(*pmd)) {
+-			entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
+-			entry = pmd_modify(entry, newprot);
+-			if (preserve_write)
+-				entry = pmd_mkwrite(entry);
+-			ret = HPAGE_PMD_NR;
+-			set_pmd_at(mm, addr, pmd, entry);
+-			BUG_ON(!preserve_write && pmd_write(entry));
+-		}
+-		spin_unlock(ptl);
+-	}
++	if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
++		return 0;
++
++	preserve_write = prot_numa && pmd_write(*pmd);
++	ret = 1;
++
++	/*
++	 * Avoid trapping faults against the zero page. The read-only
++	 * data is likely to be read-cached on the local CPU and
++	 * local/remote hits to the zero page are not interesting.
++	 */
++	if (prot_numa && is_huge_zero_pmd(*pmd))
++		goto unlock;
++
++	if (prot_numa && pmd_protnone(*pmd))
++		goto unlock;
+ 
++	entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
++	entry = pmd_modify(entry, newprot);
++	if (preserve_write)
++		entry = pmd_mkwrite(entry);
++	ret = HPAGE_PMD_NR;
++	set_pmd_at(mm, addr, pmd, entry);
++	BUG_ON(!preserve_write && pmd_write(entry));
++unlock:
++	spin_unlock(ptl);
+ 	return ret;
+ }
+