git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 Dec 2017 08:32:49 +0000 (09:32 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 12 Dec 2017 08:32:49 +0000 (09:32 +0100)
added patches:
arm-avoid-faulting-on-qemu.patch
arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
thp-reduce-indentation-level-in-change_huge_pmd.patch

queue-4.4/arm-avoid-faulting-on-qemu.patch [new file with mode: 0644]
queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch [new file with mode: 0644]
queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch [new file with mode: 0644]
queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch [new file with mode: 0644]
queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch [new file with mode: 0644]

diff --git a/queue-4.4/arm-avoid-faulting-on-qemu.patch b/queue-4.4/arm-avoid-faulting-on-qemu.patch
new file mode 100644 (file)
index 0000000..526c9e4
--- /dev/null
@@ -0,0 +1,48 @@
+From 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Mon, 27 Nov 2017 11:22:42 +0000
+Subject: ARM: avoid faulting on qemu
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 upstream.
+
+When qemu starts a kernel in a bare environment, the default SCR has
+the AW and FW bits clear, which means that the kernel can't modify
+the PSR A or PSR F bits, and means that FIQs and imprecise aborts are
+always masked.
+
+When running uboot under qemu, the AW and FW SCR bits are set, and the
+kernel functions normally - and this is how real hardware behaves.
+
+Fix this for qemu by ignoring the FIQ bit.
+
+Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode")
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Cc: Alex Shi <alex.shi@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/kernel/entry-header.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm/kernel/entry-header.S
++++ b/arch/arm/kernel/entry-header.S
+@@ -295,7 +295,7 @@
+       mov     r2, sp
+       ldr     r1, [r2, #\offset + S_PSR]      @ get calling cpsr
+       ldr     lr, [r2, #\offset + S_PC]!      @ get pc
+-      tst     r1, #0xcf
++      tst     r1, #PSR_I_BIT | 0x0f
+       bne     1f
+       msr     spsr_cxsf, r1                   @ save in spsr_svc
+ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
+@@ -327,7 +327,7 @@
+       ldr     r1, [sp, #\offset + S_PSR]      @ get calling cpsr
+       ldr     lr, [sp, #\offset + S_PC]       @ get pc
+       add     sp, sp, #\offset + S_SP
+-      tst     r1, #0xcf
++      tst     r1, #PSR_I_BIT | 0x0f
+       bne     1f
+       msr     spsr_cxsf, r1                   @ save in spsr_svc
diff --git a/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch b/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
new file mode 100644 (file)
index 0000000..437bb4d
--- /dev/null
@@ -0,0 +1,86 @@
+From 8bafae202c82dc257f649ea3c275a0f35ee15113 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Fri, 24 Nov 2017 23:49:34 +0000
+Subject: ARM: BUG if jumping to usermode address in kernel mode
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 8bafae202c82dc257f649ea3c275a0f35ee15113 upstream.
+
+Detect if we are returning to usermode via the normal kernel exit paths
+but the saved PSR value indicates that we are in kernel mode.  This
+could occur due to corrupted stack state, which has been observed with
+"ftracetest".
+
+This ensures that we catch the problem case before we get to user code.
+
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Cc: Alex Shi <alex.shi@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/arm/include/asm/assembler.h |   18 ++++++++++++++++++
+ arch/arm/kernel/entry-header.S   |    6 ++++++
+ 2 files changed, 24 insertions(+)
+
+--- a/arch/arm/include/asm/assembler.h
++++ b/arch/arm/include/asm/assembler.h
+@@ -512,4 +512,22 @@ THUMB(    orr     \reg , \reg , #PSR_T_BIT        )
+ #endif
+       .endm
++      .macro  bug, msg, line
++#ifdef CONFIG_THUMB2_KERNEL
++1:    .inst   0xde02
++#else
++1:    .inst   0xe7f001f2
++#endif
++#ifdef CONFIG_DEBUG_BUGVERBOSE
++      .pushsection .rodata.str, "aMS", %progbits, 1
++2:    .asciz  "\msg"
++      .popsection
++      .pushsection __bug_table, "aw"
++      .align  2
++      .word   1b, 2b
++      .hword  \line
++      .popsection
++#endif
++      .endm
++
+ #endif /* __ASM_ASSEMBLER_H__ */
+--- a/arch/arm/kernel/entry-header.S
++++ b/arch/arm/kernel/entry-header.S
+@@ -295,6 +295,8 @@
+       mov     r2, sp
+       ldr     r1, [r2, #\offset + S_PSR]      @ get calling cpsr
+       ldr     lr, [r2, #\offset + S_PC]!      @ get pc
++      tst     r1, #0xcf
++      bne     1f
+       msr     spsr_cxsf, r1                   @ save in spsr_svc
+ #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
+       @ We must avoid clrex due to Cortex-A15 erratum #830321
+@@ -309,6 +311,7 @@
+                                               @ after ldm {}^
+       add     sp, sp, #\offset + S_FRAME_SIZE
+       movs    pc, lr                          @ return & move spsr_svc into cpsr
++1:    bug     "Returning to usermode but unexpected PSR bits set?", \@
+ #elif defined(CONFIG_CPU_V7M)
+       @ V7M restore.
+       @ Note that we don't need to do clrex here as clearing the local
+@@ -324,6 +327,8 @@
+       ldr     r1, [sp, #\offset + S_PSR]      @ get calling cpsr
+       ldr     lr, [sp, #\offset + S_PC]       @ get pc
+       add     sp, sp, #\offset + S_SP
++      tst     r1, #0xcf
++      bne     1f
+       msr     spsr_cxsf, r1                   @ save in spsr_svc
+       @ We must avoid clrex due to Cortex-A15 erratum #830321
+@@ -336,6 +341,7 @@
+       .endif
+       add     sp, sp, #S_FRAME_SIZE - S_SP
+       movs    pc, lr                          @ return & move spsr_svc into cpsr
++1:    bug     "Returning to usermode but unexpected PSR bits set?", \@
+ #endif        /* !CONFIG_THUMB2_KERNEL */
+       .endm
diff --git a/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch b/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
new file mode 100644 (file)
index 0000000..7830fbf
--- /dev/null
@@ -0,0 +1,56 @@
+From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 13 Apr 2017 14:56:23 -0700
+Subject: mm: drop unused pmdp_huge_get_and_clear_notify()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit c0c379e2931b05facef538e53bf3b21f283d9a0b upstream.
+
+Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the
+last pmdp_huge_get_and_clear_notify() user is gone.
+
+Let's drop the helper.
+
+Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[jwang: adjust context for 4.4]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mmu_notifier.h |   13 -------------
+ 1 file changed, 13 deletions(-)
+
+--- a/include/linux/mmu_notifier.h
++++ b/include/linux/mmu_notifier.h
+@@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destr
+       ___pmd;                                                         \
+ })
+-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)          \
+-({                                                                    \
+-      unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;              \
+-      pmd_t ___pmd;                                                   \
+-                                                                      \
+-      ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);         \
+-      mmu_notifier_invalidate_range(__mm, ___haddr,                   \
+-                                    ___haddr + HPAGE_PMD_SIZE);       \
+-                                                                      \
+-      ___pmd;                                                         \
+-})
+-
+ /*
+  * set_pte_at_notify() sets the pte _after_ running the notifier.
+  * This is safe to start by updating the secondary MMUs, because the primary MMU
+@@ -475,7 +463,6 @@ static inline void mmu_notifier_mm_destr
+ #define pmdp_clear_young_notify pmdp_test_and_clear_young
+ #define       ptep_clear_flush_notify ptep_clear_flush
+ #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
+-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
+ #define set_pte_at_notify set_pte_at
+ #endif /* CONFIG_MMU_NOTIFIER */
diff --git a/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch b/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
new file mode 100644 (file)
index 0000000..162f86c
--- /dev/null
@@ -0,0 +1,94 @@
+From f1c635b439a5c01776fe3a25b1e2dc546ea82e6f Mon Sep 17 00:00:00 2001
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Tue, 7 Mar 2017 09:15:53 -0800
+Subject: scsi: storvsc: Workaround for virtual DVD SCSI version
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+commit f1c635b439a5c01776fe3a25b1e2dc546ea82e6f upstream.
+
+Hyper-V host emulation of SCSI for virtual DVD device reports SCSI
+version 0 (UNKNOWN) but is still capable of supporting REPORTLUN.
+
+Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11
+successfully with virtual DVD ROM device. What happens is that the SCSI
+scan process falls back to doing sequential probing by INQUIRY.  But the
+storvsc driver has a previous workaround that masks/blocks all errors
+reports from INQUIRY (or MODE_SENSE) commands.  This workaround causes
+the scan to then populate a full set of bogus LUN's on the target and
+then sends kernel spinning off into a death spiral doing block reads on
+the non-existent LUNs.
+
+By setting the correct blacklist flags, the target with the DVD device
+is scanned with REPORTLUN and that works correctly.
+
+Patch needs to go in current 4.11, it is safe but not necessary in older
+kernels.
+
+Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
+Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/storvsc_drv.c |   27 +++++++++++++++++----------
+ 1 file changed, 17 insertions(+), 10 deletions(-)
+
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -379,8 +379,6 @@ MODULE_PARM_DESC(vcpus_per_sub_channel,
+  */
+ static int storvsc_timeout = 180;
+-static int msft_blist_flags = BLIST_TRY_VPD_PAGES;
+-
+ static void storvsc_on_channel_callback(void *context);
+@@ -1241,6 +1239,22 @@ static int storvsc_do_io(struct hv_devic
+       return ret;
+ }
++static int storvsc_device_alloc(struct scsi_device *sdevice)
++{
++      /*
++       * Set blist flag to permit the reading of the VPD pages even when
++       * the target may claim SPC-2 compliance. MSFT targets currently
++       * claim SPC-2 compliance while they implement post SPC-2 features.
++       * With this flag we can correctly handle WRITE_SAME_16 issues.
++       *
++       * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but
++       * still supports REPORT LUN.
++       */
++      sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES;
++
++      return 0;
++}
++
+ static int storvsc_device_configure(struct scsi_device *sdevice)
+ {
+@@ -1256,14 +1270,6 @@ static int storvsc_device_configure(stru
+       sdevice->no_write_same = 1;
+       /*
+-       * Add blist flags to permit the reading of the VPD pages even when
+-       * the target may claim SPC-2 compliance. MSFT targets currently
+-       * claim SPC-2 compliance while they implement post SPC-2 features.
+-       * With this patch we can correctly handle WRITE_SAME_16 issues.
+-       */
+-      sdevice->sdev_bflags |= msft_blist_flags;
+-
+-      /*
+        * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3
+        * if the device is a MSFT virtual device.  If the host is
+        * WIN10 or newer, allow write_same.
+@@ -1529,6 +1535,7 @@ static struct scsi_host_template scsi_dr
+       .eh_host_reset_handler =        storvsc_host_reset_handler,
+       .proc_name =            "storvsc_host",
+       .eh_timed_out =         storvsc_eh_timed_out,
++      .slave_alloc =          storvsc_device_alloc,
+       .slave_configure =      storvsc_device_configure,
+       .cmd_per_lun =          255,
+       .this_id =              -1,
diff --git a/queue-4.4/series b/queue-4.4/series
index d231f991fb6d1d6e1d2d083d0f7c0c7aabbd1ed4..f19b2bf41bcea1ccb12d5fd3f6b8406bd0ca61b9 100644 (file)
@@ -30,3 +30,9 @@ media-dvb-i2c-transfers-over-usb-cannot-be-done-from-stack.patch
 arm64-kvm-fix-vttbr_baddr_mask-bug_on-off-by-one.patch
 kvm-vmx-remove-i-o-port-0x80-bypass-on-intel-hosts.patch
 arm64-fpsimd-prevent-registers-leaking-from-dead-tasks.patch
+arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch
+arm-avoid-faulting-on-qemu.patch
+scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch
+thp-reduce-indentation-level-in-change_huge_pmd.patch
+thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
+mm-drop-unused-pmdp_huge_get_and_clear_notify.patch
diff --git a/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch b/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch
new file mode 100644 (file)
index 0000000..11c40e3
--- /dev/null
@@ -0,0 +1,85 @@
+From ced108037c2aa542b3ed8b7afd1576064ad1362a Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 13 Apr 2017 14:56:20 -0700
+Subject: thp: fix MADV_DONTNEED vs. numa balancing race
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit ced108037c2aa542b3ed8b7afd1576064ad1362a upstream.
+
+In case prot_numa, we are under down_read(mmap_sem).  It's critical to
+not clear pmd intermittently to avoid race with MADV_DONTNEED which is
+also under down_read(mmap_sem):
+
+       CPU0:                           CPU1:
+                               change_huge_pmd(prot_numa=1)
+                                pmdp_huge_get_and_clear_notify()
+madvise_dontneed()
+ zap_pmd_range()
+  pmd_trans_huge(*pmd) == 0 (without ptl)
+  // skip the pmd
+                                set_pmd_at();
+                                // pmd is re-established
+
+The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+which may break userspace.
+
+Found by code analysis, never saw triggered.
+
+Link: http://lkml.kernel.org/r/20170302151034.27829-3-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[jwang: adjust context for 4.4]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   34 +++++++++++++++++++++++++++++++++-
+ 1 file changed, 33 insertions(+), 1 deletion(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1588,7 +1588,39 @@ int change_huge_pmd(struct vm_area_struc
+       if (prot_numa && pmd_protnone(*pmd))
+               goto unlock;
+-      entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
++      /*
++       * In case prot_numa, we are under down_read(mmap_sem). It's critical
++       * to not clear pmd intermittently to avoid race with MADV_DONTNEED
++       * which is also under down_read(mmap_sem):
++       *
++       *      CPU0:                           CPU1:
++       *                              change_huge_pmd(prot_numa=1)
++       *                               pmdp_huge_get_and_clear_notify()
++       * madvise_dontneed()
++       *  zap_pmd_range()
++       *   pmd_trans_huge(*pmd) == 0 (without ptl)
++       *   // skip the pmd
++       *                               set_pmd_at();
++       *                               // pmd is re-established
++       *
++       * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
++       * which may break userspace.
++       *
++       * pmdp_invalidate() is required to make sure we don't miss
++       * dirty/young flags set by hardware.
++       */
++      entry = *pmd;
++      pmdp_invalidate(vma, addr, pmd);
++
++      /*
++       * Recover dirty/young flags.  It relies on pmdp_invalidate to not
++       * corrupt them.
++       */
++      if (pmd_dirty(*pmd))
++              entry = pmd_mkdirty(entry);
++      if (pmd_young(*pmd))
++              entry = pmd_mkyoung(entry);
++
+       entry = pmd_modify(entry, newprot);
+       if (preserve_write)
+               entry = pmd_mkwrite(entry);
diff --git a/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch b/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch
new file mode 100644 (file)
index 0000000..5ceb524
--- /dev/null
@@ -0,0 +1,107 @@
+From 0a85e51d37645e9ce57e5e1a30859e07810ed07c Mon Sep 17 00:00:00 2001
+From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
+Date: Thu, 13 Apr 2017 14:56:17 -0700
+Subject: thp: reduce indentation level in change_huge_pmd()
+
+From: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+
+commit 0a85e51d37645e9ce57e5e1a30859e07810ed07c upstream.
+
+Patch series "thp: fix few MADV_DONTNEED races"
+
+For MADV_DONTNEED to work properly with huge pages, it's critical to not
+clear pmd intermittently unless you hold down_write(mmap_sem).
+
+Otherwise MADV_DONTNEED can miss the THP which can lead to userspace
+breakage.
+
+See example of such race in commit message of patch 2/4.
+
+All these races are found by code inspection.  I haven't seen them
+triggered.  I don't think it's worth to apply them to stable@.
+
+This patch (of 4):
+
+Restructure code in preparation for a fix.
+
+Link: http://lkml.kernel.org/r/20170302151034.27829-2-kirill.shutemov@linux.intel.com
+Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[jwang: adjust context for 4.4 kernel]
+Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   54 ++++++++++++++++++++++++++++--------------------------
+ 1 file changed, 28 insertions(+), 26 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1566,35 +1566,37 @@ int change_huge_pmd(struct vm_area_struc
+ {
+       struct mm_struct *mm = vma->vm_mm;
+       spinlock_t *ptl;
++      pmd_t entry;
++      bool preserve_write;
++
+       int ret = 0;
+-      if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+-              pmd_t entry;
+-              bool preserve_write = prot_numa && pmd_write(*pmd);
+-              ret = 1;
+-
+-              /*
+-               * Avoid trapping faults against the zero page. The read-only
+-               * data is likely to be read-cached on the local CPU and
+-               * local/remote hits to the zero page are not interesting.
+-               */
+-              if (prot_numa && is_huge_zero_pmd(*pmd)) {
+-                      spin_unlock(ptl);
+-                      return ret;
+-              }
+-
+-              if (!prot_numa || !pmd_protnone(*pmd)) {
+-                      entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
+-                      entry = pmd_modify(entry, newprot);
+-                      if (preserve_write)
+-                              entry = pmd_mkwrite(entry);
+-                      ret = HPAGE_PMD_NR;
+-                      set_pmd_at(mm, addr, pmd, entry);
+-                      BUG_ON(!preserve_write && pmd_write(entry));
+-              }
+-              spin_unlock(ptl);
+-      }
++      if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
++              return 0;
++
++      preserve_write = prot_numa && pmd_write(*pmd);
++      ret = 1;
++
++      /*
++       * Avoid trapping faults against the zero page. The read-only
++       * data is likely to be read-cached on the local CPU and
++       * local/remote hits to the zero page are not interesting.
++       */
++      if (prot_numa && is_huge_zero_pmd(*pmd))
++              goto unlock;
++
++      if (prot_numa && pmd_protnone(*pmd))
++              goto unlock;
++      entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
++      entry = pmd_modify(entry, newprot);
++      if (preserve_write)
++              entry = pmd_mkwrite(entry);
++      ret = HPAGE_PMD_NR;
++      set_pmd_at(mm, addr, pmd, entry);
++      BUG_ON(!preserve_write && pmd_write(entry));
++unlock:
++      spin_unlock(ptl);
+       return ret;
+ }