From: Greg Kroah-Hartman Date: Tue, 12 Dec 2017 08:32:49 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.9.69~9 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7570806f70057bdc7e091738560819dfb63f921c;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: arm-avoid-faulting-on-qemu.patch arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch mm-drop-unused-pmdp_huge_get_and_clear_notify.patch scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch thp-fix-madv_dontneed-vs.-numa-balancing-race.patch thp-reduce-indentation-level-in-change_huge_pmd.patch --- diff --git a/queue-4.4/arm-avoid-faulting-on-qemu.patch b/queue-4.4/arm-avoid-faulting-on-qemu.patch new file mode 100644 index 00000000000..526c9e42d4f --- /dev/null +++ b/queue-4.4/arm-avoid-faulting-on-qemu.patch @@ -0,0 +1,48 @@ +From 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Mon, 27 Nov 2017 11:22:42 +0000 +Subject: ARM: avoid faulting on qemu + +From: Russell King + +commit 3aaf33bebda8d4ffcc0fc8ef39e6c1ac68823b11 upstream. + +When qemu starts a kernel in a bare environment, the default SCR has +the AW and FW bits clear, which means that the kernel can't modify +the PSR A or PSR F bits, and means that FIQs and imprecise aborts are +always masked. + +When running uboot under qemu, the AW and FW SCR bits are set, and the +kernel functions normally - and this is how real hardware behaves. + +Fix this for qemu by ignoring the FIQ bit. + +Fixes: 8bafae202c82 ("ARM: BUG if jumping to usermode address in kernel mode") +Signed-off-by: Russell King +Cc: Alex Shi +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/kernel/entry-header.S | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/arm/kernel/entry-header.S ++++ b/arch/arm/kernel/entry-header.S +@@ -295,7 +295,7 @@ + mov r2, sp + ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr + ldr lr, [r2, #\offset + S_PC]! @ get pc +- tst r1, #0xcf ++ tst r1, #PSR_I_BIT | 0x0f + bne 1f + msr spsr_cxsf, r1 @ save in spsr_svc + #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) +@@ -327,7 +327,7 @@ + ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr + ldr lr, [sp, #\offset + S_PC] @ get pc + add sp, sp, #\offset + S_SP +- tst r1, #0xcf ++ tst r1, #PSR_I_BIT | 0x0f + bne 1f + msr spsr_cxsf, r1 @ save in spsr_svc + diff --git a/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch b/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch new file mode 100644 index 00000000000..437bb4daa1a --- /dev/null +++ b/queue-4.4/arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch @@ -0,0 +1,86 @@ +From 8bafae202c82dc257f649ea3c275a0f35ee15113 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Fri, 24 Nov 2017 23:49:34 +0000 +Subject: ARM: BUG if jumping to usermode address in kernel mode + +From: Russell King + +commit 8bafae202c82dc257f649ea3c275a0f35ee15113 upstream. + +Detect if we are returning to usermode via the normal kernel exit paths +but the saved PSR value indicates that we are in kernel mode. This +could occur due to corrupted stack state, which has been observed with +"ftracetest". + +This ensures that we catch the problem case before we get to user code. + +Signed-off-by: Russell King +Cc: Alex Shi +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/include/asm/assembler.h | 18 ++++++++++++++++++ + arch/arm/kernel/entry-header.S | 6 ++++++ + 2 files changed, 24 insertions(+) + +--- a/arch/arm/include/asm/assembler.h ++++ b/arch/arm/include/asm/assembler.h +@@ -512,4 +512,22 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) + #endif + .endm + ++ .macro bug, msg, line ++#ifdef CONFIG_THUMB2_KERNEL ++1: .inst 0xde02 ++#else ++1: .inst 0xe7f001f2 ++#endif ++#ifdef CONFIG_DEBUG_BUGVERBOSE ++ .pushsection .rodata.str, "aMS", %progbits, 1 ++2: .asciz "\msg" ++ .popsection ++ .pushsection __bug_table, "aw" ++ .align 2 ++ .word 1b, 2b ++ .hword \line ++ .popsection ++#endif ++ .endm ++ + #endif /* __ASM_ASSEMBLER_H__ */ +--- a/arch/arm/kernel/entry-header.S ++++ b/arch/arm/kernel/entry-header.S +@@ -295,6 +295,8 @@ + mov r2, sp + ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr + ldr lr, [r2, #\offset + S_PC]! @ get pc ++ tst r1, #0xcf ++ bne 1f + msr spsr_cxsf, r1 @ save in spsr_svc + #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) + @ We must avoid clrex due to Cortex-A15 erratum #830321 +@@ -309,6 +311,7 @@ + @ after ldm {}^ + add sp, sp, #\offset + S_FRAME_SIZE + movs pc, lr @ return & move spsr_svc into cpsr ++1: bug "Returning to usermode but unexpected PSR bits set?", \@ + #elif defined(CONFIG_CPU_V7M) + @ V7M restore. + @ Note that we don't need to do clrex here as clearing the local +@@ -324,6 +327,8 @@ + ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr + ldr lr, [sp, #\offset + S_PC] @ get pc + add sp, sp, #\offset + S_SP ++ tst r1, #0xcf ++ bne 1f + msr spsr_cxsf, r1 @ save in spsr_svc + + @ We must avoid clrex due to Cortex-A15 erratum #830321 +@@ -336,6 +341,7 @@ + .endif + add sp, sp, #S_FRAME_SIZE - S_SP + movs pc, lr @ return & move spsr_svc into cpsr ++1: bug "Returning to usermode but unexpected PSR bits set?", \@ + #endif /* !CONFIG_THUMB2_KERNEL */ + .endm + diff --git a/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch b/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch new file mode 100644 index 00000000000..7830fbf0354 --- /dev/null +++ b/queue-4.4/mm-drop-unused-pmdp_huge_get_and_clear_notify.patch @@ -0,0 +1,56 @@ +From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Thu, 13 Apr 2017 14:56:23 -0700 +Subject: mm: drop unused pmdp_huge_get_and_clear_notify() + +From: Kirill A. Shutemov + +commit c0c379e2931b05facef538e53bf3b21f283d9a0b upstream. + +Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the +last pmdp_huge_get_and_clear_notify() user is gone. + +Let's drop the helper. + +Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com +Signed-off-by: Kirill A. Shutemov +Cc: Dave Hansen +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[jwang: adjust context for 4.4] +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mmu_notifier.h | 13 ------------- + 1 file changed, 13 deletions(-) + +--- a/include/linux/mmu_notifier.h ++++ b/include/linux/mmu_notifier.h +@@ -381,18 +381,6 @@ static inline void mmu_notifier_mm_destr + ___pmd; \ + }) + +-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ +-({ \ +- unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ +- pmd_t ___pmd; \ +- \ +- ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ +- mmu_notifier_invalidate_range(__mm, ___haddr, \ +- ___haddr + HPAGE_PMD_SIZE); \ +- \ +- ___pmd; \ +-}) +- + /* + * set_pte_at_notify() sets the pte _after_ running the notifier. + * This is safe to start by updating the secondary MMUs, because the primary MMU +@@ -475,7 +463,6 @@ static inline void mmu_notifier_mm_destr + #define pmdp_clear_young_notify pmdp_test_and_clear_young + #define ptep_clear_flush_notify ptep_clear_flush + #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush +-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear + #define set_pte_at_notify set_pte_at + + #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch b/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch new file mode 100644 index 00000000000..162f86cdab2 --- /dev/null +++ b/queue-4.4/scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch @@ -0,0 +1,94 @@ +From f1c635b439a5c01776fe3a25b1e2dc546ea82e6f Mon Sep 17 00:00:00 2001 +From: Stephen Hemminger +Date: Tue, 7 Mar 2017 09:15:53 -0800 +Subject: scsi: storvsc: Workaround for virtual DVD SCSI version + +From: Stephen Hemminger + +commit f1c635b439a5c01776fe3a25b1e2dc546ea82e6f upstream. + +Hyper-V host emulation of SCSI for virtual DVD device reports SCSI +version 0 (UNKNOWN) but is still capable of supporting REPORTLUN. + +Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11 +successfully with virtual DVD ROM device. What happens is that the SCSI +scan process falls back to doing sequential probing by INQUIRY. But the +storvsc driver has a previous workaround that masks/blocks all errors +reports from INQUIRY (or MODE_SENSE) commands. This workaround causes +the scan to then populate a full set of bogus LUN's on the target and +then sends kernel spinning off into a death spiral doing block reads on +the non-existent LUNs. + +By setting the correct blacklist flags, the target with the DVD device +is scanned with REPORTLUN and that works correctly. + +Patch needs to go in current 4.11, it is safe but not necessary in older +kernels. + +Signed-off-by: Stephen Hemminger +Reviewed-by: K. Y. Srinivasan +Reviewed-by: Christoph Hellwig +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -379,8 +379,6 @@ MODULE_PARM_DESC(vcpus_per_sub_channel, + */ + static int storvsc_timeout = 180; + +-static int msft_blist_flags = BLIST_TRY_VPD_PAGES; +- + + static void storvsc_on_channel_callback(void *context); + +@@ -1241,6 +1239,22 @@ static int storvsc_do_io(struct hv_devic + return ret; + } + ++static int storvsc_device_alloc(struct scsi_device *sdevice) ++{ ++ /* ++ * Set blist flag to permit the reading of the VPD pages even when ++ * the target may claim SPC-2 compliance. MSFT targets currently ++ * claim SPC-2 compliance while they implement post SPC-2 features. ++ * With this flag we can correctly handle WRITE_SAME_16 issues. ++ * ++ * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but ++ * still supports REPORT LUN. ++ */ ++ sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES; ++ ++ return 0; ++} ++ + static int storvsc_device_configure(struct scsi_device *sdevice) + { + +@@ -1256,14 +1270,6 @@ static int storvsc_device_configure(stru + sdevice->no_write_same = 1; + + /* +- * Add blist flags to permit the reading of the VPD pages even when +- * the target may claim SPC-2 compliance. MSFT targets currently +- * claim SPC-2 compliance while they implement post SPC-2 features. +- * With this patch we can correctly handle WRITE_SAME_16 issues. +- */ +- sdevice->sdev_bflags |= msft_blist_flags; +- +- /* + * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3 + * if the device is a MSFT virtual device. If the host is + * WIN10 or newer, allow write_same. +@@ -1529,6 +1535,7 @@ static struct scsi_host_template scsi_dr + .eh_host_reset_handler = storvsc_host_reset_handler, + .proc_name = "storvsc_host", + .eh_timed_out = storvsc_eh_timed_out, ++ .slave_alloc = storvsc_device_alloc, + .slave_configure = storvsc_device_configure, + .cmd_per_lun = 255, + .this_id = -1, diff --git a/queue-4.4/series b/queue-4.4/series index d231f991fb6..f19b2bf41bc 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -30,3 +30,9 @@ media-dvb-i2c-transfers-over-usb-cannot-be-done-from-stack.patch arm64-kvm-fix-vttbr_baddr_mask-bug_on-off-by-one.patch kvm-vmx-remove-i-o-port-0x80-bypass-on-intel-hosts.patch arm64-fpsimd-prevent-registers-leaking-from-dead-tasks.patch +arm-bug-if-jumping-to-usermode-address-in-kernel-mode.patch +arm-avoid-faulting-on-qemu.patch +scsi-storvsc-workaround-for-virtual-dvd-scsi-version.patch +thp-reduce-indentation-level-in-change_huge_pmd.patch +thp-fix-madv_dontneed-vs.-numa-balancing-race.patch +mm-drop-unused-pmdp_huge_get_and_clear_notify.patch diff --git a/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch b/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch new file mode 100644 index 00000000000..11c40e3a0dd --- /dev/null +++ b/queue-4.4/thp-fix-madv_dontneed-vs.-numa-balancing-race.patch @@ -0,0 +1,85 @@ +From ced108037c2aa542b3ed8b7afd1576064ad1362a Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Thu, 13 Apr 2017 14:56:20 -0700 +Subject: thp: fix MADV_DONTNEED vs. numa balancing race + +From: Kirill A. Shutemov + +commit ced108037c2aa542b3ed8b7afd1576064ad1362a upstream. + +In case prot_numa, we are under down_read(mmap_sem). It's critical to +not clear pmd intermittently to avoid race with MADV_DONTNEED which is +also under down_read(mmap_sem): + + CPU0: CPU1: + change_huge_pmd(prot_numa=1) + pmdp_huge_get_and_clear_notify() +madvise_dontneed() + zap_pmd_range() + pmd_trans_huge(*pmd) == 0 (without ptl) + // skip the pmd + set_pmd_at(); + // pmd is re-established + +The race makes MADV_DONTNEED miss the huge pmd and don't clear it +which may break userspace. + +Found by code analysis, never saw triggered. + +Link: http://lkml.kernel.org/r/20170302151034.27829-3-kirill.shutemov@linux.intel.com +Signed-off-by: Kirill A. Shutemov +Cc: Andrea Arcangeli +Cc: Hillf Danton +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[jwang: adjust context for 4.4] +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 34 +++++++++++++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1588,7 +1588,39 @@ int change_huge_pmd(struct vm_area_struc + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + +- entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); ++ /* ++ * In case prot_numa, we are under down_read(mmap_sem). It's critical ++ * to not clear pmd intermittently to avoid race with MADV_DONTNEED ++ * which is also under down_read(mmap_sem): ++ * ++ * CPU0: CPU1: ++ * change_huge_pmd(prot_numa=1) ++ * pmdp_huge_get_and_clear_notify() ++ * madvise_dontneed() ++ * zap_pmd_range() ++ * pmd_trans_huge(*pmd) == 0 (without ptl) ++ * // skip the pmd ++ * set_pmd_at(); ++ * // pmd is re-established ++ * ++ * The race makes MADV_DONTNEED miss the huge pmd and don't clear it ++ * which may break userspace. ++ * ++ * pmdp_invalidate() is required to make sure we don't miss ++ * dirty/young flags set by hardware. ++ */ ++ entry = *pmd; ++ pmdp_invalidate(vma, addr, pmd); ++ ++ /* ++ * Recover dirty/young flags. It relies on pmdp_invalidate to not ++ * corrupt them. ++ */ ++ if (pmd_dirty(*pmd)) ++ entry = pmd_mkdirty(entry); ++ if (pmd_young(*pmd)) ++ entry = pmd_mkyoung(entry); ++ + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); diff --git a/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch b/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch new file mode 100644 index 00000000000..5ceb52458c0 --- /dev/null +++ b/queue-4.4/thp-reduce-indentation-level-in-change_huge_pmd.patch @@ -0,0 +1,107 @@ +From 0a85e51d37645e9ce57e5e1a30859e07810ed07c Mon Sep 17 00:00:00 2001 +From: "Kirill A. Shutemov" +Date: Thu, 13 Apr 2017 14:56:17 -0700 +Subject: thp: reduce indentation level in change_huge_pmd() + +From: Kirill A. Shutemov + +commit 0a85e51d37645e9ce57e5e1a30859e07810ed07c upstream. + +Patch series "thp: fix few MADV_DONTNEED races" + +For MADV_DONTNEED to work properly with huge pages, it's critical to not +clear pmd intermittently unless you hold down_write(mmap_sem). + +Otherwise MADV_DONTNEED can miss the THP which can lead to userspace +breakage. + +See example of such race in commit message of patch 2/4. + +All these races are found by code inspection. I haven't seen them +triggered. I don't think it's worth to apply them to stable@. + +This patch (of 4): + +Restructure code in preparation for a fix. + +Link: http://lkml.kernel.org/r/20170302151034.27829-2-kirill.shutemov@linux.intel.com +Signed-off-by: Kirill A. Shutemov +Acked-by: Vlastimil Babka +Cc: Andrea Arcangeli +Cc: Hillf Danton +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[jwang: adjust context for 4.4 kernel] +Signed-off-by: Jack Wang +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 54 ++++++++++++++++++++++++++++-------------------------- + 1 file changed, 28 insertions(+), 26 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -1566,35 +1566,37 @@ int change_huge_pmd(struct vm_area_struc + { + struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; ++ pmd_t entry; ++ bool preserve_write; ++ + int ret = 0; + +- if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { +- pmd_t entry; +- bool preserve_write = prot_numa && pmd_write(*pmd); +- ret = 1; +- +- /* +- * Avoid trapping faults against the zero page. The read-only +- * data is likely to be read-cached on the local CPU and +- * local/remote hits to the zero page are not interesting. +- */ +- if (prot_numa && is_huge_zero_pmd(*pmd)) { +- spin_unlock(ptl); +- return ret; +- } +- +- if (!prot_numa || !pmd_protnone(*pmd)) { +- entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); +- entry = pmd_modify(entry, newprot); +- if (preserve_write) +- entry = pmd_mkwrite(entry); +- ret = HPAGE_PMD_NR; +- set_pmd_at(mm, addr, pmd, entry); +- BUG_ON(!preserve_write && pmd_write(entry)); +- } +- spin_unlock(ptl); +- } ++ if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1) ++ return 0; ++ ++ preserve_write = prot_numa && pmd_write(*pmd); ++ ret = 1; ++ ++ /* ++ * Avoid trapping faults against the zero page. The read-only ++ * data is likely to be read-cached on the local CPU and ++ * local/remote hits to the zero page are not interesting. ++ */ ++ if (prot_numa && is_huge_zero_pmd(*pmd)) ++ goto unlock; ++ ++ if (prot_numa && pmd_protnone(*pmd)) ++ goto unlock; + ++ entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); ++ entry = pmd_modify(entry, newprot); ++ if (preserve_write) ++ entry = pmd_mkwrite(entry); ++ ret = HPAGE_PMD_NR; ++ set_pmd_at(mm, addr, pmd, entry); ++ BUG_ON(!preserve_write && pmd_write(entry)); ++unlock: ++ spin_unlock(ptl); + return ret; + } +