From: Greg Kroah-Hartman Date: Mon, 8 Feb 2021 11:37:12 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.4.257~20 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b2a80fbd6827f65af749378084382f88485237ec;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch arm-dts-gta04-spi-panel-chip-select-is-active-low.patch arm-footbridge-fix-dc21285-pci-configuration-accessors.patch dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch mm-compaction-move-high_pfn-to-the-for-loop-scope.patch mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch --- diff --git a/queue-5.10/arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch b/queue-5.10/arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch new file mode 100644 index 00000000000..ad38cd99535 --- /dev/null +++ b/queue-5.10/arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch @@ -0,0 +1,93 @@ +From 538eea5362a1179dfa7770dd2b6607dc30cc50c6 Mon Sep 17 00:00:00 2001 +From: Dmitry Osipenko +Date: Tue, 15 Dec 2020 16:16:44 +0100 +Subject: ARM: 9043/1: tegra: Fix misplaced 
tegra_uart_config in decompressor + +From: Dmitry Osipenko + +commit 538eea5362a1179dfa7770dd2b6607dc30cc50c6 upstream. + +The tegra_uart_config of the DEBUG_LL code is now placed right at the +start of the .text section after commit which enabled debug output in the +decompressor. Tegra devices are not booting anymore if DEBUG_LL is enabled +since tegra_uart_config data is executes as a code. Fix the misplaced +tegra_uart_config storage by embedding it into the code. + +Cc: stable@vger.kernel.org +Fixes: 2596a72d3384 ("ARM: 9009/1: uncompress: Enable debug in head.S") +Reviewed-by: Linus Walleij +Signed-off-by: Dmitry Osipenko +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/include/debug/tegra.S | 54 ++++++++++++++++++++--------------------- + 1 file changed, 27 insertions(+), 27 deletions(-) + +--- a/arch/arm/include/debug/tegra.S ++++ b/arch/arm/include/debug/tegra.S +@@ -149,7 +149,34 @@ + + .align + 99: .word . ++#if defined(ZIMAGE) ++ .word . + 4 ++/* ++ * Storage for the state maintained by the macro. ++ * ++ * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c. ++ * That's because this header is included from multiple files, and we only ++ * want a single copy of the data. In particular, the UART probing code above ++ * assumes it's running using physical addresses. This is true when this file ++ * is included from head.o, but not when included from debug.o. So we need ++ * to share the probe results between the two copies, rather than having ++ * to re-run the probing again later. ++ * ++ * In the decompressor, we put the storage right here, since common.c ++ * isn't included in the decompressor build. This storage data gets put in ++ * .text even though it's really data, since .data is discarded from the ++ * decompressor. Luckily, .text is writeable in the decompressor, unless ++ * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. 
++ */ ++ /* Debug UART initialization required */ ++ .word 1 ++ /* Debug UART physical address */ ++ .word 0 ++ /* Debug UART virtual address */ ++ .word 0 ++#else + .word tegra_uart_config ++#endif + .ltorg + + /* Load previously selected UART address */ +@@ -189,30 +216,3 @@ + + .macro waituarttxrdy,rd,rx + .endm +- +-/* +- * Storage for the state maintained by the macros above. +- * +- * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c. +- * That's because this header is included from multiple files, and we only +- * want a single copy of the data. In particular, the UART probing code above +- * assumes it's running using physical addresses. This is true when this file +- * is included from head.o, but not when included from debug.o. So we need +- * to share the probe results between the two copies, rather than having +- * to re-run the probing again later. +- * +- * In the decompressor, we put the symbol/storage right here, since common.c +- * isn't included in the decompressor build. This symbol gets put in .text +- * even though it's really data, since .data is discarded from the +- * decompressor. Luckily, .text is writeable in the decompressor, unless +- * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug. +- */ +-#if defined(ZIMAGE) +-tegra_uart_config: +- /* Debug UART initialization required */ +- .word 1 +- /* Debug UART physical address */ +- .word 0 +- /* Debug UART virtual address */ +- .word 0 +-#endif diff --git a/queue-5.10/arm-dts-gta04-spi-panel-chip-select-is-active-low.patch b/queue-5.10/arm-dts-gta04-spi-panel-chip-select-is-active-low.patch new file mode 100644 index 00000000000..c03c33b461b --- /dev/null +++ b/queue-5.10/arm-dts-gta04-spi-panel-chip-select-is-active-low.patch @@ -0,0 +1,47 @@ +From 181739822cf6f8f4e12b173913af2967a28906c0 Mon Sep 17 00:00:00 2001 +From: "H. 
Nikolaus Schaller" +Date: Wed, 23 Dec 2020 11:30:21 +0100 +Subject: ARM: dts; gta04: SPI panel chip select is active low + +From: H. Nikolaus Schaller + +commit 181739822cf6f8f4e12b173913af2967a28906c0 upstream. + +With the arrival of + +commit 2fee9583198eb9 ("spi: dt-bindings: clarify CS behavior for spi-cs-high and gpio descriptors") + +it was clarified what the proper state for cs-gpios should be, even if the +flag is ignored. The driver code is doing the right thing since + +766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") + +The chip-select of the td028ttec1 panel is active-low, so we must omit spi-cs-high; +attribute (already removed by separate patch) and should now use GPIO_ACTIVE_LOW for +the client device description to be fully consistent. + +Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") +CC: stable@vger.kernel.org +Signed-off-by: H. Nikolaus Schaller +Signed-off-by: Tony Lindgren +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/omap3-gta04.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi +index 003202d12990..7b8c18e6605e 100644 +--- a/arch/arm/boot/dts/omap3-gta04.dtsi ++++ b/arch/arm/boot/dts/omap3-gta04.dtsi +@@ -114,7 +114,7 @@ spi_lcd: spi_lcd { + gpio-sck = <&gpio1 12 GPIO_ACTIVE_HIGH>; + gpio-miso = <&gpio1 18 GPIO_ACTIVE_HIGH>; + gpio-mosi = <&gpio1 20 GPIO_ACTIVE_HIGH>; +- cs-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>; ++ cs-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>; + num-chipselects = <1>; + + /* lcd panel */ +-- +2.30.0 + diff --git a/queue-5.10/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch b/queue-5.10/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch new file mode 100644 index 00000000000..5570b657690 --- /dev/null +++ b/queue-5.10/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch @@ -0,0 +1,62 @@ +From 
39d3454c3513840eb123b3913fda6903e45ce671 Mon Sep 17 00:00:00 2001 +From: Russell King +Date: Sun, 18 Oct 2020 09:39:21 +0100 +Subject: ARM: footbridge: fix dc21285 PCI configuration accessors + +From: Russell King + +commit 39d3454c3513840eb123b3913fda6903e45ce671 upstream. + +Building with gcc 4.9.2 reveals a latent bug in the PCI accessors +for Footbridge platforms, which causes a fatal alignment fault +while accessing IO memory. Fix this by making the assembly volatile. + +Cc: stable@vger.kernel.org +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/mach-footbridge/dc21285.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/arm/mach-footbridge/dc21285.c ++++ b/arch/arm/mach-footbridge/dc21285.c +@@ -65,15 +65,15 @@ dc21285_read_config(struct pci_bus *bus, + if (addr) + switch (size) { + case 1: +- asm("ldrb %0, [%1, %2]" ++ asm volatile("ldrb %0, [%1, %2]" + : "=r" (v) : "r" (addr), "r" (where) : "cc"); + break; + case 2: +- asm("ldrh %0, [%1, %2]" ++ asm volatile("ldrh %0, [%1, %2]" + : "=r" (v) : "r" (addr), "r" (where) : "cc"); + break; + case 4: +- asm("ldr %0, [%1, %2]" ++ asm volatile("ldr %0, [%1, %2]" + : "=r" (v) : "r" (addr), "r" (where) : "cc"); + break; + } +@@ -99,17 +99,17 @@ dc21285_write_config(struct pci_bus *bus + if (addr) + switch (size) { + case 1: +- asm("strb %0, [%1, %2]" ++ asm volatile("strb %0, [%1, %2]" + : : "r" (value), "r" (addr), "r" (where) + : "cc"); + break; + case 2: +- asm("strh %0, [%1, %2]" ++ asm volatile("strh %0, [%1, %2]" + : : "r" (value), "r" (addr), "r" (where) + : "cc"); + break; + case 4: +- asm("str %0, [%1, %2]" ++ asm volatile("str %0, [%1, %2]" + : : "r" (value), "r" (addr), "r" (where) + : "cc"); + break; diff --git a/queue-5.10/dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch b/queue-5.10/dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch new file mode 100644 index 00000000000..5e8f3d458c6 --- /dev/null 
+++ b/queue-5.10/dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch @@ -0,0 +1,53 @@ +From 07af7810e0a5bc4e51682c90f9fa19fc4cb93f18 Mon Sep 17 00:00:00 2001 +From: "H. Nikolaus Schaller" +Date: Sat, 12 Dec 2020 10:55:25 +0100 +Subject: DTS: ARM: gta04: remove legacy spi-cs-high to make display work again + +From: H. Nikolaus Schaller + +commit 07af7810e0a5bc4e51682c90f9fa19fc4cb93f18 upstream. + +This reverts + +commit f1f028ff89cb ("DTS: ARM: gta04: introduce legacy spi-cs-high to make display work again") + +which had to be introduced after + +commit 6953c57ab172 ("gpio: of: Handle SPI chipselect legacy bindings") + +broke the GTA04 display. This contradicted the data sheet but was the only +way to get it as an spi client operational again. + +The panel data sheet defines the chip-select to be active low. + +Now, with the arrival of + +commit 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") + +the logic of interaction between spi-cs-high and the gpio descriptor flags +has been changed a second time, making the display broken again. So we have +to remove the original fix which in retrospect was a workaround of a bug in +the spi subsystem and not a feature of the panel or bug in the device tree. + +With this fix the device tree is back in sync with the data sheet and +spi subsystem code. + +Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") +CC: stable@vger.kernel.org +Signed-off-by: H. 
Nikolaus Schaller +Signed-off-by: Tony Lindgren +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/omap3-gta04.dtsi | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/arm/boot/dts/omap3-gta04.dtsi ++++ b/arch/arm/boot/dts/omap3-gta04.dtsi +@@ -124,7 +124,6 @@ + spi-max-frequency = <100000>; + spi-cpol; + spi-cpha; +- spi-cs-high; + + backlight= <&backlight>; + label = "lcd"; diff --git a/queue-5.10/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch b/queue-5.10/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch new file mode 100644 index 00000000000..3dea4b056c3 --- /dev/null +++ b/queue-5.10/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch @@ -0,0 +1,59 @@ +From ccd85d90ce092bdb047a7f6580f3955393833b22 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 2 Feb 2021 13:20:17 -0800 +Subject: KVM: SVM: Treat SVM as unsupported when running as an SEV guest + +From: Sean Christopherson + +commit ccd85d90ce092bdb047a7f6580f3955393833b22 upstream. + +Don't let KVM load when running as an SEV guest, regardless of what +CPUID says. Memory is encrypted with a key that is not accessible to +the host (L0), thus it's impossible for L0 to emulate SVM, e.g. it'll +see garbage when reading the VMCB. + +Technically, KVM could decrypt all memory that needs to be accessible to +the L0 and use shadow paging so that L0 does not need to shadow NPT, but +exposing such information to L0 largely defeats the purpose of running as +an SEV guest. This can always be revisited if someone comes up with a +use case for running VMs inside SEV guests. + +Note, VMLOAD, VMRUN, etc... will also #GP on GPAs with C-bit set, i.e. KVM +is doomed even if the SEV guest is debuggable and the hypervisor is willing +to decrypt the VMCB. This may or may not be fixed on CPUs that have the +SVME_ADDR_CHK fix. 
+ +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20210202212017.2486595-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/svm.c | 5 +++++ + arch/x86/mm/mem_encrypt.c | 1 + + 2 files changed, 6 insertions(+) + +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -438,6 +438,11 @@ static int has_svm(void) + return 0; + } + ++ if (sev_active()) { ++ pr_info("KVM is unsupported when running as an SEV guest\n"); ++ return 0; ++ } ++ + return 1; + } + +--- a/arch/x86/mm/mem_encrypt.c ++++ b/arch/x86/mm/mem_encrypt.c +@@ -351,6 +351,7 @@ bool sev_active(void) + { + return sev_status & MSR_AMD64_SEV_ENABLED; + } ++EXPORT_SYMBOL_GPL(sev_active); + + /* Needs to be called from non-instrumentable code */ + bool noinstr sev_es_active(void) diff --git a/queue-5.10/kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch b/queue-5.10/kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch new file mode 100644 index 00000000000..1b86f5dead4 --- /dev/null +++ b/queue-5.10/kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch @@ -0,0 +1,98 @@ +From 7131636e7ea5b50ca910f8953f6365ef2d1f741c Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 28 Jan 2021 11:45:00 -0500 +Subject: KVM: x86: Allow guests to see MSR_IA32_TSX_CTRL even if tsx=off + +From: Paolo Bonzini + +commit 7131636e7ea5b50ca910f8953f6365ef2d1f741c upstream. + +Userspace that does not know about KVM_GET_MSR_FEATURE_INDEX_LIST +will generally use the default value for MSR_IA32_ARCH_CAPABILITIES. +When this happens and the host has tsx=on, it is possible to end up with +virtual machines that have HLE and RTM disabled, but TSX_CTRL available. 
+ +If the fleet is then switched to tsx=off, kvm_get_arch_capabilities() +will clear the ARCH_CAP_TSX_CTRL_MSR bit and it will not be possible to +use the tsx=off hosts as migration destinations, even though the guests +do not have TSX enabled. + +To allow this migration, allow guests to write to their TSX_CTRL MSR, +while keeping the host MSR unchanged for the entire life of the guests. +This ensures that TSX remains disabled and also saves MSR reads and +writes, and it's okay to do because with tsx=off we know that guests will +not have the HLE and RTM features in their CPUID. (If userspace sets +bogus CPUID data, we do not expect HLE and RTM to work in guests anyway). + +Cc: stable@vger.kernel.org +Fixes: cbbaa2727aa3 ("KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES") +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 17 +++++++++++++---- + arch/x86/kvm/x86.c | 26 +++++++++++++++++--------- + 2 files changed, 30 insertions(+), 13 deletions(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6874,11 +6874,20 @@ static int vmx_create_vcpu(struct kvm_vc + switch (index) { + case MSR_IA32_TSX_CTRL: + /* +- * No need to pass TSX_CTRL_CPUID_CLEAR through, so +- * let's avoid changing CPUID bits under the host +- * kernel's feet. ++ * TSX_CTRL_CPUID_CLEAR is handled in the CPUID ++ * interception. Keep the host value unchanged to avoid ++ * changing CPUID bits under the host kernel's feet. ++ * ++ * hle=0, rtm=0, tsx_ctrl=1 can be found with some ++ * combinations of new kernel and old userspace. If ++ * those guests run on a tsx=off host, do allow guests ++ * to use TSX_CTRL, but do not change the value on the ++ * host so that TSX remains always disabled. 
+ */ +- vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; ++ if (boot_cpu_has(X86_FEATURE_RTM)) ++ vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; ++ else ++ vmx->guest_uret_msrs[j].mask = 0; + break; + default: + vmx->guest_uret_msrs[j].mask = -1ull; +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1376,16 +1376,24 @@ static u64 kvm_get_arch_capabilities(voi + if (!boot_cpu_has_bug(X86_BUG_MDS)) + data |= ARCH_CAP_MDS_NO; + +- /* +- * On TAA affected systems: +- * - nothing to do if TSX is disabled on the host. +- * - we emulate TSX_CTRL if present on the host. +- * This lets the guest use VERW to clear CPU buffers. +- */ +- if (!boot_cpu_has(X86_FEATURE_RTM)) +- data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR); +- else if (!boot_cpu_has_bug(X86_BUG_TAA)) ++ if (!boot_cpu_has(X86_FEATURE_RTM)) { ++ /* ++ * If RTM=0 because the kernel has disabled TSX, the host might ++ * have TAA_NO or TSX_CTRL. Clear TAA_NO (the guest sees RTM=0 ++ * and therefore knows that there cannot be TAA) but keep ++ * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts, ++ * and we want to allow migrating those guests to tsx=off hosts. ++ */ ++ data &= ~ARCH_CAP_TAA_NO; ++ } else if (!boot_cpu_has_bug(X86_BUG_TAA)) { + data |= ARCH_CAP_TAA_NO; ++ } else { ++ /* ++ * Nothing to do here; we emulate TSX_CTRL if present on the ++ * host so the guest can choose between disabling TSX or ++ * using VERW to clear CPU buffers. 
++ */ ++ } + + return data; + } diff --git a/queue-5.10/kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch b/queue-5.10/kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch new file mode 100644 index 00000000000..e453131e10d --- /dev/null +++ b/queue-5.10/kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch @@ -0,0 +1,41 @@ +From 181f494888d5b178ffda41bed965f187d5e5c432 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Wed, 27 Jan 2021 20:44:51 -0600 +Subject: KVM: x86: fix CPUID entries returned by KVM_GET_CPUID2 ioctl + +From: Michael Roth + +commit 181f494888d5b178ffda41bed965f187d5e5c432 upstream. + +Recent commit 255cbecfe0 modified struct kvm_vcpu_arch to make +'cpuid_entries' a pointer to an array of kvm_cpuid_entry2 entries +rather than embedding the array in the struct. KVM_SET_CPUID and +KVM_SET_CPUID2 were updated accordingly, but KVM_GET_CPUID2 was missed. + +As a result, KVM_GET_CPUID2 currently returns random fields from struct +kvm_vcpu_arch to userspace rather than the expected CPUID values. Fix +this by treating 'cpuid_entries' as a pointer when copying its +contents to userspace buffer. 
+ +Fixes: 255cbecfe0c9 ("KVM: x86: allocate vcpu->arch.cpuid_entries dynamically") +Cc: Vitaly Kuznetsov +Signed-off-by: Michael Roth +Message-Id: <20210128024451.1816770-1-michael.roth@amd.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/cpuid.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -320,7 +320,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm + if (cpuid->nent < vcpu->arch.cpuid_nent) + goto out; + r = -EFAULT; +- if (copy_to_user(entries, &vcpu->arch.cpuid_entries, ++ if (copy_to_user(entries, vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + goto out; + return 0; diff --git a/queue-5.10/kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch b/queue-5.10/kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch new file mode 100644 index 00000000000..5e1a4a4f3c2 --- /dev/null +++ b/queue-5.10/kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch @@ -0,0 +1,45 @@ +From 87aa9ec939ec7277b730786e19c161c9194cc8ca Mon Sep 17 00:00:00 2001 +From: Ben Gardon +Date: Tue, 2 Feb 2021 10:57:16 -0800 +Subject: KVM: x86/mmu: Fix TDP MMU zap collapsible SPTEs + +From: Ben Gardon + +commit 87aa9ec939ec7277b730786e19c161c9194cc8ca upstream. + +There is a bug in the TDP MMU function to zap SPTEs which could be +replaced with a larger mapping which prevents the function from doing +anything. Fix this by correctly zapping the last level SPTEs. 
+ +Cc: stable@vger.kernel.org +Fixes: 14881998566d ("kvm: x86/mmu: Support disabling dirty logging for the tdp MMU") +Signed-off-by: Ben Gardon +Message-Id: <20210202185734.1680553-11-bgardon@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/tdp_mmu.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/mmu/tdp_mmu.c ++++ b/arch/x86/kvm/mmu/tdp_mmu.c +@@ -1037,8 +1037,8 @@ bool kvm_tdp_mmu_slot_set_dirty(struct k + } + + /* +- * Clear non-leaf entries (and free associated page tables) which could +- * be replaced by large mappings, for GFNs within the slot. ++ * Clear leaf entries which could be replaced by large mappings, for ++ * GFNs within the slot. + */ + static void zap_collapsible_spte_range(struct kvm *kvm, + struct kvm_mmu_page *root, +@@ -1050,7 +1050,7 @@ static void zap_collapsible_spte_range(s + + tdp_root_for_each_pte(iter, root, start, end) { + if (!is_shadow_present_pte(iter.old_spte) || +- is_last_spte(iter.old_spte, iter.level)) ++ !is_last_spte(iter.old_spte, iter.level)) + continue; + + pfn = spte_to_pfn(iter.old_spte); diff --git a/queue-5.10/kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch b/queue-5.10/kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch new file mode 100644 index 00000000000..b0e8a102038 --- /dev/null +++ b/queue-5.10/kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch @@ -0,0 +1,33 @@ +From 031b91a5fe6f1ce61b7617614ddde9ed61e252be Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 3 Feb 2021 16:01:06 -0800 +Subject: KVM: x86: Set so called 'reserved CR3 bits in LM mask' at vCPU reset + +From: Sean Christopherson + +commit 031b91a5fe6f1ce61b7617614ddde9ed61e252be upstream. + +Set cr3_lm_rsvd_bits, which is effectively an invalid GPA mask, at vCPU +reset. The reserved bits check needs to be done even if userspace never +configures the guest's CPUID model. 
+ +Cc: stable@vger.kernel.org +Fixes: 0107973a80ad ("KVM: x86: Introduce cr3_lm_rsvd_bits in kvm_vcpu_arch") +Signed-off-by: Sean Christopherson +Message-Id: <20210204000117.3303214-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -9915,6 +9915,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu + fx_init(vcpu); + + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); ++ vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63); + + vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; + diff --git a/queue-5.10/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch b/queue-5.10/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch new file mode 100644 index 00000000000..d2435261edd --- /dev/null +++ b/queue-5.10/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch @@ -0,0 +1,45 @@ +From 943dea8af21bd896e0d6c30ea221203fb3cd3265 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 2 Feb 2021 08:55:46 -0800 +Subject: KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode + +From: Sean Christopherson + +commit 943dea8af21bd896e0d6c30ea221203fb3cd3265 upstream. + +Set the emulator context to PROT64 if SYSENTER transitions from 32-bit +userspace (compat mode) to a 64-bit kernel, otherwise the RIP update at +the end of x86_emulate_insn() will incorrectly truncate the new RIP. + +Note, this bug is mostly limited to running an Intel virtual CPU model on +an AMD physical CPU, as other combinations of virtual and physical CPUs +do not trigger full emulation. On Intel CPUs, SYSENTER in compatibility +mode is legal, and unconditionally transitions to 64-bit mode. On AMD +CPUs, SYSENTER is illegal in compatibility mode and #UDs. If the vCPU is +AMD, KVM injects a #UD on SYSENTER in compat mode. 
If the pCPU is Intel, +SYSENTER will execute natively and not trigger #UD->VM-Exit (ignoring +guest TLB shenanigans). + +Fixes: fede8076aab4 ("KVM: x86: handle wrap around 32-bit address space") +Cc: stable@vger.kernel.org +Signed-off-by: Jonny Barker +[sean: wrote changelog] +Signed-off-by: Sean Christopherson +Message-Id: <20210202165546.2390296-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -2879,6 +2879,8 @@ static int em_sysenter(struct x86_emulat + ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); + *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data : + (u32)msr_data; ++ if (efer & EFER_LMA) ++ ctxt->mode = X86EMUL_MODE_PROT64; + + return X86EMUL_CONTINUE; + } diff --git a/queue-5.10/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch b/queue-5.10/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch new file mode 100644 index 00000000000..4644558c9b1 --- /dev/null +++ b/queue-5.10/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch @@ -0,0 +1,85 @@ +From 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 Mon Sep 17 00:00:00 2001 +From: Rokudo Yan +Date: Thu, 4 Feb 2021 18:32:20 -0800 +Subject: mm, compaction: move high_pfn to the for loop scope + +From: Rokudo Yan + +commit 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 upstream. + +In fast_isolate_freepages, high_pfn will be used if a prefered one (ie +PFN >= low_fn) not found. + +But the high_pfn is not reset before searching an free area, so when it +was used as freepage, it may from another free area searched before. 
As +a result move_freelist_head(freelist, freepage) will have unexpected +behavior (eg corrupt the MOVABLE freelist) + + Unable to handle kernel paging request at virtual address dead000000000200 + Mem abort info: + ESR = 0x96000044 + Exception class = DABT (current EL), IL = 32 bits + SET = 0, FnV = 0 + EA = 0, S1PTW = 0 + Data abort info: + ISV = 0, ISS = 0x00000044 + CM = 0, WnR = 1 + [dead000000000200] address between user and kernel address ranges + + -000|list_cut_before(inline) + -000|move_freelist_head(inline) + -000|fast_isolate_freepages(inline) + -000|isolate_freepages(inline) + -000|compaction_alloc(?, ?) + -001|unmap_and_move(inline) + -001|migrate_pages([NSD:0xFFFFFF80088CBBD0] from = 0xFFFFFF80088CBD88, [NSD:0xFFFFFF80088CBBC8] get_new_p + -002|__read_once_size(inline) + -002|static_key_count(inline) + -002|static_key_false(inline) + -002|trace_mm_compaction_migratepages(inline) + -002|compact_zone(?, [NSD:0xFFFFFF80088CBCB0] capc = 0x0) + -003|kcompactd_do_work(inline) + -003|kcompactd([X19] p = 0xFFFFFF93227FBC40) + -004|kthread([X20] _create = 0xFFFFFFE1AFB26380) + -005|ret_from_fork(asm) + +The issue was reported on a smartphone product with 6GB RAM and 3GB +zram as swap device. + +This patch fixes the issue by resetting high_pfn before searching each free +area, which ensures freepage and freelist match when calling +move_freelist_head in fast_isolate_freepages(). 
+ +Link: http://lkml.kernel.org/r/20190118175136.31341-12-mgorman@techsingularity.net +Link: https://lkml.kernel.org/r/20210112094720.1238444-1-wu-yan@tcl.com +Fixes: 5a811889de10f1eb ("mm, compaction: use free lists to quickly locate a migration target") +Signed-off-by: Rokudo Yan +Acked-by: Mel Gorman +Acked-by: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/compaction.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -1302,7 +1302,7 @@ fast_isolate_freepages(struct compact_co + { + unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1); + unsigned int nr_scanned = 0; +- unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0; ++ unsigned long low_pfn, min_pfn, highest = 0; + unsigned long nr_isolated = 0; + unsigned long distance; + struct page *page = NULL; +@@ -1347,6 +1347,7 @@ fast_isolate_freepages(struct compact_co + struct page *freepage; + unsigned long flags; + unsigned int order_scanned = 0; ++ unsigned long high_pfn = 0; + + if (!area->nr_free) + continue; diff --git a/queue-5.10/mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch b/queue-5.10/mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch new file mode 100644 index 00000000000..e63ffaf39ab --- /dev/null +++ b/queue-5.10/mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch @@ -0,0 +1,93 @@ +From da74240eb3fcd806edb1643874363e954d9e948b Mon Sep 17 00:00:00 2001 +From: Waiman Long +Date: Thu, 4 Feb 2021 18:32:45 -0800 +Subject: mm/filemap: add missing mem_cgroup_uncharge() to __add_to_page_cache_locked() + +From: Waiman Long + +commit da74240eb3fcd806edb1643874363e954d9e948b upstream. 
+ +Commit 3fea5a499d57 ("mm: memcontrol: convert page cache to a new +mem_cgroup_charge() API") introduced a bug in __add_to_page_cache_locked() +causing the following splat: + + page dumped because: VM_BUG_ON_PAGE(page_memcg(page)) + pages's memcg:ffff8889a4116000 + ------------[ cut here ]------------ + kernel BUG at mm/memcontrol.c:2924! + invalid opcode: 0000 [#1] SMP KASAN PTI + CPU: 35 PID: 12345 Comm: cat Tainted: G S W I 5.11.0-rc4-debug+ #1 + Hardware name: HP HP Z8 G4 Workstation/81C7, BIOS P60 v01.25 12/06/2017 + RIP: commit_charge+0xf4/0x130 + Call Trace: + mem_cgroup_charge+0x175/0x770 + __add_to_page_cache_locked+0x712/0xad0 + add_to_page_cache_lru+0xc5/0x1f0 + cachefiles_read_or_alloc_pages+0x895/0x2e10 [cachefiles] + __fscache_read_or_alloc_pages+0x6c0/0xa00 [fscache] + __nfs_readpages_from_fscache+0x16d/0x630 [nfs] + nfs_readpages+0x24e/0x540 [nfs] + read_pages+0x5b1/0xc40 + page_cache_ra_unbounded+0x460/0x750 + generic_file_buffered_read_get_pages+0x290/0x1710 + generic_file_buffered_read+0x2a9/0xc30 + nfs_file_read+0x13f/0x230 [nfs] + new_sync_read+0x3af/0x610 + vfs_read+0x339/0x4b0 + ksys_read+0xf1/0x1c0 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Before that commit, there was a try_charge() and commit_charge() in +__add_to_page_cache_locked(). These two separated charge functions were +replaced by a single mem_cgroup_charge(). However, it forgot to add a +matching mem_cgroup_uncharge() when the xarray insertion failed with the +page released back to the pool. + +Fix this by adding a mem_cgroup_uncharge() call when insertion error +happens. 
+ +Link: https://lkml.kernel.org/r/20210125042441.20030-1-longman@redhat.com +Fixes: 3fea5a499d57 ("mm: memcontrol: convert page cache to a new mem_cgroup_charge() API") +Signed-off-by: Waiman Long +Reviewed-by: Alex Shi +Acked-by: Johannes Weiner +Cc: Matthew Wilcox +Cc: Miaohe Lin +Cc: Muchun Song +Cc: Michal Hocko +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/filemap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked( + XA_STATE(xas, &mapping->i_pages, offset); + int huge = PageHuge(page); + int error; ++ bool charged = false; + + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapBacked(page), page); +@@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked( + error = mem_cgroup_charge(page, current->mm, gfp); + if (error) + goto error; ++ charged = true; + } + + gfp &= GFP_RECLAIM_MASK; +@@ -896,6 +898,8 @@ unlock: + + if (xas_error(&xas)) { + error = xas_error(&xas); ++ if (charged) ++ mem_cgroup_uncharge(page); + goto error; + } + diff --git a/queue-5.10/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch b/queue-5.10/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch new file mode 100644 index 00000000000..d4cd076c0e2 --- /dev/null +++ b/queue-5.10/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch @@ -0,0 +1,136 @@ +From 7ffddd499ba6122b1a07828f023d1d67629aa017 Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Thu, 4 Feb 2021 18:32:06 -0800 +Subject: mm: hugetlb: fix a race between freeing and dissolving the page + +From: Muchun Song + +commit 7ffddd499ba6122b1a07828f023d1d67629aa017 upstream. + +There is a race condition between __free_huge_page() +and dissolve_free_huge_page(). 
+ + CPU0: CPU1: + + // page_count(page) == 1 + put_page(page) + __free_huge_page(page) + dissolve_free_huge_page(page) + spin_lock(&hugetlb_lock) + // PageHuge(page) && !page_count(page) + update_and_free_page(page) + // page is freed to the buddy + spin_unlock(&hugetlb_lock) + spin_lock(&hugetlb_lock) + clear_page_huge_active(page) + enqueue_huge_page(page) + // It is wrong, the page is already freed + spin_unlock(&hugetlb_lock) + +The race window is between put_page() and dissolve_free_huge_page(). + +We should make sure that the page is already on the free list when it is +dissolved. + +As a result __free_huge_page would corrupt page(s) already in the buddy +allocator. + +Link: https://lkml.kernel.org/r/20210115124942.46403-4-songmuchun@bytedance.com +Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage") +Signed-off-by: Muchun Song +Reviewed-by: Mike Kravetz +Reviewed-by: Oscar Salvador +Acked-by: Michal Hocko +Cc: David Hildenbrand +Cc: Yang Shi +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 39 +++++++++++++++++++++++++++++++++++++++ + 1 file changed, 39 insertions(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock); + static int num_fault_mutexes; + struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; + ++static inline bool PageHugeFreed(struct page *head) ++{ ++ return page_private(head + 4) == -1UL; ++} ++ ++static inline void SetPageHugeFreed(struct page *head) ++{ ++ set_page_private(head + 4, -1UL); ++} ++ ++static inline void ClearPageHugeFreed(struct page *head) ++{ ++ set_page_private(head + 4, 0); ++} ++ + /* Forward declaration */ + static int hugetlb_acct_memory(struct hstate *h, long delta); + +@@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hst + list_move(&page->lru, &h->hugepage_freelists[nid]); + h->free_huge_pages++; + h->free_huge_pages_node[nid]++; ++
SetPageHugeFreed(page); + } + + static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) +@@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_no + + list_move(&page->lru, &h->hugepage_activelist); + set_page_refcounted(page); ++ ClearPageHugeFreed(page); + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; + return page; +@@ -1505,6 +1522,7 @@ static void prep_new_huge_page(struct hs + spin_lock(&hugetlb_lock); + h->nr_huge_pages++; + h->nr_huge_pages_node[nid]++; ++ ClearPageHugeFreed(page); + spin_unlock(&hugetlb_lock); + } + +@@ -1755,6 +1773,7 @@ int dissolve_free_huge_page(struct page + { + int rc = -EBUSY; + ++retry: + /* Not to disrupt normal path by vainly holding hugetlb_lock */ + if (!PageHuge(page)) + return 0; +@@ -1771,6 +1790,26 @@ int dissolve_free_huge_page(struct page + int nid = page_to_nid(head); + if (h->free_huge_pages - h->resv_huge_pages == 0) + goto out; ++ ++ /* ++ * We should make sure that the page is already on the free list ++ * when it is dissolved. ++ */ ++ if (unlikely(!PageHugeFreed(head))) { ++ spin_unlock(&hugetlb_lock); ++ cond_resched(); ++ ++ /* ++ * Theoretically, we should return -EBUSY when we ++ * encounter this race. In fact, we have a chance ++ * to successfully dissolve the page if we do a ++ * retry. Because the race window is quite small. ++ * If we seize this opportunity, it is an optimization ++ * for increasing the success rate of dissolving page. ++ */ ++ goto retry; ++ } ++ + /* + * Move PageHWPoison flag from head page to the raw error page, + * which makes any subpages rather than the error page reusable. 
diff --git a/queue-5.10/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch b/queue-5.10/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch new file mode 100644 index 00000000000..065c6187534 --- /dev/null +++ b/queue-5.10/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch @@ -0,0 +1,64 @@ +From 0eb2df2b5629794020f75e94655e1994af63f0d4 Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Thu, 4 Feb 2021 18:32:10 -0800 +Subject: mm: hugetlb: fix a race between isolating and freeing page + +From: Muchun Song + +commit 0eb2df2b5629794020f75e94655e1994af63f0d4 upstream. + +There is a race between isolate_huge_page() and __free_huge_page(). + + CPU0: CPU1: + + if (PageHuge(page)) + put_page(page) + __free_huge_page(page) + spin_lock(&hugetlb_lock) + update_and_free_page(page) + set_compound_page_dtor(page, + NULL_COMPOUND_DTOR) + spin_unlock(&hugetlb_lock) + isolate_huge_page(page) + // trigger BUG_ON + VM_BUG_ON_PAGE(!PageHead(page), page) + spin_lock(&hugetlb_lock) + page_huge_active(page) + // trigger BUG_ON + VM_BUG_ON_PAGE(!PageHuge(page), page) + spin_unlock(&hugetlb_lock) + +When we isolate a HugeTLB page on CPU0 while it is being freed to the +buddy allocator on CPU1, we can trigger a BUG_ON on CPU0, because +it is already freed to the buddy allocator.
+ +Link: https://lkml.kernel.org/r/20210115124942.46403-5-songmuchun@bytedance.com +Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage") +Signed-off-by: Muchun Song +Reviewed-by: Mike Kravetz +Acked-by: Michal Hocko +Reviewed-by: Oscar Salvador +Cc: David Hildenbrand +Cc: Yang Shi +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5595,9 +5595,9 @@ bool isolate_huge_page(struct page *page + { + bool ret = true; + +- VM_BUG_ON_PAGE(!PageHead(page), page); + spin_lock(&hugetlb_lock); +- if (!page_huge_active(page) || !get_page_unless_zero(page)) { ++ if (!PageHeadHuge(page) || !page_huge_active(page) || ++ !get_page_unless_zero(page)) { + ret = false; + goto unlock; + } diff --git a/queue-5.10/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch b/queue-5.10/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch new file mode 100644 index 00000000000..6d484427396 --- /dev/null +++ b/queue-5.10/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch @@ -0,0 +1,44 @@ +From ecbf4724e6061b4b01be20f6d797d64d462b2bc8 Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Thu, 4 Feb 2021 18:32:13 -0800 +Subject: mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active + +From: Muchun Song + +commit ecbf4724e6061b4b01be20f6d797d64d462b2bc8 upstream. + +The page_huge_active() can be called from scan_movable_pages() which does +not hold a reference count to the HugeTLB page. So when we call +page_huge_active() from scan_movable_pages(), the HugeTLB page can be +freed in parallel. Then we will trigger a BUG_ON which is in the +page_huge_active() when CONFIG_DEBUG_VM is enabled. Just remove the +VM_BUG_ON_PAGE.
+ +Link: https://lkml.kernel.org/r/20210115124942.46403-6-songmuchun@bytedance.com +Fixes: 7e1f049efb86 ("mm: hugetlb: cleanup using paeg_huge_active()") +Signed-off-by: Muchun Song +Reviewed-by: Mike Kravetz +Acked-by: Michal Hocko +Reviewed-by: Oscar Salvador +Cc: David Hildenbrand +Cc: Yang Shi +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1361,8 +1361,7 @@ struct hstate *size_to_hstate(unsigned l + */ + bool page_huge_active(struct page *page) + { +- VM_BUG_ON_PAGE(!PageHuge(page), page); +- return PageHead(page) && PagePrivate(&page[1]); ++ return PageHeadHuge(page) && PagePrivate(&page[1]); + } + + /* never called for tail page */ diff --git a/queue-5.10/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch b/queue-5.10/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch new file mode 100644 index 00000000000..fea336dae7e --- /dev/null +++ b/queue-5.10/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch @@ -0,0 +1,71 @@ +From 585fc0d2871c9318c949fbf45b1f081edd489e96 Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Thu, 4 Feb 2021 18:32:03 -0800 +Subject: mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page + +From: Muchun Song + +commit 585fc0d2871c9318c949fbf45b1f081edd489e96 upstream. + +If a new hugetlb page is allocated during fallocate it will not be +marked as active (set_page_huge_active) which will result in a later +isolate_huge_page failure when the page migration code would like to +move that page. Such a failure would be unexpected and wrong. + +Only export set_page_huge_active and leave clear_page_huge_active +static, because it has no external users.
+ +Link: https://lkml.kernel.org/r/20210115124942.46403-3-songmuchun@bytedance.com +Fixes: 70c3547e36f5 (hugetlbfs: add hugetlbfs_fallocate()) +Signed-off-by: Muchun Song +Acked-by: Michal Hocko +Reviewed-by: Mike Kravetz +Reviewed-by: Oscar Salvador +Cc: David Hildenbrand +Cc: Yang Shi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 3 ++- + include/linux/hugetlb.h | 2 ++ + mm/hugetlb.c | 2 +- + 3 files changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -735,9 +735,10 @@ static long hugetlbfs_fallocate(struct f + + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + ++ set_page_huge_active(page); + /* + * unlock_page because locked by add_to_page_cache() +- * page_put due to reference from alloc_huge_page() ++ * put_page() due to reference from alloc_huge_page() + */ + unlock_page(page); + put_page(page); +--- a/include/linux/hugetlb.h ++++ b/include/linux/hugetlb.h +@@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot + } + #endif + ++void set_page_huge_active(struct page *page); ++ + #else /* CONFIG_HUGETLB_PAGE */ + struct hstate {}; + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1349,7 +1349,7 @@ bool page_huge_active(struct page *page) + } + + /* never called for tail page */ +-static void set_page_huge_active(struct page *page) ++void set_page_huge_active(struct page *page) + { + VM_BUG_ON_PAGE(!PageHeadHuge(page), page); + SetPagePrivate(&page[1]); diff --git a/queue-5.10/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch b/queue-5.10/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch new file mode 100644 index 00000000000..83afaa50382 --- /dev/null +++ b/queue-5.10/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch @@ -0,0 +1,111 @@ +From 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Thu, 4 Feb 2021 18:32:31 -0800 +Subject: mm: thp: fix MADV_REMOVE deadlock on shmem THP + 
+From: Hugh Dickins + +commit 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 upstream. + +Sergey reported deadlock between kswapd correctly doing its usual +lock_page(page) followed by down_read(page->mapping->i_mmap_rwsem), and +madvise(MADV_REMOVE) on an madvise(MADV_HUGEPAGE) area doing +down_write(page->mapping->i_mmap_rwsem) followed by lock_page(page). + +This happened when shmem_fallocate(punch hole)'s unmap_mapping_range() +reaches zap_pmd_range()'s call to __split_huge_pmd(). The same deadlock +could occur when partially truncating a mapped huge tmpfs file, or using +fallocate(FALLOC_FL_PUNCH_HOLE) on it. + +__split_huge_pmd()'s page lock was added in 5.8, to make sure that any +concurrent use of reuse_swap_page() (holding page lock) could not catch +the anon THP's mapcounts and swapcounts while they were being split. + +Fortunately, reuse_swap_page() is never applied to a shmem or file THP +(not even by khugepaged, which checks PageSwapCache before calling), and +anonymous THPs are never created in shmem or file areas: so that +__split_huge_pmd()'s page lock can only be necessary for anonymous THPs, +on which there is no risk of deadlock with i_mmap_rwsem. 
+ +Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101161409470.2022@eggly.anvils +Fixes: c444eb564fb1 ("mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()") +Signed-off-by: Hugh Dickins +Reported-by: Sergey Senozhatsky +Reviewed-by: Andrea Arcangeli +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 37 +++++++++++++++++++++++-------------- + 1 file changed, 23 insertions(+), 14 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2188,7 +2188,7 @@ void __split_huge_pmd(struct vm_area_str + { + spinlock_t *ptl; + struct mmu_notifier_range range; +- bool was_locked = false; ++ bool do_unlock_page = false; + pmd_t _pmd; + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, +@@ -2204,7 +2204,6 @@ void __split_huge_pmd(struct vm_area_str + VM_BUG_ON(freeze && !page); + if (page) { + VM_WARN_ON_ONCE(!PageLocked(page)); +- was_locked = true; + if (page != pmd_page(*pmd)) + goto out; + } +@@ -2213,19 +2212,29 @@ repeat: + if (pmd_trans_huge(*pmd)) { + if (!page) { + page = pmd_page(*pmd); +- if (unlikely(!trylock_page(page))) { +- get_page(page); +- _pmd = *pmd; +- spin_unlock(ptl); +- lock_page(page); +- spin_lock(ptl); +- if (unlikely(!pmd_same(*pmd, _pmd))) { +- unlock_page(page); ++ /* ++ * An anonymous page must be locked, to ensure that a ++ * concurrent reuse_swap_page() sees stable mapcount; ++ * but reuse_swap_page() is not used on shmem or file, ++ * and page lock must not be taken when zap_pmd_range() ++ * calls __split_huge_pmd() while i_mmap_lock is held.
++ */ ++ if (PageAnon(page)) { ++ if (unlikely(!trylock_page(page))) { ++ get_page(page); ++ _pmd = *pmd; ++ spin_unlock(ptl); ++ lock_page(page); ++ spin_lock(ptl); ++ if (unlikely(!pmd_same(*pmd, _pmd))) { ++ unlock_page(page); ++ put_page(page); ++ page = NULL; ++ goto repeat; ++ } + put_page(page); +- page = NULL; +- goto repeat; + } +- put_page(page); ++ do_unlock_page = true; + } + } + if (PageMlocked(page)) +@@ -2235,7 +2244,7 @@ repeat: + __split_huge_pmd_locked(vma, pmd, range.start, freeze); + out: + spin_unlock(ptl); +- if (!was_locked && page) ++ if (do_unlock_page) + unlock_page(page); + /* + * No need to double call mmu_notifier->invalidate_range() callback. diff --git a/queue-5.10/mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch b/queue-5.10/mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch new file mode 100644 index 00000000000..12dc0cafdcc --- /dev/null +++ b/queue-5.10/mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch @@ -0,0 +1,57 @@ +From 4f6ec8602341e97b364e4e0d41a1ed08148f5e98 Mon Sep 17 00:00:00 2001 +From: Rick Edgecombe +Date: Thu, 4 Feb 2021 18:32:24 -0800 +Subject: mm/vmalloc: separate put pages and flush VM flags + +From: Rick Edgecombe + +commit 4f6ec8602341e97b364e4e0d41a1ed08148f5e98 upstream. + +When VM_MAP_PUT_PAGES was added, it was defined with the same value as +VM_FLUSH_RESET_PERMS. This doesn't seem like it will cause any big +functional problems other than some excess flushing for VM_MAP_PUT_PAGES +allocations. + +Redefine VM_MAP_PUT_PAGES to have its own value. Also, rearrange things +so flags are less likely to be missed in the future. 
+ +Link: https://lkml.kernel.org/r/20210122233706.9304-1-rick.p.edgecombe@intel.com +Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap") +Signed-off-by: Rick Edgecombe +Suggested-by: Matthew Wilcox +Cc: Miaohe Lin +Cc: Christoph Hellwig +Cc: Daniel Axtens +Cc: stable@vger.kernel.org +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/vmalloc.h | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/include/linux/vmalloc.h ++++ b/include/linux/vmalloc.h +@@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h + #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ + #define VM_NO_GUARD 0x00000040 /* don't add guard page */ + #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ +-#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ ++#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ ++#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ + + /* + * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. +@@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h + * determine which allocations need the module shadow freed. + */ + +-/* +- * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with +- * vfree_atomic().
+- */ +-#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */ +- + /* bits [20..32] reserved for arch specific ioremap internals */ + + /* diff --git a/queue-5.10/series b/queue-5.10/series index 14a2f0c7424..28912c1a240 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -85,3 +85,21 @@ drm-i915-power-up-combo-phy-lanes-for-for-hdmi-as-well.patch drm-amd-display-revert-fix-edid-parsing-after-resume-from-suspend.patch io_uring-don-t-modify-identity-s-files-uncess-identity-is-cowed.patch nvme-pci-avoid-the-deepest-sleep-state-on-kingston-a2000-ssds.patch +kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch +kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch +kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch +kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch +kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch +kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch +dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch +arm-dts-gta04-spi-panel-chip-select-is-active-low.patch +arm-footbridge-fix-dc21285-pci-configuration-accessors.patch +arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch +mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch +mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch +mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch +mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch +mm-compaction-move-high_pfn-to-the-for-loop-scope.patch +mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch +mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch +mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch