git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Feb 2021 11:37:12 +0000 (12:37 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Feb 2021 11:37:12 +0000 (12:37 +0100)
added patches:
arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch
arm-dts-gta04-spi-panel-chip-select-is-active-low.patch
arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch
kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch
kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch
kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch
kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch
kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch
mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch

19 files changed:
queue-5.10/arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch [new file with mode: 0644]
queue-5.10/arm-dts-gta04-spi-panel-chip-select-is-active-low.patch [new file with mode: 0644]
queue-5.10/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch [new file with mode: 0644]
queue-5.10/dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch [new file with mode: 0644]
queue-5.10/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch [new file with mode: 0644]
queue-5.10/kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch [new file with mode: 0644]
queue-5.10/kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch [new file with mode: 0644]
queue-5.10/kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch [new file with mode: 0644]
queue-5.10/kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch [new file with mode: 0644]
queue-5.10/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch [new file with mode: 0644]
queue-5.10/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch [new file with mode: 0644]
queue-5.10/mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch [new file with mode: 0644]
queue-5.10/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch [new file with mode: 0644]
queue-5.10/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch [new file with mode: 0644]
queue-5.10/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch [new file with mode: 0644]
queue-5.10/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch [new file with mode: 0644]
queue-5.10/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch [new file with mode: 0644]
queue-5.10/mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch b/queue-5.10/arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch
new file mode 100644 (file)
index 0000000..ad38cd9
--- /dev/null
@@ -0,0 +1,93 @@
+From 538eea5362a1179dfa7770dd2b6607dc30cc50c6 Mon Sep 17 00:00:00 2001
+From: Dmitry Osipenko <digetx@gmail.com>
+Date: Tue, 15 Dec 2020 16:16:44 +0100
+Subject: ARM: 9043/1: tegra: Fix misplaced tegra_uart_config in decompressor
+
+From: Dmitry Osipenko <digetx@gmail.com>
+
+commit 538eea5362a1179dfa7770dd2b6607dc30cc50c6 upstream.
+
+The tegra_uart_config of the DEBUG_LL code is now placed right at the
+start of the .text section after commit which enabled debug output in the
+decompressor. Tegra devices are not booting anymore if DEBUG_LL is enabled
+since tegra_uart_config data is executes as a code. Fix the misplaced
+tegra_uart_config storage by embedding it into the code.
+
+Cc: stable@vger.kernel.org
+Fixes: 2596a72d3384 ("ARM: 9009/1: uncompress: Enable debug in head.S")
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/include/debug/tegra.S |   54 ++++++++++++++++++++---------------------
+ 1 file changed, 27 insertions(+), 27 deletions(-)
+
+--- a/arch/arm/include/debug/tegra.S
++++ b/arch/arm/include/debug/tegra.S
+@@ -149,7 +149,34 @@
+               .align
+ 99:           .word   .
++#if defined(ZIMAGE)
++              .word   . + 4
++/*
++ * Storage for the state maintained by the macro.
++ *
++ * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c.
++ * That's because this header is included from multiple files, and we only
++ * want a single copy of the data. In particular, the UART probing code above
++ * assumes it's running using physical addresses. This is true when this file
++ * is included from head.o, but not when included from debug.o. So we need
++ * to share the probe results between the two copies, rather than having
++ * to re-run the probing again later.
++ *
++ * In the decompressor, we put the storage right here, since common.c
++ * isn't included in the decompressor build. This storage data gets put in
++ * .text even though it's really data, since .data is discarded from the
++ * decompressor. Luckily, .text is writeable in the decompressor, unless
++ * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug.
++ */
++              /* Debug UART initialization required */
++              .word   1
++              /* Debug UART physical address */
++              .word   0
++              /* Debug UART virtual address */
++              .word   0
++#else
+               .word   tegra_uart_config
++#endif
+               .ltorg
+               /* Load previously selected UART address */
+@@ -189,30 +216,3 @@
+               .macro  waituarttxrdy,rd,rx
+               .endm
+-
+-/*
+- * Storage for the state maintained by the macros above.
+- *
+- * In the kernel proper, this data is located in arch/arm/mach-tegra/tegra.c.
+- * That's because this header is included from multiple files, and we only
+- * want a single copy of the data. In particular, the UART probing code above
+- * assumes it's running using physical addresses. This is true when this file
+- * is included from head.o, but not when included from debug.o. So we need
+- * to share the probe results between the two copies, rather than having
+- * to re-run the probing again later.
+- *
+- * In the decompressor, we put the symbol/storage right here, since common.c
+- * isn't included in the decompressor build. This symbol gets put in .text
+- * even though it's really data, since .data is discarded from the
+- * decompressor. Luckily, .text is writeable in the decompressor, unless
+- * CONFIG_ZBOOT_ROM. That dependency is handled in arch/arm/Kconfig.debug.
+- */
+-#if defined(ZIMAGE)
+-tegra_uart_config:
+-      /* Debug UART initialization required */
+-      .word 1
+-      /* Debug UART physical address */
+-      .word 0
+-      /* Debug UART virtual address */
+-      .word 0
+-#endif
diff --git a/queue-5.10/arm-dts-gta04-spi-panel-chip-select-is-active-low.patch b/queue-5.10/arm-dts-gta04-spi-panel-chip-select-is-active-low.patch
new file mode 100644 (file)
index 0000000..c03c33b
--- /dev/null
@@ -0,0 +1,47 @@
+From 181739822cf6f8f4e12b173913af2967a28906c0 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Wed, 23 Dec 2020 11:30:21 +0100
+Subject: ARM: dts; gta04: SPI panel chip select is active low
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 181739822cf6f8f4e12b173913af2967a28906c0 upstream.
+
+With the arrival of
+
+commit 2fee9583198eb9 ("spi: dt-bindings: clarify CS behavior for spi-cs-high and gpio descriptors")
+
+it was clarified what the proper state for cs-gpios should be, even if the
+flag is ignored. The driver code is doing the right thing since
+
+766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors")
+
+The chip-select of the td028ttec1 panel is active-low, so we must omit spi-cs-high;
+attribute (already removed by separate patch) and should now use GPIO_ACTIVE_LOW for
+the client device description to be fully consistent.
+
+Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors")
+CC: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/omap3-gta04.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
+index 003202d12990..7b8c18e6605e 100644
+--- a/arch/arm/boot/dts/omap3-gta04.dtsi
++++ b/arch/arm/boot/dts/omap3-gta04.dtsi
+@@ -114,7 +114,7 @@ spi_lcd: spi_lcd {
+               gpio-sck = <&gpio1 12 GPIO_ACTIVE_HIGH>;
+               gpio-miso = <&gpio1 18 GPIO_ACTIVE_HIGH>;
+               gpio-mosi = <&gpio1 20 GPIO_ACTIVE_HIGH>;
+-              cs-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>;
++              cs-gpios = <&gpio1 19 GPIO_ACTIVE_LOW>;
+               num-chipselects = <1>;
+               /* lcd panel */
+-- 
+2.30.0
+
diff --git a/queue-5.10/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch b/queue-5.10/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
new file mode 100644 (file)
index 0000000..5570b65
--- /dev/null
@@ -0,0 +1,62 @@
+From 39d3454c3513840eb123b3913fda6903e45ce671 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sun, 18 Oct 2020 09:39:21 +0100
+Subject: ARM: footbridge: fix dc21285 PCI configuration accessors
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 39d3454c3513840eb123b3913fda6903e45ce671 upstream.
+
+Building with gcc 4.9.2 reveals a latent bug in the PCI accessors
+for Footbridge platforms, which causes a fatal alignment fault
+while accessing IO memory. Fix this by making the assembly volatile.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mach-footbridge/dc21285.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/arm/mach-footbridge/dc21285.c
++++ b/arch/arm/mach-footbridge/dc21285.c
+@@ -65,15 +65,15 @@ dc21285_read_config(struct pci_bus *bus,
+       if (addr)
+               switch (size) {
+               case 1:
+-                      asm("ldrb       %0, [%1, %2]"
++                      asm volatile("ldrb      %0, [%1, %2]"
+                               : "=r" (v) : "r" (addr), "r" (where) : "cc");
+                       break;
+               case 2:
+-                      asm("ldrh       %0, [%1, %2]"
++                      asm volatile("ldrh      %0, [%1, %2]"
+                               : "=r" (v) : "r" (addr), "r" (where) : "cc");
+                       break;
+               case 4:
+-                      asm("ldr        %0, [%1, %2]"
++                      asm volatile("ldr       %0, [%1, %2]"
+                               : "=r" (v) : "r" (addr), "r" (where) : "cc");
+                       break;
+               }
+@@ -99,17 +99,17 @@ dc21285_write_config(struct pci_bus *bus
+       if (addr)
+               switch (size) {
+               case 1:
+-                      asm("strb       %0, [%1, %2]"
++                      asm volatile("strb      %0, [%1, %2]"
+                               : : "r" (value), "r" (addr), "r" (where)
+                               : "cc");
+                       break;
+               case 2:
+-                      asm("strh       %0, [%1, %2]"
++                      asm volatile("strh      %0, [%1, %2]"
+                               : : "r" (value), "r" (addr), "r" (where)
+                               : "cc");
+                       break;
+               case 4:
+-                      asm("str        %0, [%1, %2]"
++                      asm volatile("str       %0, [%1, %2]"
+                               : : "r" (value), "r" (addr), "r" (where)
+                               : "cc");
+                       break;
diff --git a/queue-5.10/dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch b/queue-5.10/dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch
new file mode 100644 (file)
index 0000000..5e8f3d4
--- /dev/null
@@ -0,0 +1,53 @@
+From 07af7810e0a5bc4e51682c90f9fa19fc4cb93f18 Mon Sep 17 00:00:00 2001
+From: "H. Nikolaus Schaller" <hns@goldelico.com>
+Date: Sat, 12 Dec 2020 10:55:25 +0100
+Subject: DTS: ARM: gta04: remove legacy spi-cs-high to make display work again
+
+From: H. Nikolaus Schaller <hns@goldelico.com>
+
+commit 07af7810e0a5bc4e51682c90f9fa19fc4cb93f18 upstream.
+
+This reverts
+
+commit f1f028ff89cb ("DTS: ARM: gta04: introduce legacy spi-cs-high to make display work again")
+
+which had to be intruduced after
+
+commit 6953c57ab172 ("gpio: of: Handle SPI chipselect legacy bindings")
+
+broke the GTA04 display. This contradicted the data sheet but was the only
+way to get it as an spi client operational again.
+
+The panel data sheet defines the chip-select to be active low.
+
+Now, with the arrival of
+
+commit 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors")
+
+the logic of interaction between spi-cs-high and the gpio descriptor flags
+has been changed a second time, making the display broken again. So we have
+to remove the original fix which in retrospect was a workaround of a bug in
+the spi subsystem and not a feature of the panel or bug in the device tree.
+
+With this fix the device tree is back in sync with the data sheet and
+spi subsystem code.
+
+Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors")
+CC: stable@vger.kernel.org
+Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
+Signed-off-by: Tony Lindgren <tony@atomide.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/omap3-gta04.dtsi |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/arm/boot/dts/omap3-gta04.dtsi
++++ b/arch/arm/boot/dts/omap3-gta04.dtsi
+@@ -124,7 +124,6 @@
+                       spi-max-frequency = <100000>;
+                       spi-cpol;
+                       spi-cpha;
+-                      spi-cs-high;
+                       backlight= <&backlight>;
+                       label = "lcd";
diff --git a/queue-5.10/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch b/queue-5.10/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
new file mode 100644 (file)
index 0000000..3dea4b0
--- /dev/null
@@ -0,0 +1,59 @@
+From ccd85d90ce092bdb047a7f6580f3955393833b22 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 2 Feb 2021 13:20:17 -0800
+Subject: KVM: SVM: Treat SVM as unsupported when running as an SEV guest
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ccd85d90ce092bdb047a7f6580f3955393833b22 upstream.
+
+Don't let KVM load when running as an SEV guest, regardless of what
+CPUID says.  Memory is encrypted with a key that is not accessible to
+the host (L0), thus it's impossible for L0 to emulate SVM, e.g. it'll
+see garbage when reading the VMCB.
+
+Technically, KVM could decrypt all memory that needs to be accessible to
+the L0 and use shadow paging so that L0 does not need to shadow NPT, but
+exposing such information to L0 largely defeats the purpose of running as
+an SEV guest.  This can always be revisited if someone comes up with a
+use case for running VMs inside SEV guests.
+
+Note, VMLOAD, VMRUN, etc... will also #GP on GPAs with C-bit set, i.e. KVM
+is doomed even if the SEV guest is debuggable and the hypervisor is willing
+to decrypt the VMCB.  This may or may not be fixed on CPUs that have the
+SVME_ADDR_CHK fix.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210202212017.2486595-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c    |    5 +++++
+ arch/x86/mm/mem_encrypt.c |    1 +
+ 2 files changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -438,6 +438,11 @@ static int has_svm(void)
+               return 0;
+       }
++      if (sev_active()) {
++              pr_info("KVM is unsupported when running as an SEV guest\n");
++              return 0;
++      }
++
+       return 1;
+ }
+--- a/arch/x86/mm/mem_encrypt.c
++++ b/arch/x86/mm/mem_encrypt.c
+@@ -351,6 +351,7 @@ bool sev_active(void)
+ {
+       return sev_status & MSR_AMD64_SEV_ENABLED;
+ }
++EXPORT_SYMBOL_GPL(sev_active);
+ /* Needs to be called from non-instrumentable code */
+ bool noinstr sev_es_active(void)
diff --git a/queue-5.10/kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch b/queue-5.10/kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch
new file mode 100644 (file)
index 0000000..1b86f5d
--- /dev/null
@@ -0,0 +1,98 @@
+From 7131636e7ea5b50ca910f8953f6365ef2d1f741c Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 28 Jan 2021 11:45:00 -0500
+Subject: KVM: x86: Allow guests to see MSR_IA32_TSX_CTRL even if tsx=off
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 7131636e7ea5b50ca910f8953f6365ef2d1f741c upstream.
+
+Userspace that does not know about KVM_GET_MSR_FEATURE_INDEX_LIST
+will generally use the default value for MSR_IA32_ARCH_CAPABILITIES.
+When this happens and the host has tsx=on, it is possible to end up with
+virtual machines that have HLE and RTM disabled, but TSX_CTRL available.
+
+If the fleet is then switched to tsx=off, kvm_get_arch_capabilities()
+will clear the ARCH_CAP_TSX_CTRL_MSR bit and it will not be possible to
+use the tsx=off hosts as migration destinations, even though the guests
+do not have TSX enabled.
+
+To allow this migration, allow guests to write to their TSX_CTRL MSR,
+while keeping the host MSR unchanged for the entire life of the guests.
+This ensures that TSX remains disabled and also saves MSR reads and
+writes, and it's okay to do because with tsx=off we know that guests will
+not have the HLE and RTM features in their CPUID.  (If userspace sets
+bogus CPUID data, we do not expect HLE and RTM to work in guests anyway).
+
+Cc: stable@vger.kernel.org
+Fixes: cbbaa2727aa3 ("KVM: x86: fix presentation of TSX feature in ARCH_CAPABILITIES")
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/vmx.c |   17 +++++++++++++----
+ arch/x86/kvm/x86.c     |   26 +++++++++++++++++---------
+ 2 files changed, 30 insertions(+), 13 deletions(-)
+
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6874,11 +6874,20 @@ static int vmx_create_vcpu(struct kvm_vc
+               switch (index) {
+               case MSR_IA32_TSX_CTRL:
+                       /*
+-                       * No need to pass TSX_CTRL_CPUID_CLEAR through, so
+-                       * let's avoid changing CPUID bits under the host
+-                       * kernel's feet.
++                       * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
++                       * interception.  Keep the host value unchanged to avoid
++                       * changing CPUID bits under the host kernel's feet.
++                       *
++                       * hle=0, rtm=0, tsx_ctrl=1 can be found with some
++                       * combinations of new kernel and old userspace.  If
++                       * those guests run on a tsx=off host, do allow guests
++                       * to use TSX_CTRL, but do not change the value on the
++                       * host so that TSX remains always disabled.
+                        */
+-                      vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
++                      if (boot_cpu_has(X86_FEATURE_RTM))
++                              vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
++                      else
++                              vmx->guest_uret_msrs[j].mask = 0;
+                       break;
+               default:
+                       vmx->guest_uret_msrs[j].mask = -1ull;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1376,16 +1376,24 @@ static u64 kvm_get_arch_capabilities(voi
+       if (!boot_cpu_has_bug(X86_BUG_MDS))
+               data |= ARCH_CAP_MDS_NO;
+-      /*
+-       * On TAA affected systems:
+-       *      - nothing to do if TSX is disabled on the host.
+-       *      - we emulate TSX_CTRL if present on the host.
+-       *        This lets the guest use VERW to clear CPU buffers.
+-       */
+-      if (!boot_cpu_has(X86_FEATURE_RTM))
+-              data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
+-      else if (!boot_cpu_has_bug(X86_BUG_TAA))
++      if (!boot_cpu_has(X86_FEATURE_RTM)) {
++              /*
++               * If RTM=0 because the kernel has disabled TSX, the host might
++               * have TAA_NO or TSX_CTRL.  Clear TAA_NO (the guest sees RTM=0
++               * and therefore knows that there cannot be TAA) but keep
++               * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts,
++               * and we want to allow migrating those guests to tsx=off hosts.
++               */
++              data &= ~ARCH_CAP_TAA_NO;
++      } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
+               data |= ARCH_CAP_TAA_NO;
++      } else {
++              /*
++               * Nothing to do here; we emulate TSX_CTRL if present on the
++               * host so the guest can choose between disabling TSX or
++               * using VERW to clear CPU buffers.
++               */
++      }
+       return data;
+ }
diff --git a/queue-5.10/kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch b/queue-5.10/kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch
new file mode 100644 (file)
index 0000000..e453131
--- /dev/null
@@ -0,0 +1,41 @@
+From 181f494888d5b178ffda41bed965f187d5e5c432 Mon Sep 17 00:00:00 2001
+From: Michael Roth <michael.roth@amd.com>
+Date: Wed, 27 Jan 2021 20:44:51 -0600
+Subject: KVM: x86: fix CPUID entries returned by KVM_GET_CPUID2 ioctl
+
+From: Michael Roth <michael.roth@amd.com>
+
+commit 181f494888d5b178ffda41bed965f187d5e5c432 upstream.
+
+Recent commit 255cbecfe0 modified struct kvm_vcpu_arch to make
+'cpuid_entries' a pointer to an array of kvm_cpuid_entry2 entries
+rather than embedding the array in the struct. KVM_SET_CPUID and
+KVM_SET_CPUID2 were updated accordingly, but KVM_GET_CPUID2 was missed.
+
+As a result, KVM_GET_CPUID2 currently returns random fields from struct
+kvm_vcpu_arch to userspace rather than the expected CPUID values. Fix
+this by treating 'cpuid_entries' as a pointer when copying its
+contents to userspace buffer.
+
+Fixes: 255cbecfe0c9 ("KVM: x86: allocate vcpu->arch.cpuid_entries dynamically")
+Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Michael Roth <michael.roth@amd.com.com>
+Message-Id: <20210128024451.1816770-1-michael.roth@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/cpuid.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/cpuid.c
++++ b/arch/x86/kvm/cpuid.c
+@@ -320,7 +320,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm
+       if (cpuid->nent < vcpu->arch.cpuid_nent)
+               goto out;
+       r = -EFAULT;
+-      if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
++      if (copy_to_user(entries, vcpu->arch.cpuid_entries,
+                        vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+               goto out;
+       return 0;
diff --git a/queue-5.10/kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch b/queue-5.10/kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch
new file mode 100644 (file)
index 0000000..5e1a4a4
--- /dev/null
@@ -0,0 +1,45 @@
+From 87aa9ec939ec7277b730786e19c161c9194cc8ca Mon Sep 17 00:00:00 2001
+From: Ben Gardon <bgardon@google.com>
+Date: Tue, 2 Feb 2021 10:57:16 -0800
+Subject: KVM: x86/mmu: Fix TDP MMU zap collapsible SPTEs
+
+From: Ben Gardon <bgardon@google.com>
+
+commit 87aa9ec939ec7277b730786e19c161c9194cc8ca upstream.
+
+There is a bug in the TDP MMU function to zap SPTEs which could be
+replaced with a larger mapping which prevents the function from doing
+anything. Fix this by correctly zapping the last level SPTEs.
+
+Cc: stable@vger.kernel.org
+Fixes: 14881998566d ("kvm: x86/mmu: Support disabling dirty logging for the tdp MMU")
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20210202185734.1680553-11-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1037,8 +1037,8 @@ bool kvm_tdp_mmu_slot_set_dirty(struct k
+ }
+ /*
+- * Clear non-leaf entries (and free associated page tables) which could
+- * be replaced by large mappings, for GFNs within the slot.
++ * Clear leaf entries which could be replaced by large mappings, for
++ * GFNs within the slot.
+  */
+ static void zap_collapsible_spte_range(struct kvm *kvm,
+                                      struct kvm_mmu_page *root,
+@@ -1050,7 +1050,7 @@ static void zap_collapsible_spte_range(s
+       tdp_root_for_each_pte(iter, root, start, end) {
+               if (!is_shadow_present_pte(iter.old_spte) ||
+-                  is_last_spte(iter.old_spte, iter.level))
++                  !is_last_spte(iter.old_spte, iter.level))
+                       continue;
+               pfn = spte_to_pfn(iter.old_spte);
diff --git a/queue-5.10/kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch b/queue-5.10/kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch
new file mode 100644 (file)
index 0000000..b0e8a10
--- /dev/null
@@ -0,0 +1,33 @@
+From 031b91a5fe6f1ce61b7617614ddde9ed61e252be Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 3 Feb 2021 16:01:06 -0800
+Subject: KVM: x86: Set so called 'reserved CR3 bits in LM mask' at vCPU reset
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 031b91a5fe6f1ce61b7617614ddde9ed61e252be upstream.
+
+Set cr3_lm_rsvd_bits, which is effectively an invalid GPA mask, at vCPU
+reset.  The reserved bits check needs to be done even if userspace never
+configures the guest's CPUID model.
+
+Cc: stable@vger.kernel.org
+Fixes: 0107973a80ad ("KVM: x86: Introduce cr3_lm_rsvd_bits in kvm_vcpu_arch")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210204000117.3303214-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9915,6 +9915,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu
+       fx_init(vcpu);
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
++      vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
+       vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
diff --git a/queue-5.10/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch b/queue-5.10/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
new file mode 100644 (file)
index 0000000..d243526
--- /dev/null
@@ -0,0 +1,45 @@
+From 943dea8af21bd896e0d6c30ea221203fb3cd3265 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 2 Feb 2021 08:55:46 -0800
+Subject: KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 943dea8af21bd896e0d6c30ea221203fb3cd3265 upstream.
+
+Set the emulator context to PROT64 if SYSENTER transitions from 32-bit
+userspace (compat mode) to a 64-bit kernel, otherwise the RIP update at
+the end of x86_emulate_insn() will incorrectly truncate the new RIP.
+
+Note, this bug is mostly limited to running an Intel virtual CPU model on
+an AMD physical CPU, as other combinations of virtual and physical CPUs
+do not trigger full emulation.  On Intel CPUs, SYSENTER in compatibility
+mode is legal, and unconditionally transitions to 64-bit mode.  On AMD
+CPUs, SYSENTER is illegal in compatibility mode and #UDs.  If the vCPU is
+AMD, KVM injects a #UD on SYSENTER in compat mode.  If the pCPU is Intel,
+SYSENTER will execute natively and not trigger #UD->VM-Exit (ignoring
+guest TLB shenanigans).
+
+Fixes: fede8076aab4 ("KVM: x86: handle wrap around 32-bit address space")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jonny Barker <jonny@jonnybarker.com>
+[sean: wrote changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210202165546.2390296-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2879,6 +2879,8 @@ static int em_sysenter(struct x86_emulat
+       ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
+       *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
+                                                             (u32)msr_data;
++      if (efer & EFER_LMA)
++              ctxt->mode = X86EMUL_MODE_PROT64;
+       return X86EMUL_CONTINUE;
+ }
diff --git a/queue-5.10/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch b/queue-5.10/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
new file mode 100644 (file)
index 0000000..4644558
--- /dev/null
@@ -0,0 +1,85 @@
+From 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 Mon Sep 17 00:00:00 2001
+From: Rokudo Yan <wu-yan@tcl.com>
+Date: Thu, 4 Feb 2021 18:32:20 -0800
+Subject: mm, compaction: move high_pfn to the for loop scope
+
+From: Rokudo Yan <wu-yan@tcl.com>
+
+commit 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 upstream.
+
+In fast_isolate_freepages, high_pfn will be used if a preferred one (ie
+PFN >= low_pfn) is not found.
+
+But the high_pfn is not reset before searching a free area, so when it
+was used as freepage, it may be from another free area searched before.
+As a result move_freelist_head(freelist, freepage) will have unexpected
+behavior (eg corrupt the MOVABLE freelist)
+
+  Unable to handle kernel paging request at virtual address dead000000000200
+  Mem abort info:
+    ESR = 0x96000044
+    Exception class = DABT (current EL), IL = 32 bits
+    SET = 0, FnV = 0
+    EA = 0, S1PTW = 0
+  Data abort info:
+    ISV = 0, ISS = 0x00000044
+    CM = 0, WnR = 1
+  [dead000000000200] address between user and kernel address ranges
+
+  -000|list_cut_before(inline)
+  -000|move_freelist_head(inline)
+  -000|fast_isolate_freepages(inline)
+  -000|isolate_freepages(inline)
+  -000|compaction_alloc(?, ?)
+  -001|unmap_and_move(inline)
+  -001|migrate_pages([NSD:0xFFFFFF80088CBBD0] from = 0xFFFFFF80088CBD88, [NSD:0xFFFFFF80088CBBC8] get_new_p
+  -002|__read_once_size(inline)
+  -002|static_key_count(inline)
+  -002|static_key_false(inline)
+  -002|trace_mm_compaction_migratepages(inline)
+  -002|compact_zone(?, [NSD:0xFFFFFF80088CBCB0] capc = 0x0)
+  -003|kcompactd_do_work(inline)
+  -003|kcompactd([X19] p = 0xFFFFFF93227FBC40)
+  -004|kthread([X20] _create = 0xFFFFFFE1AFB26380)
+  -005|ret_from_fork(asm)
+
+The issue was reported on an smart phone product with 6GB ram and 3GB
+zram as swap device.
+
+This patch fixes the issue by resetting high_pfn before searching each
+free area, which ensures the freepage and freelist match when calling
+move_freelist_head in fast_isolate_freepages().
+
+Link: http://lkml.kernel.org/r/20190118175136.31341-12-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20210112094720.1238444-1-wu-yan@tcl.com
+Fixes: 5a811889de10f1eb ("mm, compaction: use free lists to quickly locate a migration target")
+Signed-off-by: Rokudo Yan <wu-yan@tcl.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/compaction.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1302,7 +1302,7 @@ fast_isolate_freepages(struct compact_co
+ {
+       unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+       unsigned int nr_scanned = 0;
+-      unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
++      unsigned long low_pfn, min_pfn, highest = 0;
+       unsigned long nr_isolated = 0;
+       unsigned long distance;
+       struct page *page = NULL;
+@@ -1347,6 +1347,7 @@ fast_isolate_freepages(struct compact_co
+               struct page *freepage;
+               unsigned long flags;
+               unsigned int order_scanned = 0;
++              unsigned long high_pfn = 0;
+               if (!area->nr_free)
+                       continue;
diff --git a/queue-5.10/mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch b/queue-5.10/mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch
new file mode 100644 (file)
index 0000000..e63ffaf
--- /dev/null
@@ -0,0 +1,93 @@
+From da74240eb3fcd806edb1643874363e954d9e948b Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Thu, 4 Feb 2021 18:32:45 -0800
+Subject: mm/filemap: add missing mem_cgroup_uncharge() to __add_to_page_cache_locked()
+
+From: Waiman Long <longman@redhat.com>
+
+commit da74240eb3fcd806edb1643874363e954d9e948b upstream.
+
+Commit 3fea5a499d57 ("mm: memcontrol: convert page cache to a new
+mem_cgroup_charge() API") introduced a bug in __add_to_page_cache_locked()
+causing the following splat:
+
+  page dumped because: VM_BUG_ON_PAGE(page_memcg(page))
+  pages's memcg:ffff8889a4116000
+  ------------[ cut here ]------------
+  kernel BUG at mm/memcontrol.c:2924!
+  invalid opcode: 0000 [#1] SMP KASAN PTI
+  CPU: 35 PID: 12345 Comm: cat Tainted: G S      W I       5.11.0-rc4-debug+ #1
+  Hardware name: HP HP Z8 G4 Workstation/81C7, BIOS P60 v01.25 12/06/2017
+  RIP: commit_charge+0xf4/0x130
+  Call Trace:
+    mem_cgroup_charge+0x175/0x770
+    __add_to_page_cache_locked+0x712/0xad0
+    add_to_page_cache_lru+0xc5/0x1f0
+    cachefiles_read_or_alloc_pages+0x895/0x2e10 [cachefiles]
+    __fscache_read_or_alloc_pages+0x6c0/0xa00 [fscache]
+    __nfs_readpages_from_fscache+0x16d/0x630 [nfs]
+    nfs_readpages+0x24e/0x540 [nfs]
+    read_pages+0x5b1/0xc40
+    page_cache_ra_unbounded+0x460/0x750
+    generic_file_buffered_read_get_pages+0x290/0x1710
+    generic_file_buffered_read+0x2a9/0xc30
+    nfs_file_read+0x13f/0x230 [nfs]
+    new_sync_read+0x3af/0x610
+    vfs_read+0x339/0x4b0
+    ksys_read+0xf1/0x1c0
+    do_syscall_64+0x33/0x40
+    entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Before that commit, there was a try_charge() and commit_charge() in
+__add_to_page_cache_locked().  These two separated charge functions were
+replaced by a single mem_cgroup_charge().  However, it forgot to add a
+matching mem_cgroup_uncharge() when the xarray insertion failed with the
+page released back to the pool.
+
+Fix this by adding a mem_cgroup_uncharge() call when insertion error
+happens.
+
+Link: https://lkml.kernel.org/r/20210125042441.20030-1-longman@redhat.com
+Fixes: 3fea5a499d57 ("mm: memcontrol: convert page cache to a new mem_cgroup_charge() API")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Reviewed-by: Alex Shi <alex.shi@linux.alibaba.com>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Muchun Song <smuchun@gmail.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/filemap.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked(
+       XA_STATE(xas, &mapping->i_pages, offset);
+       int huge = PageHuge(page);
+       int error;
++      bool charged = false;
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(PageSwapBacked(page), page);
+@@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked(
+               error = mem_cgroup_charge(page, current->mm, gfp);
+               if (error)
+                       goto error;
++              charged = true;
+       }
+       gfp &= GFP_RECLAIM_MASK;
+@@ -896,6 +898,8 @@ unlock:
+       if (xas_error(&xas)) {
+               error = xas_error(&xas);
++              if (charged)
++                      mem_cgroup_uncharge(page);
+               goto error;
+       }
diff --git a/queue-5.10/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch b/queue-5.10/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
new file mode 100644 (file)
index 0000000..d4cd076
--- /dev/null
@@ -0,0 +1,136 @@
+From 7ffddd499ba6122b1a07828f023d1d67629aa017 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:06 -0800
+Subject: mm: hugetlb: fix a race between freeing and dissolving the page
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 7ffddd499ba6122b1a07828f023d1d67629aa017 upstream.
+
+There is a race condition between __free_huge_page()
+and dissolve_free_huge_page().
+
+  CPU0:                         CPU1:
+
+  // page_count(page) == 1
+  put_page(page)
+    __free_huge_page(page)
+                                dissolve_free_huge_page(page)
+                                  spin_lock(&hugetlb_lock)
+                                  // PageHuge(page) && !page_count(page)
+                                  update_and_free_page(page)
+                                  // page is freed to the buddy
+                                  spin_unlock(&hugetlb_lock)
+      spin_lock(&hugetlb_lock)
+      clear_page_huge_active(page)
+      enqueue_huge_page(page)
+      // It is wrong, the page is already freed
+      spin_unlock(&hugetlb_lock)
+
+The race window is between put_page() and dissolve_free_huge_page().
+
+We should make sure that the page is already on the free list when it is
+dissolved.
+
+As a result __free_huge_page would corrupt page(s) already in the buddy
+allocator.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-4-songmuchun@bytedance.com
+Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |   39 +++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 39 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock);
+ static int num_fault_mutexes;
+ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
++static inline bool PageHugeFreed(struct page *head)
++{
++      return page_private(head + 4) == -1UL;
++}
++
++static inline void SetPageHugeFreed(struct page *head)
++{
++      set_page_private(head + 4, -1UL);
++}
++
++static inline void ClearPageHugeFreed(struct page *head)
++{
++      set_page_private(head + 4, 0);
++}
++
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
+@@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hst
+       list_move(&page->lru, &h->hugepage_freelists[nid]);
+       h->free_huge_pages++;
+       h->free_huge_pages_node[nid]++;
++      SetPageHugeFreed(page);
+ }
+ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+@@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_no
+               list_move(&page->lru, &h->hugepage_activelist);
+               set_page_refcounted(page);
++              ClearPageHugeFreed(page);
+               h->free_huge_pages--;
+               h->free_huge_pages_node[nid]--;
+               return page;
+@@ -1505,6 +1522,7 @@ static void prep_new_huge_page(struct hs
+       spin_lock(&hugetlb_lock);
+       h->nr_huge_pages++;
+       h->nr_huge_pages_node[nid]++;
++      ClearPageHugeFreed(page);
+       spin_unlock(&hugetlb_lock);
+ }
+@@ -1755,6 +1773,7 @@ int dissolve_free_huge_page(struct page
+ {
+       int rc = -EBUSY;
++retry:
+       /* Not to disrupt normal path by vainly holding hugetlb_lock */
+       if (!PageHuge(page))
+               return 0;
+@@ -1771,6 +1790,26 @@ int dissolve_free_huge_page(struct page
+               int nid = page_to_nid(head);
+               if (h->free_huge_pages - h->resv_huge_pages == 0)
+                       goto out;
++
++              /*
++               * We should make sure that the page is already on the free list
++               * when it is dissolved.
++               */
++              if (unlikely(!PageHugeFreed(head))) {
++                      spin_unlock(&hugetlb_lock);
++                      cond_resched();
++
++                      /*
++                       * Theoretically, we should return -EBUSY when we
++                       * encounter this race. In fact, we have a chance
++                       * to successfully dissolve the page if we do a
++                       * retry. Because the race window is quite small.
++                       * If we seize this opportunity, it is an optimization
++                       * for increasing the success rate of dissolving page.
++                       */
++                      goto retry;
++              }
++
+               /*
+                * Move PageHWPoison flag from head page to the raw error page,
+                * which makes any subpages rather than the error page reusable.
diff --git a/queue-5.10/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch b/queue-5.10/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
new file mode 100644 (file)
index 0000000..065c618
--- /dev/null
@@ -0,0 +1,64 @@
+From 0eb2df2b5629794020f75e94655e1994af63f0d4 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:10 -0800
+Subject: mm: hugetlb: fix a race between isolating and freeing page
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 0eb2df2b5629794020f75e94655e1994af63f0d4 upstream.
+
+There is a race between isolate_huge_page() and __free_huge_page().
+
+  CPU0:                                     CPU1:
+
+  if (PageHuge(page))
+                                            put_page(page)
+                                              __free_huge_page(page)
+                                                  spin_lock(&hugetlb_lock)
+                                                  update_and_free_page(page)
+                                                    set_compound_page_dtor(page,
+                                                      NULL_COMPOUND_DTOR)
+                                                  spin_unlock(&hugetlb_lock)
+    isolate_huge_page(page)
+      // trigger BUG_ON
+      VM_BUG_ON_PAGE(!PageHead(page), page)
+      spin_lock(&hugetlb_lock)
+      page_huge_active(page)
+        // trigger BUG_ON
+        VM_BUG_ON_PAGE(!PageHuge(page), page)
+      spin_unlock(&hugetlb_lock)
+
+When we isolate a HugeTLB page on CPU0.  Meanwhile, we free it to the
+buddy allocator on CPU1.  Then, we can trigger a BUG_ON on CPU0, because
+it is already freed to the buddy allocator.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-5-songmuchun@bytedance.com
+Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5595,9 +5595,9 @@ bool isolate_huge_page(struct page *page
+ {
+       bool ret = true;
+-      VM_BUG_ON_PAGE(!PageHead(page), page);
+       spin_lock(&hugetlb_lock);
+-      if (!page_huge_active(page) || !get_page_unless_zero(page)) {
++      if (!PageHeadHuge(page) || !page_huge_active(page) ||
++          !get_page_unless_zero(page)) {
+               ret = false;
+               goto unlock;
+       }
diff --git a/queue-5.10/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch b/queue-5.10/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
new file mode 100644 (file)
index 0000000..6d48442
--- /dev/null
@@ -0,0 +1,44 @@
+From ecbf4724e6061b4b01be20f6d797d64d462b2bc8 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:13 -0800
+Subject: mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit ecbf4724e6061b4b01be20f6d797d64d462b2bc8 upstream.
+
+The page_huge_active() can be called from scan_movable_pages() which does
+not hold a reference count to the HugeTLB page.  So when we call
+page_huge_active() from scan_movable_pages(), the HugeTLB page can be
+freed in parallel.  Then we will trigger a BUG_ON which is in the
+page_huge_active() when CONFIG_DEBUG_VM is enabled.  Just remove the
+VM_BUG_ON_PAGE.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-6-songmuchun@bytedance.com
+Fixes: 7e1f049efb86 ("mm: hugetlb: cleanup using paeg_huge_active()")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1361,8 +1361,7 @@ struct hstate *size_to_hstate(unsigned l
+  */
+ bool page_huge_active(struct page *page)
+ {
+-      VM_BUG_ON_PAGE(!PageHuge(page), page);
+-      return PageHead(page) && PagePrivate(&page[1]);
++      return PageHeadHuge(page) && PagePrivate(&page[1]);
+ }
+ /* never called for tail page */
diff --git a/queue-5.10/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch b/queue-5.10/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
new file mode 100644 (file)
index 0000000..fea336d
--- /dev/null
@@ -0,0 +1,71 @@
+From 585fc0d2871c9318c949fbf45b1f081edd489e96 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:03 -0800
+Subject: mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 585fc0d2871c9318c949fbf45b1f081edd489e96 upstream.
+
+If a new hugetlb page is allocated during fallocate it will not be
+marked as active (set_page_huge_active) which will result in a later
+isolate_huge_page failure when the page migration code would like to
+move that page.  Such a failure would be unexpected and wrong.
+
+Only export set_page_huge_active and just leave clear_page_huge_active as
+static, because there are no external users.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-3-songmuchun@bytedance.com
+Fixes: 70c3547e36f5 (hugetlbfs: add hugetlbfs_fallocate())
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c    |    3 ++-
+ include/linux/hugetlb.h |    2 ++
+ mm/hugetlb.c            |    2 +-
+ 3 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -735,9 +735,10 @@ static long hugetlbfs_fallocate(struct f
+               mutex_unlock(&hugetlb_fault_mutex_table[hash]);
++              set_page_huge_active(page);
+               /*
+                * unlock_page because locked by add_to_page_cache()
+-               * page_put due to reference from alloc_huge_page()
++               * put_page() due to reference from alloc_huge_page()
+                */
+               unlock_page(page);
+               put_page(page);
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot
+ }
+ #endif
++void set_page_huge_active(struct page *page);
++
+ #else /* CONFIG_HUGETLB_PAGE */
+ struct hstate {};
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1349,7 +1349,7 @@ bool page_huge_active(struct page *page)
+ }
+ /* never called for tail page */
+-static void set_page_huge_active(struct page *page)
++void set_page_huge_active(struct page *page)
+ {
+       VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+       SetPagePrivate(&page[1]);
diff --git a/queue-5.10/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch b/queue-5.10/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
new file mode 100644 (file)
index 0000000..83afaa5
--- /dev/null
@@ -0,0 +1,111 @@
+From 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 4 Feb 2021 18:32:31 -0800
+Subject: mm: thp: fix MADV_REMOVE deadlock on shmem THP
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 upstream.
+
+Sergey reported deadlock between kswapd correctly doing its usual
+lock_page(page) followed by down_read(page->mapping->i_mmap_rwsem), and
+madvise(MADV_REMOVE) on an madvise(MADV_HUGEPAGE) area doing
+down_write(page->mapping->i_mmap_rwsem) followed by lock_page(page).
+
+This happened when shmem_fallocate(punch hole)'s unmap_mapping_range()
+reaches zap_pmd_range()'s call to __split_huge_pmd().  The same deadlock
+could occur when partially truncating a mapped huge tmpfs file, or using
+fallocate(FALLOC_FL_PUNCH_HOLE) on it.
+
+__split_huge_pmd()'s page lock was added in 5.8, to make sure that any
+concurrent use of reuse_swap_page() (holding page lock) could not catch
+the anon THP's mapcounts and swapcounts while they were being split.
+
+Fortunately, reuse_swap_page() is never applied to a shmem or file THP
+(not even by khugepaged, which checks PageSwapCache before calling), and
+anonymous THPs are never created in shmem or file areas: so that
+__split_huge_pmd()'s page lock can only be necessary for anonymous THPs,
+on which there is no risk of deadlock with i_mmap_rwsem.
+
+Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101161409470.2022@eggly.anvils
+Fixes: c444eb564fb1 ("mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reported-by: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   37 +++++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 14 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2188,7 +2188,7 @@ void __split_huge_pmd(struct vm_area_str
+ {
+       spinlock_t *ptl;
+       struct mmu_notifier_range range;
+-      bool was_locked = false;
++      bool do_unlock_page = false;
+       pmd_t _pmd;
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+@@ -2204,7 +2204,6 @@ void __split_huge_pmd(struct vm_area_str
+       VM_BUG_ON(freeze && !page);
+       if (page) {
+               VM_WARN_ON_ONCE(!PageLocked(page));
+-              was_locked = true;
+               if (page != pmd_page(*pmd))
+                       goto out;
+       }
+@@ -2213,19 +2212,29 @@ repeat:
+       if (pmd_trans_huge(*pmd)) {
+               if (!page) {
+                       page = pmd_page(*pmd);
+-                      if (unlikely(!trylock_page(page))) {
+-                              get_page(page);
+-                              _pmd = *pmd;
+-                              spin_unlock(ptl);
+-                              lock_page(page);
+-                              spin_lock(ptl);
+-                              if (unlikely(!pmd_same(*pmd, _pmd))) {
+-                                      unlock_page(page);
++                      /*
++                       * An anonymous page must be locked, to ensure that a
++                       * concurrent reuse_swap_page() sees stable mapcount;
++                       * but reuse_swap_page() is not used on shmem or file,
++                       * and page lock must not be taken when zap_pmd_range()
++                       * calls __split_huge_pmd() while i_mmap_lock is held.
++                       */
++                      if (PageAnon(page)) {
++                              if (unlikely(!trylock_page(page))) {
++                                      get_page(page);
++                                      _pmd = *pmd;
++                                      spin_unlock(ptl);
++                                      lock_page(page);
++                                      spin_lock(ptl);
++                                      if (unlikely(!pmd_same(*pmd, _pmd))) {
++                                              unlock_page(page);
++                                              put_page(page);
++                                              page = NULL;
++                                              goto repeat;
++                                      }
+                                       put_page(page);
+-                                      page = NULL;
+-                                      goto repeat;
+                               }
+-                              put_page(page);
++                              do_unlock_page = true;
+                       }
+               }
+               if (PageMlocked(page))
+@@ -2235,7 +2244,7 @@ repeat:
+       __split_huge_pmd_locked(vma, pmd, range.start, freeze);
+ out:
+       spin_unlock(ptl);
+-      if (!was_locked && page)
++      if (do_unlock_page)
+               unlock_page(page);
+       /*
+        * No need to double call mmu_notifier->invalidate_range() callback.
diff --git a/queue-5.10/mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch b/queue-5.10/mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch
new file mode 100644 (file)
index 0000000..12dc0ca
--- /dev/null
@@ -0,0 +1,57 @@
+From 4f6ec8602341e97b364e4e0d41a1ed08148f5e98 Mon Sep 17 00:00:00 2001
+From: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Date: Thu, 4 Feb 2021 18:32:24 -0800
+Subject: mm/vmalloc: separate put pages and flush VM flags
+
+From: Rick Edgecombe <rick.p.edgecombe@intel.com>
+
+commit 4f6ec8602341e97b364e4e0d41a1ed08148f5e98 upstream.
+
+When VM_MAP_PUT_PAGES was added, it was defined with the same value as
+VM_FLUSH_RESET_PERMS.  This doesn't seem like it will cause any big
+functional problems other than some excess flushing for VM_MAP_PUT_PAGES
+allocations.
+
+Redefine VM_MAP_PUT_PAGES to have its own value.  Also, rearrange things
+so flags are less likely to be missed in the future.
+
+Link: https://lkml.kernel.org/r/20210122233706.9304-1-rick.p.edgecombe@intel.com
+Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap")
+Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Daniel Axtens <dja@axtens.net>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/vmalloc.h |    9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/include/linux/vmalloc.h
++++ b/include/linux/vmalloc.h
+@@ -24,7 +24,8 @@ struct notifier_block;               /* in notifier.h
+ #define VM_UNINITIALIZED      0x00000020      /* vm_struct is not fully initialized */
+ #define VM_NO_GUARD           0x00000040      /* don't add guard page */
+ #define VM_KASAN              0x00000080      /* has allocated kasan shadow memory */
+-#define VM_MAP_PUT_PAGES      0x00000100      /* put pages and free array in vfree */
++#define VM_FLUSH_RESET_PERMS  0x00000100      /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
++#define VM_MAP_PUT_PAGES      0x00000200      /* put pages and free array in vfree */
+ /*
+  * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
+@@ -37,12 +38,6 @@ struct notifier_block;              /* in notifier.h
+  * determine which allocations need the module shadow freed.
+  */
+-/*
+- * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with
+- * vfree_atomic().
+- */
+-#define VM_FLUSH_RESET_PERMS  0x00000100      /* Reset direct map and flush TLB on unmap */
+-
+ /* bits [20..32] reserved for arch specific ioremap internals */
+ /*
index 14a2f0c74240282d34460c953cf02a81605c12bb..28912c1a2401d8966568e012c3ba8b7663a49b34 100644 (file)
@@ -85,3 +85,21 @@ drm-i915-power-up-combo-phy-lanes-for-for-hdmi-as-well.patch
 drm-amd-display-revert-fix-edid-parsing-after-resume-from-suspend.patch
 io_uring-don-t-modify-identity-s-files-uncess-identity-is-cowed.patch
 nvme-pci-avoid-the-deepest-sleep-state-on-kingston-a2000-ssds.patch
+kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
+kvm-x86-mmu-fix-tdp-mmu-zap-collapsible-sptes.patch
+kvm-x86-allow-guests-to-see-msr_ia32_tsx_ctrl-even-if-tsx-off.patch
+kvm-x86-fix-cpuid-entries-returned-by-kvm_get_cpuid2-ioctl.patch
+kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
+kvm-x86-set-so-called-reserved-cr3-bits-in-lm-mask-at-vcpu-reset.patch
+dts-arm-gta04-remove-legacy-spi-cs-high-to-make-display-work-again.patch
+arm-dts-gta04-spi-panel-chip-select-is-active-low.patch
+arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
+arm-9043-1-tegra-fix-misplaced-tegra_uart_config-in-decompressor.patch
+mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
+mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
+mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
+mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
+mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
+mm-vmalloc-separate-put-pages-and-flush-vm-flags.patch
+mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
+mm-filemap-add-missing-mem_cgroup_uncharge-to-__add_to_page_cache_locked.patch