]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Feb 2021 11:34:56 +0000 (12:34 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 8 Feb 2021 11:34:56 +0000 (12:34 +0100)
added patches:
arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch

queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch [new file with mode: 0644]
queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch [new file with mode: 0644]
queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch [new file with mode: 0644]
queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch [new file with mode: 0644]
queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch [new file with mode: 0644]
queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch [new file with mode: 0644]
queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch [new file with mode: 0644]
queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch [new file with mode: 0644]
queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch b/queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
new file mode 100644 (file)
index 0000000..8a604d2
--- /dev/null
@@ -0,0 +1,62 @@
+From 39d3454c3513840eb123b3913fda6903e45ce671 Mon Sep 17 00:00:00 2001
+From: Russell King <rmk+kernel@armlinux.org.uk>
+Date: Sun, 18 Oct 2020 09:39:21 +0100
+Subject: ARM: footbridge: fix dc21285 PCI configuration accessors
+
+From: Russell King <rmk+kernel@armlinux.org.uk>
+
+commit 39d3454c3513840eb123b3913fda6903e45ce671 upstream.
+
+Building with gcc 4.9.2 reveals a latent bug in the PCI accessors
+for Footbridge platforms, which causes a fatal alignment fault
+while accessing IO memory. Fix this by making the assembly volatile.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mach-footbridge/dc21285.c |   12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/arm/mach-footbridge/dc21285.c
++++ b/arch/arm/mach-footbridge/dc21285.c
+@@ -66,15 +66,15 @@ dc21285_read_config(struct pci_bus *bus,
+       if (addr)
+               switch (size) {
+               case 1:
+-                      asm("ldrb       %0, [%1, %2]"
++                      asm volatile("ldrb      %0, [%1, %2]"
+                               : "=r" (v) : "r" (addr), "r" (where) : "cc");
+                       break;
+               case 2:
+-                      asm("ldrh       %0, [%1, %2]"
++                      asm volatile("ldrh      %0, [%1, %2]"
+                               : "=r" (v) : "r" (addr), "r" (where) : "cc");
+                       break;
+               case 4:
+-                      asm("ldr        %0, [%1, %2]"
++                      asm volatile("ldr       %0, [%1, %2]"
+                               : "=r" (v) : "r" (addr), "r" (where) : "cc");
+                       break;
+               }
+@@ -100,17 +100,17 @@ dc21285_write_config(struct pci_bus *bus
+       if (addr)
+               switch (size) {
+               case 1:
+-                      asm("strb       %0, [%1, %2]"
++                      asm volatile("strb      %0, [%1, %2]"
+                               : : "r" (value), "r" (addr), "r" (where)
+                               : "cc");
+                       break;
+               case 2:
+-                      asm("strh       %0, [%1, %2]"
++                      asm volatile("strh      %0, [%1, %2]"
+                               : : "r" (value), "r" (addr), "r" (where)
+                               : "cc");
+                       break;
+               case 4:
+-                      asm("str        %0, [%1, %2]"
++                      asm volatile("str       %0, [%1, %2]"
+                               : : "r" (value), "r" (addr), "r" (where)
+                               : "cc");
+                       break;
diff --git a/queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch b/queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
new file mode 100644 (file)
index 0000000..fe84f88
--- /dev/null
@@ -0,0 +1,59 @@
+From ccd85d90ce092bdb047a7f6580f3955393833b22 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 2 Feb 2021 13:20:17 -0800
+Subject: KVM: SVM: Treat SVM as unsupported when running as an SEV guest
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ccd85d90ce092bdb047a7f6580f3955393833b22 upstream.
+
+Don't let KVM load when running as an SEV guest, regardless of what
+CPUID says.  Memory is encrypted with a key that is not accessible to
+the host (L0), thus it's impossible for L0 to emulate SVM, e.g. it'll
+see garbage when reading the VMCB.
+
+Technically, KVM could decrypt all memory that needs to be accessible to
+the L0 and use shadow paging so that L0 does not need to shadow NPT, but
+exposing such information to L0 largely defeats the purpose of running as
+an SEV guest.  This can always be revisited if someone comes up with a
+use case for running VMs inside SEV guests.
+
+Note, VMLOAD, VMRUN, etc... will also #GP on GPAs with C-bit set, i.e. KVM
+is doomed even if the SEV guest is debuggable and the hypervisor is willing
+to decrypt the VMCB.  This may or may not be fixed on CPUs that have the
+SVME_ADDR_CHK fix.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210202212017.2486595-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm.c        |    5 +++++
+ arch/x86/mm/mem_encrypt.c |    1 +
+ 2 files changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -889,6 +889,11 @@ static int has_svm(void)
+               return 0;
+       }
++      if (sev_active()) {
++              pr_info("KVM is unsupported when running as an SEV guest\n");
++              return 0;
++      }
++
+       return 1;
+ }
+--- a/arch/x86/mm/mem_encrypt.c
++++ b/arch/x86/mm/mem_encrypt.c
+@@ -375,6 +375,7 @@ bool force_dma_unencrypted(struct device
+       return false;
+ }
++EXPORT_SYMBOL_GPL(sev_active);
+ /* Architecture __weak replacement functions */
+ void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch b/queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
new file mode 100644 (file)
index 0000000..efec510
--- /dev/null
@@ -0,0 +1,45 @@
+From 943dea8af21bd896e0d6c30ea221203fb3cd3265 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 2 Feb 2021 08:55:46 -0800
+Subject: KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 943dea8af21bd896e0d6c30ea221203fb3cd3265 upstream.
+
+Set the emulator context to PROT64 if SYSENTER transitions from 32-bit
+userspace (compat mode) to a 64-bit kernel, otherwise the RIP update at
+the end of x86_emulate_insn() will incorrectly truncate the new RIP.
+
+Note, this bug is mostly limited to running an Intel virtual CPU model on
+an AMD physical CPU, as other combinations of virtual and physical CPUs
+do not trigger full emulation.  On Intel CPUs, SYSENTER in compatibility
+mode is legal, and unconditionally transitions to 64-bit mode.  On AMD
+CPUs, SYSENTER is illegal in compatibility mode and #UDs.  If the vCPU is
+AMD, KVM injects a #UD on SYSENTER in compat mode.  If the pCPU is Intel,
+SYSENTER will execute natively and not trigger #UD->VM-Exit (ignoring
+guest TLB shenanigans).
+
+Fixes: fede8076aab4 ("KVM: x86: handle wrap around 32-bit address space")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jonny Barker <jonny@jonnybarker.com>
+[sean: wrote changelog]
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210202165546.2390296-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2890,6 +2890,8 @@ static int em_sysenter(struct x86_emulat
+       ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
+       *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
+                                                             (u32)msr_data;
++      if (efer & EFER_LMA)
++              ctxt->mode = X86EMUL_MODE_PROT64;
+       return X86EMUL_CONTINUE;
+ }
diff --git a/queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch b/queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
new file mode 100644 (file)
index 0000000..8aa6370
--- /dev/null
@@ -0,0 +1,85 @@
+From 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 Mon Sep 17 00:00:00 2001
+From: Rokudo Yan <wu-yan@tcl.com>
+Date: Thu, 4 Feb 2021 18:32:20 -0800
+Subject: mm, compaction: move high_pfn to the for loop scope
+
+From: Rokudo Yan <wu-yan@tcl.com>
+
+commit 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 upstream.
+
+In fast_isolate_freepages, high_pfn will be used if a preferred one (ie
+PFN >= low_pfn) is not found.
+
+But the high_pfn is not reset before searching a free area, so when it
+was used as a freepage, it may come from another free area searched before.
+As a result move_freelist_head(freelist, freepage) will have unexpected
+behavior (eg corrupt the MOVABLE freelist)
+
+  Unable to handle kernel paging request at virtual address dead000000000200
+  Mem abort info:
+    ESR = 0x96000044
+    Exception class = DABT (current EL), IL = 32 bits
+    SET = 0, FnV = 0
+    EA = 0, S1PTW = 0
+  Data abort info:
+    ISV = 0, ISS = 0x00000044
+    CM = 0, WnR = 1
+  [dead000000000200] address between user and kernel address ranges
+
+  -000|list_cut_before(inline)
+  -000|move_freelist_head(inline)
+  -000|fast_isolate_freepages(inline)
+  -000|isolate_freepages(inline)
+  -000|compaction_alloc(?, ?)
+  -001|unmap_and_move(inline)
+  -001|migrate_pages([NSD:0xFFFFFF80088CBBD0] from = 0xFFFFFF80088CBD88, [NSD:0xFFFFFF80088CBBC8] get_new_p
+  -002|__read_once_size(inline)
+  -002|static_key_count(inline)
+  -002|static_key_false(inline)
+  -002|trace_mm_compaction_migratepages(inline)
+  -002|compact_zone(?, [NSD:0xFFFFFF80088CBCB0] capc = 0x0)
+  -003|kcompactd_do_work(inline)
+  -003|kcompactd([X19] p = 0xFFFFFF93227FBC40)
+  -004|kthread([X20] _create = 0xFFFFFFE1AFB26380)
+  -005|ret_from_fork(asm)
+
+The issue was reported on an smart phone product with 6GB ram and 3GB
+zram as swap device.
+
+This patch fixes the issue by resetting high_pfn before searching each free
+area, which ensures that freepage and freelist match when calling
+move_freelist_head in fast_isolate_freepages().
+
+Link: http://lkml.kernel.org/r/20190118175136.31341-12-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20210112094720.1238444-1-wu-yan@tcl.com
+Fixes: 5a811889de10f1eb ("mm, compaction: use free lists to quickly locate a migration target")
+Signed-off-by: Rokudo Yan <wu-yan@tcl.com>
+Acked-by: Mel Gorman <mgorman@techsingularity.net>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/compaction.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1276,7 +1276,7 @@ fast_isolate_freepages(struct compact_co
+ {
+       unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+       unsigned int nr_scanned = 0;
+-      unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
++      unsigned long low_pfn, min_pfn, highest = 0;
+       unsigned long nr_isolated = 0;
+       unsigned long distance;
+       struct page *page = NULL;
+@@ -1321,6 +1321,7 @@ fast_isolate_freepages(struct compact_co
+               struct page *freepage;
+               unsigned long flags;
+               unsigned int order_scanned = 0;
++              unsigned long high_pfn = 0;
+               if (!area->nr_free)
+                       continue;
diff --git a/queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch b/queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
new file mode 100644 (file)
index 0000000..cb3f75a
--- /dev/null
@@ -0,0 +1,136 @@
+From 7ffddd499ba6122b1a07828f023d1d67629aa017 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:06 -0800
+Subject: mm: hugetlb: fix a race between freeing and dissolving the page
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 7ffddd499ba6122b1a07828f023d1d67629aa017 upstream.
+
+There is a race condition between __free_huge_page()
+and dissolve_free_huge_page().
+
+  CPU0:                         CPU1:
+
+  // page_count(page) == 1
+  put_page(page)
+    __free_huge_page(page)
+                                dissolve_free_huge_page(page)
+                                  spin_lock(&hugetlb_lock)
+                                  // PageHuge(page) && !page_count(page)
+                                  update_and_free_page(page)
+                                  // page is freed to the buddy
+                                  spin_unlock(&hugetlb_lock)
+      spin_lock(&hugetlb_lock)
+      clear_page_huge_active(page)
+      enqueue_huge_page(page)
+      // It is wrong, the page is already freed
+      spin_unlock(&hugetlb_lock)
+
+The race window is between put_page() and dissolve_free_huge_page().
+
+We should make sure that the page is already on the free list when it is
+dissolved.
+
+As a result __free_huge_page would corrupt page(s) already in the buddy
+allocator.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-4-songmuchun@bytedance.com
+Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |   39 +++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 39 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -71,6 +71,21 @@ DEFINE_SPINLOCK(hugetlb_lock);
+ static int num_fault_mutexes;
+ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
++static inline bool PageHugeFreed(struct page *head)
++{
++      return page_private(head + 4) == -1UL;
++}
++
++static inline void SetPageHugeFreed(struct page *head)
++{
++      set_page_private(head + 4, -1UL);
++}
++
++static inline void ClearPageHugeFreed(struct page *head)
++{
++      set_page_private(head + 4, 0);
++}
++
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
+@@ -869,6 +884,7 @@ static void enqueue_huge_page(struct hst
+       list_move(&page->lru, &h->hugepage_freelists[nid]);
+       h->free_huge_pages++;
+       h->free_huge_pages_node[nid]++;
++      SetPageHugeFreed(page);
+ }
+ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+@@ -886,6 +902,7 @@ static struct page *dequeue_huge_page_no
+               return NULL;
+       list_move(&page->lru, &h->hugepage_activelist);
+       set_page_refcounted(page);
++      ClearPageHugeFreed(page);
+       h->free_huge_pages--;
+       h->free_huge_pages_node[nid]--;
+       return page;
+@@ -1375,6 +1392,7 @@ static void prep_new_huge_page(struct hs
+       set_hugetlb_cgroup(page, NULL);
+       h->nr_huge_pages++;
+       h->nr_huge_pages_node[nid]++;
++      ClearPageHugeFreed(page);
+       spin_unlock(&hugetlb_lock);
+ }
+@@ -1602,6 +1620,7 @@ int dissolve_free_huge_page(struct page
+ {
+       int rc = -EBUSY;
++retry:
+       /* Not to disrupt normal path by vainly holding hugetlb_lock */
+       if (!PageHuge(page))
+               return 0;
+@@ -1618,6 +1637,26 @@ int dissolve_free_huge_page(struct page
+               int nid = page_to_nid(head);
+               if (h->free_huge_pages - h->resv_huge_pages == 0)
+                       goto out;
++
++              /*
++               * We should make sure that the page is already on the free list
++               * when it is dissolved.
++               */
++              if (unlikely(!PageHugeFreed(head))) {
++                      spin_unlock(&hugetlb_lock);
++                      cond_resched();
++
++                      /*
++                       * Theoretically, we should return -EBUSY when we
++                       * encounter this race. In fact, we have a chance
++                       * to successfully dissolve the page if we do a
++                       * retry. Because the race window is quite small.
++                       * If we seize this opportunity, it is an optimization
++                       * for increasing the success rate of dissolving page.
++                       */
++                      goto retry;
++              }
++
+               /*
+                * Move PageHWPoison flag from head page to the raw error page,
+                * which makes any subpages rather than the error page reusable.
diff --git a/queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch b/queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
new file mode 100644 (file)
index 0000000..4ed9065
--- /dev/null
@@ -0,0 +1,64 @@
+From 0eb2df2b5629794020f75e94655e1994af63f0d4 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:10 -0800
+Subject: mm: hugetlb: fix a race between isolating and freeing page
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 0eb2df2b5629794020f75e94655e1994af63f0d4 upstream.
+
+There is a race between isolate_huge_page() and __free_huge_page().
+
+  CPU0:                                     CPU1:
+
+  if (PageHuge(page))
+                                            put_page(page)
+                                              __free_huge_page(page)
+                                                  spin_lock(&hugetlb_lock)
+                                                  update_and_free_page(page)
+                                                    set_compound_page_dtor(page,
+                                                      NULL_COMPOUND_DTOR)
+                                                  spin_unlock(&hugetlb_lock)
+    isolate_huge_page(page)
+      // trigger BUG_ON
+      VM_BUG_ON_PAGE(!PageHead(page), page)
+      spin_lock(&hugetlb_lock)
+      page_huge_active(page)
+        // trigger BUG_ON
+        VM_BUG_ON_PAGE(!PageHuge(page), page)
+      spin_unlock(&hugetlb_lock)
+
+When we isolate a HugeTLB page on CPU0.  Meanwhile, we free it to the
+buddy allocator on CPU1.  Then, we can trigger a BUG_ON on CPU0, because
+it is already freed to the buddy allocator.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-5-songmuchun@bytedance.com
+Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5175,9 +5175,9 @@ bool isolate_huge_page(struct page *page
+ {
+       bool ret = true;
+-      VM_BUG_ON_PAGE(!PageHead(page), page);
+       spin_lock(&hugetlb_lock);
+-      if (!page_huge_active(page) || !get_page_unless_zero(page)) {
++      if (!PageHeadHuge(page) || !page_huge_active(page) ||
++          !get_page_unless_zero(page)) {
+               ret = false;
+               goto unlock;
+       }
diff --git a/queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch b/queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
new file mode 100644 (file)
index 0000000..2f5f7e4
--- /dev/null
@@ -0,0 +1,44 @@
+From ecbf4724e6061b4b01be20f6d797d64d462b2bc8 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:13 -0800
+Subject: mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit ecbf4724e6061b4b01be20f6d797d64d462b2bc8 upstream.
+
+The page_huge_active() can be called from scan_movable_pages(), which does
+not hold a reference count to the HugeTLB page.  So when we call
+page_huge_active() from scan_movable_pages(), the HugeTLB page can be
+freed in parallel.  Then we will trigger a BUG_ON which is in the
+page_huge_active() when CONFIG_DEBUG_VM is enabled.  Just remove the
+VM_BUG_ON_PAGE.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-6-songmuchun@bytedance.com
+Fixes: 7e1f049efb86 ("mm: hugetlb: cleanup using paeg_huge_active()")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1234,8 +1234,7 @@ struct hstate *size_to_hstate(unsigned l
+  */
+ bool page_huge_active(struct page *page)
+ {
+-      VM_BUG_ON_PAGE(!PageHuge(page), page);
+-      return PageHead(page) && PagePrivate(&page[1]);
++      return PageHeadHuge(page) && PagePrivate(&page[1]);
+ }
+ /* never called for tail page */
diff --git a/queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch b/queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
new file mode 100644 (file)
index 0000000..d226a8d
--- /dev/null
@@ -0,0 +1,71 @@
+From 585fc0d2871c9318c949fbf45b1f081edd489e96 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Thu, 4 Feb 2021 18:32:03 -0800
+Subject: mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit 585fc0d2871c9318c949fbf45b1f081edd489e96 upstream.
+
+If a new hugetlb page is allocated during fallocate it will not be
+marked as active (set_page_huge_active) which will result in a later
+isolate_huge_page failure when the page migration code would like to
+move that page.  Such a failure would be unexpected and wrong.
+
+Only export set_page_huge_active and leave clear_page_huge_active as
+static, because there are no external users.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-3-songmuchun@bytedance.com
+Fixes: 70c3547e36f5 (hugetlbfs: add hugetlbfs_fallocate())
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Yang Shi <shy828301@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c    |    3 ++-
+ include/linux/hugetlb.h |    2 ++
+ mm/hugetlb.c            |    2 +-
+ 3 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -675,9 +675,10 @@ static long hugetlbfs_fallocate(struct f
+               mutex_unlock(&hugetlb_fault_mutex_table[hash]);
++              set_page_huge_active(page);
+               /*
+                * unlock_page because locked by add_to_page_cache()
+-               * page_put due to reference from alloc_huge_page()
++               * put_page() due to reference from alloc_huge_page()
+                */
+               unlock_page(page);
+               put_page(page);
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -590,6 +590,8 @@ static inline void huge_ptep_modify_prot
+ }
+ #endif
++void set_page_huge_active(struct page *page);
++
+ #else /* CONFIG_HUGETLB_PAGE */
+ struct hstate {};
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1222,7 +1222,7 @@ bool page_huge_active(struct page *page)
+ }
+ /* never called for tail page */
+-static void set_page_huge_active(struct page *page)
++void set_page_huge_active(struct page *page)
+ {
+       VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+       SetPagePrivate(&page[1]);
diff --git a/queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch b/queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
new file mode 100644 (file)
index 0000000..f538d01
--- /dev/null
@@ -0,0 +1,111 @@
+From 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Thu, 4 Feb 2021 18:32:31 -0800
+Subject: mm: thp: fix MADV_REMOVE deadlock on shmem THP
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 upstream.
+
+Sergey reported deadlock between kswapd correctly doing its usual
+lock_page(page) followed by down_read(page->mapping->i_mmap_rwsem), and
+madvise(MADV_REMOVE) on an madvise(MADV_HUGEPAGE) area doing
+down_write(page->mapping->i_mmap_rwsem) followed by lock_page(page).
+
+This happened when shmem_fallocate(punch hole)'s unmap_mapping_range()
+reaches zap_pmd_range()'s call to __split_huge_pmd().  The same deadlock
+could occur when partially truncating a mapped huge tmpfs file, or using
+fallocate(FALLOC_FL_PUNCH_HOLE) on it.
+
+__split_huge_pmd()'s page lock was added in 5.8, to make sure that any
+concurrent use of reuse_swap_page() (holding page lock) could not catch
+the anon THP's mapcounts and swapcounts while they were being split.
+
+Fortunately, reuse_swap_page() is never applied to a shmem or file THP
+(not even by khugepaged, which checks PageSwapCache before calling), and
+anonymous THPs are never created in shmem or file areas: so that
+__split_huge_pmd()'s page lock can only be necessary for anonymous THPs,
+on which there is no risk of deadlock with i_mmap_rwsem.
+
+Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101161409470.2022@eggly.anvils
+Fixes: c444eb564fb1 ("mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Reported-by: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
+Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/huge_memory.c |   37 +++++++++++++++++++++++--------------
+ 1 file changed, 23 insertions(+), 14 deletions(-)
+
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -2306,7 +2306,7 @@ void __split_huge_pmd(struct vm_area_str
+ {
+       spinlock_t *ptl;
+       struct mmu_notifier_range range;
+-      bool was_locked = false;
++      bool do_unlock_page = false;
+       pmd_t _pmd;
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+@@ -2322,7 +2322,6 @@ void __split_huge_pmd(struct vm_area_str
+       VM_BUG_ON(freeze && !page);
+       if (page) {
+               VM_WARN_ON_ONCE(!PageLocked(page));
+-              was_locked = true;
+               if (page != pmd_page(*pmd))
+                       goto out;
+       }
+@@ -2331,19 +2330,29 @@ repeat:
+       if (pmd_trans_huge(*pmd)) {
+               if (!page) {
+                       page = pmd_page(*pmd);
+-                      if (unlikely(!trylock_page(page))) {
+-                              get_page(page);
+-                              _pmd = *pmd;
+-                              spin_unlock(ptl);
+-                              lock_page(page);
+-                              spin_lock(ptl);
+-                              if (unlikely(!pmd_same(*pmd, _pmd))) {
+-                                      unlock_page(page);
++                      /*
++                       * An anonymous page must be locked, to ensure that a
++                       * concurrent reuse_swap_page() sees stable mapcount;
++                       * but reuse_swap_page() is not used on shmem or file,
++                       * and page lock must not be taken when zap_pmd_range()
++                       * calls __split_huge_pmd() while i_mmap_lock is held.
++                       */
++                      if (PageAnon(page)) {
++                              if (unlikely(!trylock_page(page))) {
++                                      get_page(page);
++                                      _pmd = *pmd;
++                                      spin_unlock(ptl);
++                                      lock_page(page);
++                                      spin_lock(ptl);
++                                      if (unlikely(!pmd_same(*pmd, _pmd))) {
++                                              unlock_page(page);
++                                              put_page(page);
++                                              page = NULL;
++                                              goto repeat;
++                                      }
+                                       put_page(page);
+-                                      page = NULL;
+-                                      goto repeat;
+                               }
+-                              put_page(page);
++                              do_unlock_page = true;
+                       }
+               }
+               if (PageMlocked(page))
+@@ -2353,7 +2362,7 @@ repeat:
+       __split_huge_pmd_locked(vma, pmd, range.start, freeze);
+ out:
+       spin_unlock(ptl);
+-      if (!was_locked && page)
++      if (do_unlock_page)
+               unlock_page(page);
+       /*
+        * No need to double call mmu_notifier->invalidate_range() callback.
index b88d9bfea664c3fee6a532fda523f10e1b1cf590..54cd93d38df91fb41b9704ba0ae3610f7231b068 100644 (file)
@@ -42,3 +42,12 @@ smb3-fix-crediting-for-compounding-when-only-one-request-in-flight.patch
 mmc-core-limit-retries-when-analyse-of-sdio-tuples-fails.patch
 drm-amd-display-revert-fix-edid-parsing-after-resume-from-suspend.patch
 nvme-pci-avoid-the-deepest-sleep-state-on-kingston-a2000-ssds.patch
+kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
+kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
+arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
+mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
+mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
+mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
+mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
+mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
+mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch