From ccc8860b0600e17d50570dac0470ab2da8df86f7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 8 Feb 2021 12:34:56 +0100
Subject: [PATCH] 5.4-stable patches

added patches:
 arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
 kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
 kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
 mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
 mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
 mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
 mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
 mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
 mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
---
 ...-dc21285-pci-configuration-accessors.patch |  62 ++++++++
 ...pported-when-running-as-an-sev-guest.patch |  59 ++++++++
 ...ode-if-sysenter-xfers-to-64-bit-mode.patch |  45 ++++++
 ...-move-high_pfn-to-the-for-loop-scope.patch |  85 +++++++++++
 ...ween-freeing-and-dissolving-the-page.patch | 136 ++++++++++++++++++
 ...e-between-isolating-and-freeing-page.patch |  64 +++++++++
 ...vm_bug_on_page-from-page_huge_active.patch |  44 ++++++
 ...-migrate-the-fallocated-hugetlb-page.patch |  71 +++++++++
 ...ix-madv_remove-deadlock-on-shmem-thp.patch | 111 ++++++++++++++
 queue-5.4/series                              |   9 ++
 10 files changed, 686 insertions(+)
 create mode 100644 queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
 create mode 100644 queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
 create mode 100644 queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
 create mode 100644 queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
 create mode 100644 queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
 create mode 100644 queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
 create mode 100644 queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
 create mode 100644 queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
 create mode 100644 queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch

diff --git a/queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch b/queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
new file mode 100644
index 00000000000..8a604d27100
--- /dev/null
+++ b/queue-5.4/arm-footbridge-fix-dc21285-pci-configuration-accessors.patch
@@ -0,0 +1,62 @@
+From 39d3454c3513840eb123b3913fda6903e45ce671 Mon Sep 17 00:00:00 2001
+From: Russell King
+Date: Sun, 18 Oct 2020 09:39:21 +0100
+Subject: ARM: footbridge: fix dc21285 PCI configuration accessors
+
+From: Russell King
+
+commit 39d3454c3513840eb123b3913fda6903e45ce671 upstream.
+
+Building with gcc 4.9.2 reveals a latent bug in the PCI accessors
+for Footbridge platforms, which causes a fatal alignment fault
+while accessing IO memory. Fix this by making the assembly volatile.
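+
+To illustrate the C semantics involved: an asm statement with outputs
+and no "volatile" is treated by GCC as a pure function of its inputs,
+so it may be CSE'd, hoisted, or deleted if the result looks unused,
+none of which is safe for MMIO. A reduced sketch of the fixed read
+path (illustration only, not the exact kernel code):
+
+	static u32 cfg_read32(void *addr, int where)
+	{
+		u32 v;
+
+		/* volatile: the load must be emitted exactly where and
+		 * as often as written, because it touches PCI config
+		 * space rather than ordinary memory */
+		asm volatile("ldr %0, [%1, %2]"
+			     : "=r" (v) : "r" (addr), "r" (where) : "cc");
+		return v;
+	}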
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Russell King
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm/mach-footbridge/dc21285.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+--- a/arch/arm/mach-footbridge/dc21285.c
++++ b/arch/arm/mach-footbridge/dc21285.c
+@@ -66,15 +66,15 @@ dc21285_read_config(struct pci_bus *bus,
+ 	if (addr)
+ 		switch (size) {
+ 		case 1:
+-			asm("ldrb %0, [%1, %2]"
++			asm volatile("ldrb %0, [%1, %2]"
+ 				: "=r" (v) : "r" (addr), "r" (where) : "cc");
+ 			break;
+ 		case 2:
+-			asm("ldrh %0, [%1, %2]"
++			asm volatile("ldrh %0, [%1, %2]"
+ 				: "=r" (v) : "r" (addr), "r" (where) : "cc");
+ 			break;
+ 		case 4:
+-			asm("ldr %0, [%1, %2]"
++			asm volatile("ldr %0, [%1, %2]"
+ 				: "=r" (v) : "r" (addr), "r" (where) : "cc");
+ 			break;
+ 		}
+@@ -100,17 +100,17 @@ dc21285_write_config(struct pci_bus *bus
+ 	if (addr)
+ 		switch (size) {
+ 		case 1:
+-			asm("strb %0, [%1, %2]"
++			asm volatile("strb %0, [%1, %2]"
+ 				: : "r" (value), "r" (addr), "r" (where)
+ 				: "cc");
+ 			break;
+ 		case 2:
+-			asm("strh %0, [%1, %2]"
++			asm volatile("strh %0, [%1, %2]"
+ 				: : "r" (value), "r" (addr), "r" (where)
+ 				: "cc");
+ 			break;
+ 		case 4:
+-			asm("str %0, [%1, %2]"
++			asm volatile("str %0, [%1, %2]"
+ 				: : "r" (value), "r" (addr), "r" (where)
+ 				: "cc");
+ 			break;
diff --git a/queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch b/queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
new file mode 100644
index 00000000000..fe84f88c647
--- /dev/null
+++ b/queue-5.4/kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch
@@ -0,0 +1,59 @@
+From ccd85d90ce092bdb047a7f6580f3955393833b22 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Tue, 2 Feb 2021 13:20:17 -0800
+Subject: KVM: SVM: Treat SVM as unsupported when running as an SEV guest
+
+From: Sean Christopherson
+
+commit ccd85d90ce092bdb047a7f6580f3955393833b22 upstream.
+
+Don't let KVM load when running as an SEV guest, regardless of what
+CPUID says. Memory is encrypted with a key that is not accessible to
+the host (L0), thus it's impossible for L0 to emulate SVM, e.g. it'll
+see garbage when reading the VMCB.
+
+Technically, KVM could decrypt all memory that needs to be accessible to
+the L0 and use shadow paging so that L0 does not need to shadow NPT, but
+exposing such information to L0 largely defeats the purpose of running as
+an SEV guest. This can always be revisited if someone comes up with a
+use case for running VMs inside SEV guests.
+
+Note, VMLOAD, VMRUN, etc... will also #GP on GPAs with C-bit set, i.e. KVM
+is doomed even if the SEV guest is debuggable and the hypervisor is willing
+to decrypt the VMCB. This may or may not be fixed on CPUs that have the
+SVME_ADDR_CHK fix.
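+
+The resulting probe order in has_svm() is roughly the following (a
+sketch of the logic, not the verbatim 5.4 source):
+
+	static int has_svm(void)
+	{
+		if (!cpu_has_svm(NULL))	/* CPUID does not report SVM */
+			return 0;
+
+		if (sev_active())	/* running as an SEV guest, so */
+			return 0;	/* L0 cannot emulate SVM for us */
+
+		return 1;
+	}
+
+sev_active() is defined in arch/x86/mm/mem_encrypt.c, hence the
+EXPORT_SYMBOL_GPL() added below so that a modular kvm-amd can call it.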
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson
+Message-Id: <20210202212017.2486595-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/svm.c        | 5 +++++
+ arch/x86/mm/mem_encrypt.c | 1 +
+ 2 files changed, 6 insertions(+)
+
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -889,6 +889,11 @@ static int has_svm(void)
+ 		return 0;
+ 	}
+ 
++	if (sev_active()) {
++		pr_info("KVM is unsupported when running as an SEV guest\n");
++		return 0;
++	}
++
+ 	return 1;
+ }
+ 
+--- a/arch/x86/mm/mem_encrypt.c
++++ b/arch/x86/mm/mem_encrypt.c
+@@ -375,6 +375,7 @@ bool force_dma_unencrypted(struct device
+ 
+ 	return false;
+ }
++EXPORT_SYMBOL_GPL(sev_active);
+ 
+ /* Architecture __weak replacement functions */
+ void __init mem_encrypt_free_decrypted_mem(void)
diff --git a/queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch b/queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
new file mode 100644
index 00000000000..efec510e87c
--- /dev/null
+++ b/queue-5.4/kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch
@@ -0,0 +1,45 @@
+From 943dea8af21bd896e0d6c30ea221203fb3cd3265 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Tue, 2 Feb 2021 08:55:46 -0800
+Subject: KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode
+
+From: Sean Christopherson
+
+commit 943dea8af21bd896e0d6c30ea221203fb3cd3265 upstream.
+
+Set the emulator context to PROT64 if SYSENTER transitions from 32-bit
+userspace (compat mode) to a 64-bit kernel, otherwise the RIP update at
+the end of x86_emulate_insn() will incorrectly truncate the new RIP.
+
+Note, this bug is mostly limited to running an Intel virtual CPU model on
+an AMD physical CPU, as other combinations of virtual and physical CPUs
+do not trigger full emulation. On Intel CPUs, SYSENTER in compatibility
+mode is legal, and unconditionally transitions to 64-bit mode. On AMD
+CPUs, SYSENTER is illegal in compatibility mode and #UDs. If the vCPU is
+AMD, KVM injects a #UD on SYSENTER in compat mode. If the pCPU is Intel,
+SYSENTER will execute natively and not trigger #UD->VM-Exit (ignoring
+guest TLB shenanigans).
+
+Fixes: fede8076aab4 ("KVM: x86: handle wrap around 32-bit address space")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jonny Barker
+[sean: wrote changelog]
+Signed-off-by: Sean Christopherson
+Message-Id: <20210202165546.2390296-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kvm/emulate.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -2890,6 +2890,8 @@ static int em_sysenter(struct x86_emulat
+ 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
+ 	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
+							(u32)msr_data;
++	if (efer & EFER_LMA)
++		ctxt->mode = X86EMUL_MODE_PROT64;
+ 
+ 	return X86EMUL_CONTINUE;
+ }
diff --git a/queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch b/queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
new file mode 100644
index 00000000000..8aa63705b18
--- /dev/null
+++ b/queue-5.4/mm-compaction-move-high_pfn-to-the-for-loop-scope.patch
@@ -0,0 +1,85 @@
+From 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 Mon Sep 17 00:00:00 2001
+From: Rokudo Yan
+Date: Thu, 4 Feb 2021 18:32:20 -0800
+Subject: mm, compaction: move high_pfn to the for loop scope
+
+From: Rokudo Yan
+
+commit 74e21484e40bb8ce0f9828bbfe1c9fc9b04249c6 upstream.
+
+In fast_isolate_freepages(), high_pfn will be used if a preferred one
+(ie PFN >= low_pfn) is not found.
+
+But high_pfn is not reset before searching a free area, so when it is
+used as the freepage, it may come from another free area searched
+earlier. As a result move_freelist_head(freelist, freepage) will have
+unexpected behavior (eg corrupt the MOVABLE freelist):
+
+  Unable to handle kernel paging request at virtual address dead000000000200
+  Mem abort info:
+    ESR = 0x96000044
+    Exception class = DABT (current EL), IL = 32 bits
+    SET = 0, FnV = 0
+    EA = 0, S1PTW = 0
+  Data abort info:
+    ISV = 0, ISS = 0x00000044
+    CM = 0, WnR = 1
+  [dead000000000200] address between user and kernel address ranges
+
+  -000|list_cut_before(inline)
+  -000|move_freelist_head(inline)
+  -000|fast_isolate_freepages(inline)
+  -000|isolate_freepages(inline)
+  -000|compaction_alloc(?, ?)
+  -001|unmap_and_move(inline)
+  -001|migrate_pages([NSD:0xFFFFFF80088CBBD0] from = 0xFFFFFF80088CBD88, [NSD:0xFFFFFF80088CBBC8] get_new_p
+  -002|__read_once_size(inline)
+  -002|static_key_count(inline)
+  -002|static_key_false(inline)
+  -002|trace_mm_compaction_migratepages(inline)
+  -002|compact_zone(?, [NSD:0xFFFFFF80088CBCB0] capc = 0x0)
+  -003|kcompactd_do_work(inline)
+  -003|kcompactd([X19] p = 0xFFFFFF93227FBC40)
+  -004|kthread([X20] _create = 0xFFFFFFE1AFB26380)
+  -005|ret_from_fork(asm)
+
+The issue was reported on a smartphone product with 6GB RAM and 3GB
+zram as the swap device.
+
+This patch fixes the issue by resetting high_pfn before searching each
+free area, which ensures that the freepage and freelist match when
+calling move_freelist_head() in fast_isolate_freepages().
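+
+Reduced to its essence, the fix narrows the scope of the fallback
+candidate so it can never leak from one free area into the next
+(sketch only, loop details elided):
+
+	/* before: a single high_pfn for the whole scan */
+	unsigned long high_pfn = 0;
+	for (order = cc->order - 1; order >= 0; order--) {
+		/* ... may leave high_pfn pointing into this area ... */
+	}
+
+	/* after: reset per free area, so a fallback freepage always
+	 * belongs to the freelist currently being searched */
+	for (order = cc->order - 1; order >= 0; order--) {
+		unsigned long high_pfn = 0;
+		/* ... */
+	}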
+
+Link: http://lkml.kernel.org/r/20190118175136.31341-12-mgorman@techsingularity.net
+Link: https://lkml.kernel.org/r/20210112094720.1238444-1-wu-yan@tcl.com
+Fixes: 5a811889de10f1eb ("mm, compaction: use free lists to quickly locate a migration target")
+Signed-off-by: Rokudo Yan
+Acked-by: Mel Gorman
+Acked-by: Vlastimil Babka
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/compaction.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/mm/compaction.c
++++ b/mm/compaction.c
+@@ -1276,7 +1276,7 @@ fast_isolate_freepages(struct compact_co
+ {
+ 	unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+ 	unsigned int nr_scanned = 0;
+-	unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
++	unsigned long low_pfn, min_pfn, highest = 0;
+ 	unsigned long nr_isolated = 0;
+ 	unsigned long distance;
+ 	struct page *page = NULL;
+@@ -1321,6 +1321,7 @@ fast_isolate_freepages(struct compact_co
+ 		struct page *freepage;
+ 		unsigned long flags;
+ 		unsigned int order_scanned = 0;
++		unsigned long high_pfn = 0;
+ 
+ 		if (!area->nr_free)
+ 			continue;
diff --git a/queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch b/queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
new file mode 100644
index 00000000000..cb3f75a6869
--- /dev/null
+++ b/queue-5.4/mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch
@@ -0,0 +1,136 @@
+From 7ffddd499ba6122b1a07828f023d1d67629aa017 Mon Sep 17 00:00:00 2001
+From: Muchun Song
+Date: Thu, 4 Feb 2021 18:32:06 -0800
+Subject: mm: hugetlb: fix a race between freeing and dissolving the page
+
+From: Muchun Song
+
+commit 7ffddd499ba6122b1a07828f023d1d67629aa017 upstream.
+
+There is a race condition between __free_huge_page()
+and dissolve_free_huge_page().
+
+  CPU0:                                 CPU1:
+
+  // page_count(page) == 1
+  put_page(page)
+    __free_huge_page(page)
+                                        dissolve_free_huge_page(page)
+                                          spin_lock(&hugetlb_lock)
+                                          // PageHuge(page) && !page_count(page)
+                                          update_and_free_page(page)
+                                          // page is freed to the buddy
+                                          spin_unlock(&hugetlb_lock)
+      spin_lock(&hugetlb_lock)
+      clear_page_huge_active(page)
+      enqueue_huge_page(page)
+      // It is wrong, the page is already freed
+      spin_unlock(&hugetlb_lock)
+
+The race window is between put_page() and dissolve_free_huge_page().
+
+We should make sure that the page is already on the free list when it is
+dissolved. Otherwise, __free_huge_page() would re-enqueue page(s) already
+freed to the buddy allocator, corrupting them.
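+
+The fix tracks, in page_private() of a tail page, whether a HugeTLB
+page really is on the free list, and makes dissolve_free_huge_page()
+wait out the window. In outline (sketch of the logic added below):
+
+retry:
+	spin_lock(&hugetlb_lock);
+	if (unlikely(!PageHugeFreed(head))) {
+		/*
+		 * Racing with __free_huge_page(): the page is not back
+		 * on the free list yet. The window is tiny, so retrying
+		 * is cheaper than returning -EBUSY to the caller.
+		 */
+		spin_unlock(&hugetlb_lock);
+		cond_resched();
+		goto retry;
+	}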
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-4-songmuchun@bytedance.com
+Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Signed-off-by: Muchun Song
+Reviewed-by: Mike Kravetz
+Reviewed-by: Oscar Salvador
+Acked-by: Michal Hocko
+Cc: David Hildenbrand
+Cc: Yang Shi
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/hugetlb.c | 39 +++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 39 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -71,6 +71,21 @@
+ DEFINE_SPINLOCK(hugetlb_lock);
+ static int num_fault_mutexes;
+ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
+ 
++static inline bool PageHugeFreed(struct page *head)
++{
++	return page_private(head + 4) == -1UL;
++}
++
++static inline void SetPageHugeFreed(struct page *head)
++{
++	set_page_private(head + 4, -1UL);
++}
++
++static inline void ClearPageHugeFreed(struct page *head)
++{
++	set_page_private(head + 4, 0);
++}
++
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
+ 
+@@ -869,6 +884,7 @@ static void enqueue_huge_page(struct hst
+ 	list_move(&page->lru, &h->hugepage_freelists[nid]);
+ 	h->free_huge_pages++;
+ 	h->free_huge_pages_node[nid]++;
++	SetPageHugeFreed(page);
+ }
+ 
+ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+@@ -886,6 +902,7 @@ static struct page *dequeue_huge_page_no
+ 		return NULL;
+ 	list_move(&page->lru, &h->hugepage_activelist);
+ 	set_page_refcounted(page);
++	ClearPageHugeFreed(page);
+ 	h->free_huge_pages--;
+ 	h->free_huge_pages_node[nid]--;
+ 	return page;
+@@ -1375,6 +1392,7 @@ static void prep_new_huge_page(struct hs
+ 	set_hugetlb_cgroup(page, NULL);
+ 	h->nr_huge_pages++;
+ 	h->nr_huge_pages_node[nid]++;
++	ClearPageHugeFreed(page);
+ 	spin_unlock(&hugetlb_lock);
+ }
+ 
+@@ -1602,6 +1620,7 @@ int dissolve_free_huge_page(struct page
+ {
+ 	int rc = -EBUSY;
+ 
++retry:
+ 	/* Not to disrupt normal path by vainly holding hugetlb_lock */
+ 	if (!PageHuge(page))
+ 		return 0;
+@@ -1618,6 +1637,26 @@ int dissolve_free_huge_page(struct page
+ 		int nid = page_to_nid(head);
+ 		if (h->free_huge_pages - h->resv_huge_pages == 0)
+ 			goto out;
++
++		/*
++		 * We should make sure that the page is already on the free list
++		 * when it is dissolved.
++		 */
++		if (unlikely(!PageHugeFreed(head))) {
++			spin_unlock(&hugetlb_lock);
++			cond_resched();
++
++			/*
++			 * Theoretically, we should return -EBUSY when we
++			 * encounter this race. In fact, we have a chance
++			 * to successfully dissolve the page if we do a
++			 * retry. Because the race window is quite small.
++			 * If we seize this opportunity, it is an optimization
++			 * for increasing the success rate of dissolving page.
++			 */
++			goto retry;
++		}
++
+ 		/*
+ 		 * Move PageHWPoison flag from head page to the raw error page,
+ 		 * which makes any subpages rather than the error page reusable.
diff --git a/queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch b/queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
new file mode 100644
index 00000000000..4ed90655e8e
--- /dev/null
+++ b/queue-5.4/mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch
@@ -0,0 +1,64 @@
+From 0eb2df2b5629794020f75e94655e1994af63f0d4 Mon Sep 17 00:00:00 2001
+From: Muchun Song
+Date: Thu, 4 Feb 2021 18:32:10 -0800
+Subject: mm: hugetlb: fix a race between isolating and freeing page
+
+From: Muchun Song
+
+commit 0eb2df2b5629794020f75e94655e1994af63f0d4 upstream.
+
+There is a race between isolate_huge_page() and __free_huge_page().
+
+  CPU0:                                 CPU1:
+
+  if (PageHuge(page))
+                                        put_page(page)
+                                          __free_huge_page(page)
+                                            spin_lock(&hugetlb_lock)
+                                            update_and_free_page(page)
+                                              set_compound_page_dtor(page,
+                                                NULL_COMPOUND_DTOR)
+                                            spin_unlock(&hugetlb_lock)
+  isolate_huge_page(page)
+    // trigger BUG_ON
+    VM_BUG_ON_PAGE(!PageHead(page), page)
+    spin_lock(&hugetlb_lock)
+    page_huge_active(page)
+      // trigger BUG_ON
+      VM_BUG_ON_PAGE(!PageHuge(page), page)
+    spin_unlock(&hugetlb_lock)
+
+If we isolate a HugeTLB page on CPU0 while it is concurrently freed to
+the buddy allocator on CPU1, we can trigger a BUG_ON on CPU0, because
+the page has already been freed to the buddy allocator.
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-5-songmuchun@bytedance.com
+Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
+Signed-off-by: Muchun Song
+Reviewed-by: Mike Kravetz
+Acked-by: Michal Hocko
+Reviewed-by: Oscar Salvador
+Cc: David Hildenbrand
+Cc: Yang Shi
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/hugetlb.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5175,9 +5175,9 @@ bool isolate_huge_page(struct page *page
+ {
+ 	bool ret = true;
+ 
+-	VM_BUG_ON_PAGE(!PageHead(page), page);
+ 	spin_lock(&hugetlb_lock);
+-	if (!page_huge_active(page) || !get_page_unless_zero(page)) {
++	if (!PageHeadHuge(page) || !page_huge_active(page) ||
++	    !get_page_unless_zero(page)) {
+ 		ret = false;
+ 		goto unlock;
+ 	}
diff --git a/queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch b/queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
new file mode 100644
index 00000000000..2f5f7e476f4
--- /dev/null
+++ b/queue-5.4/mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch
@@ -0,0 +1,44 @@
+From ecbf4724e6061b4b01be20f6d797d64d462b2bc8 Mon Sep 17 00:00:00 2001
+From: Muchun Song
+Date: Thu, 4 Feb 2021 18:32:13 -0800
+Subject: mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active
+
+From: Muchun Song
+
+commit ecbf4724e6061b4b01be20f6d797d64d462b2bc8 upstream.
+
+page_huge_active() can be called from scan_movable_pages(), which does
+not hold a reference count to the HugeTLB page. So when we call
+page_huge_active() from scan_movable_pages(), the HugeTLB page can be
+freed in parallel. Then we will trigger the BUG_ON in page_huge_active()
+when CONFIG_DEBUG_VM is enabled. Just remove the VM_BUG_ON_PAGE.
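+
+After the change, the helper simply reports false when it races with a
+free, instead of crashing. The resulting shape (as in the hunk below):
+
+	bool page_huge_active(struct page *page)
+	{
+		/*
+		 * The caller may hold no reference, so the page can
+		 * concurrently stop being a HugeTLB page; answering
+		 * false is then the safe, expected result.
+		 */
+		return PageHeadHuge(page) && PagePrivate(&page[1]);
+	}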
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-6-songmuchun@bytedance.com
+Fixes: 7e1f049efb86 ("mm: hugetlb: cleanup using paeg_huge_active()")
+Signed-off-by: Muchun Song
+Reviewed-by: Mike Kravetz
+Acked-by: Michal Hocko
+Reviewed-by: Oscar Salvador
+Cc: David Hildenbrand
+Cc: Yang Shi
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/hugetlb.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1234,8 +1234,7 @@ struct hstate *size_to_hstate(unsigned l
+  */
+ bool page_huge_active(struct page *page)
+ {
+-	VM_BUG_ON_PAGE(!PageHuge(page), page);
+-	return PageHead(page) && PagePrivate(&page[1]);
++	return PageHeadHuge(page) && PagePrivate(&page[1]);
+ }
+ 
+ /* never called for tail page */
diff --git a/queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch b/queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
new file mode 100644
index 00000000000..d226a8d7ff3
--- /dev/null
+++ b/queue-5.4/mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch
@@ -0,0 +1,71 @@
+From 585fc0d2871c9318c949fbf45b1f081edd489e96 Mon Sep 17 00:00:00 2001
+From: Muchun Song
+Date: Thu, 4 Feb 2021 18:32:03 -0800
+Subject: mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page
+
+From: Muchun Song
+
+commit 585fc0d2871c9318c949fbf45b1f081edd489e96 upstream.
+
+If a new hugetlb page is allocated during fallocate it will not be
+marked as active (set_page_huge_active) which will result in a later
+isolate_huge_page failure when the page migration code would like to
+move that page. Such a failure would be unexpected and wrong.
+
+Only export set_page_huge_active(); leave clear_page_huge_active()
+static, because it has no external users.
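+
+In outline, the fallocate loop then does (sketch, error handling and
+locking details elided):
+
+	page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
+	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+	/* mark active before dropping the page lock and reference,
+	 * so a later isolate_huge_page() from migration succeeds */
+	set_page_huge_active(page);
+	unlock_page(page);
+	put_page(page);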
+
+Link: https://lkml.kernel.org/r/20210115124942.46403-3-songmuchun@bytedance.com
+Fixes: 70c3547e36f5 (hugetlbfs: add hugetlbfs_fallocate())
+Signed-off-by: Muchun Song
+Acked-by: Michal Hocko
+Reviewed-by: Mike Kravetz
+Reviewed-by: Oscar Salvador
+Cc: David Hildenbrand
+Cc: Yang Shi
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/hugetlbfs/inode.c    | 3 ++-
+ include/linux/hugetlb.h | 2 ++
+ mm/hugetlb.c            | 2 +-
+ 3 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -675,9 +675,10 @@ static long hugetlbfs_fallocate(struct f
+ 
+ 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ 
++		set_page_huge_active(page);
+ 		/*
+ 		 * unlock_page because locked by add_to_page_cache()
+-		 * page_put due to reference from alloc_huge_page()
++		 * put_page() due to reference from alloc_huge_page()
+ 		 */
+ 		unlock_page(page);
+ 		put_page(page);
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -590,6 +590,8 @@ static inline void huge_ptep_modify_prot
+ }
+ #endif
+ 
++void set_page_huge_active(struct page *page);
++
+ #else	/* CONFIG_HUGETLB_PAGE */
+ struct hstate {};
+ 
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1222,7 +1222,7 @@ bool page_huge_active(struct page *page)
+ }
+ 
+ /* never called for tail page */
+-static void set_page_huge_active(struct page *page)
++void set_page_huge_active(struct page *page)
+ {
+ 	VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+ 	SetPagePrivate(&page[1]);
diff --git a/queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch b/queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
new file mode 100644
index 00000000000..f538d01ee61
--- /dev/null
+++ b/queue-5.4/mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch
@@ -0,0 +1,111 @@
+From 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins
+Date: Thu, 4 Feb 2021 18:32:31 -0800
+Subject: mm: thp: fix MADV_REMOVE deadlock on shmem THP
+
+From: Hugh Dickins
+
+commit 1c2f67308af4c102b4e1e6cd6f69819ae59408e0 upstream.
+
+Sergey reported a deadlock between kswapd correctly doing its usual
+lock_page(page) followed by down_read(page->mapping->i_mmap_rwsem), and
+madvise(MADV_REMOVE) on an madvise(MADV_HUGEPAGE) area doing
+down_write(page->mapping->i_mmap_rwsem) followed by lock_page(page).
+
+This happened when shmem_fallocate(punch hole)'s unmap_mapping_range()
+reaches zap_pmd_range()'s call to __split_huge_pmd(). The same deadlock
+could occur when partially truncating a mapped huge tmpfs file, or using
+fallocate(FALLOC_FL_PUNCH_HOLE) on it.
+
+__split_huge_pmd()'s page lock was added in 5.8, to make sure that any
+concurrent use of reuse_swap_page() (holding page lock) could not catch
+the anon THP's mapcounts and swapcounts while they were being split.
+
+Fortunately, reuse_swap_page() is never applied to a shmem or file THP
+(not even by khugepaged, which checks PageSwapCache before calling), and
+anonymous THPs are never created in shmem or file areas: so that
+__split_huge_pmd()'s page lock can only be necessary for anonymous THPs,
+on which there is no risk of deadlock with i_mmap_rwsem.
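+
+The two lock orders, side by side (outline):
+
+  kswapd:                              MADV_REMOVE:
+  lock_page(page)                      down_write(&mapping->i_mmap_rwsem)
+  down_read(&mapping->i_mmap_rwsem)    lock_page(page)
+
+With the fix, __split_huge_pmd() takes the page lock only for
+PageAnon(page), where i_mmap_rwsem is never held, so the file-backed
+path can no longer deadlock.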
+ +Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101161409470.2022@eggly.anvils +Fixes: c444eb564fb1 ("mm: thp: make the THP mapcount atomic against __split_huge_pmd_locked()") +Signed-off-by: Hugh Dickins +Reported-by: Sergey Senozhatsky +Reviewed-by: Andrea Arcangeli +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + mm/huge_memory.c | 37 +++++++++++++++++++++++-------------- + 1 file changed, 23 insertions(+), 14 deletions(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2306,7 +2306,7 @@ void __split_huge_pmd(struct vm_area_str + { + spinlock_t *ptl; + struct mmu_notifier_range range; +- bool was_locked = false; ++ bool do_unlock_page = false; + pmd_t _pmd; + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, +@@ -2322,7 +2322,6 @@ void __split_huge_pmd(struct vm_area_str + VM_BUG_ON(freeze && !page); + if (page) { + VM_WARN_ON_ONCE(!PageLocked(page)); +- was_locked = true; + if (page != pmd_page(*pmd)) + goto out; + } +@@ -2331,19 +2330,29 @@ repeat: + if (pmd_trans_huge(*pmd)) { + if (!page) { + page = pmd_page(*pmd); +- if (unlikely(!trylock_page(page))) { +- get_page(page); +- _pmd = *pmd; +- spin_unlock(ptl); +- lock_page(page); +- spin_lock(ptl); +- if (unlikely(!pmd_same(*pmd, _pmd))) { +- unlock_page(page); ++ /* ++ * An anonymous page must be locked, to ensure that a ++ * concurrent reuse_swap_page() sees stable mapcount; ++ * but reuse_swap_page() is not used on shmem or file, ++ * and page lock must not be taken when zap_pmd_range() ++ * calls __split_huge_pmd() while i_mmap_lock is held. ++ */ ++ if (PageAnon(page)) { ++ if (unlikely(!trylock_page(page))) { ++ get_page(page); ++ _pmd = *pmd; ++ spin_unlock(ptl); ++ lock_page(page); ++ spin_lock(ptl); ++ if (unlikely(!pmd_same(*pmd, _pmd))) { ++ unlock_page(page); ++ put_page(page); ++ page = NULL; ++ goto repeat; ++ } + put_page(page); +- page = NULL; +- goto repeat; + } +- put_page(page); ++ do_unlock_page = true; + } + } + if (PageMlocked(page)) +@@ -2353,7 +2362,7 @@ repeat: + __split_huge_pmd_locked(vma, pmd, range.start, freeze); + out: + spin_unlock(ptl); +- if (!was_locked && page) ++ if (do_unlock_page) + unlock_page(page); + /* + * No need to double call mmu_notifier->invalidate_range() callback. diff --git a/queue-5.4/series b/queue-5.4/series index b88d9bfea66..54cd93d38df 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -42,3 +42,12 @@ smb3-fix-crediting-for-compounding-when-only-one-request-in-flight.patch mmc-core-limit-retries-when-analyse-of-sdio-tuples-fails.patch drm-amd-display-revert-fix-edid-parsing-after-resume-from-suspend.patch nvme-pci-avoid-the-deepest-sleep-state-on-kingston-a2000-ssds.patch +kvm-svm-treat-svm-as-unsupported-when-running-as-an-sev-guest.patch +kvm-x86-update-emulator-context-mode-if-sysenter-xfers-to-64-bit-mode.patch +arm-footbridge-fix-dc21285-pci-configuration-accessors.patch +mm-hugetlbfs-fix-cannot-migrate-the-fallocated-hugetlb-page.patch +mm-hugetlb-fix-a-race-between-freeing-and-dissolving-the-page.patch +mm-hugetlb-fix-a-race-between-isolating-and-freeing-page.patch +mm-hugetlb-remove-vm_bug_on_page-from-page_huge_active.patch +mm-compaction-move-high_pfn-to-the-for-loop-scope.patch +mm-thp-fix-madv_remove-deadlock-on-shmem-thp.patch -- 2.47.3