From: Greg Kroah-Hartman Date: Wed, 28 Jun 2023 18:28:31 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.4.1~52 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3b50a3d88b9ce2f318e320184d39972b24a39b3f;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: mm-hwpoison-try-to-recover-from-copy-on-write-faults.patch mm-hwpoison-when-copy-on-write-hits-poison-take-page-offline.patch mptcp-ensure-listener-is-unhashed-before-updating-the-sk-status.patch --- diff --git a/queue-6.1/mm-hwpoison-try-to-recover-from-copy-on-write-faults.patch b/queue-6.1/mm-hwpoison-try-to-recover-from-copy-on-write-faults.patch new file mode 100644 index 00000000000..63bc3270cb1 --- /dev/null +++ b/queue-6.1/mm-hwpoison-try-to-recover-from-copy-on-write-faults.patch @@ -0,0 +1,214 @@ +From a873dfe1032a132bf89f9e19a6ac44f5a0b78754 Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Fri, 21 Oct 2022 13:01:19 -0700 +Subject: mm, hwpoison: try to recover from copy-on write faults + +From: Tony Luck + +commit a873dfe1032a132bf89f9e19a6ac44f5a0b78754 upstream. + +Patch series "Copy-on-write poison recovery", v3. + +Part 1 deals with the process that triggered the copy on write fault with +a store to a shared read-only page. That process is send a SIGBUS with +the usual machine check decoration to specify the virtual address of the +lost page, together with the scope. + +Part 2 sets up to asynchronously take the page with the uncorrected error +offline to prevent additional machine check faults. H/t to Miaohe Lin + and Shuai Xue for +pointing me to the existing function to queue a call to memory_failure(). + +On x86 there is some duplicate reporting (because the error is also +signalled by the memory controller as well as by the core that triggered +the machine check). 
Console logs look like this: + + +This patch (of 2): + +If the kernel is copying a page as the result of a copy-on-write +fault and runs into an uncorrectable error, Linux will crash because +it does not have recovery code for this case where poison is consumed +by the kernel. + +It is easy to set up a test case. Just inject an error into a private +page, fork(2), and have the child process write to the page. + +I wrapped that neatly into a test at: + + git://git.kernel.org/pub/scm/linux/kernel/git/aegl/ras-tools.git + +just enable ACPI error injection and run: + + # ./einj_mem-uc -f copy-on-write + +Add a new copy_user_highpage_mc() function that uses copy_mc_to_kernel() +on architectures where that is available (currently x86 and powerpc). +When an error is detected during the page copy, return VM_FAULT_HWPOISON +to caller of wp_page_copy(). This propagates up the call stack. Both x86 +and powerpc have code in their fault handler to deal with this code by +sending a SIGBUS to the application. + +Note that this patch avoids a system crash and signals the process that +triggered the copy-on-write action. It does not take any action for the +memory error that is still in the shared page. To handle that a call to +memory_failure() is needed. But this cannot be done from wp_page_copy() +because it holds mmap_lock(). Perhaps the architecture fault handlers +can deal with this loose end in a subsequent patch? + +On Intel/x86 this loose end will often be handled automatically because +the memory controller provides an additional notification of the h/w +poison in memory, the handler for this will call memory_failure(). This +isn't a 100% solution. If there are multiple errors, not all may be +logged in this way. 
+ +[tony.luck@intel.com: add call to kmsan_unpoison_memory(), per Miaohe Lin] + Link: https://lkml.kernel.org/r/20221031201029.102123-2-tony.luck@intel.com +Link: https://lkml.kernel.org/r/20221021200120.175753-1-tony.luck@intel.com +Link: https://lkml.kernel.org/r/20221021200120.175753-2-tony.luck@intel.com +Signed-off-by: Tony Luck +Reviewed-by: Dan Williams +Reviewed-by: Naoya Horiguchi +Reviewed-by: Miaohe Lin +Reviewed-by: Alexander Potapenko +Tested-by: Shuai Xue +Cc: Christophe Leroy +Cc: Matthew Wilcox (Oracle) +Cc: Michael Ellerman +Cc: Nicholas Piggin +Signed-off-by: Andrew Morton +Signed-off-by: Jane Chu +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/highmem.h | 26 ++++++++++++++++++++++++++ + mm/memory.c | 30 ++++++++++++++++++++---------- + 2 files changed, 46 insertions(+), 10 deletions(-) + +--- a/include/linux/highmem.h ++++ b/include/linux/highmem.h +@@ -319,6 +319,32 @@ static inline void copy_user_highpage(st + + #endif + ++#ifdef copy_mc_to_kernel ++static inline int copy_mc_user_highpage(struct page *to, struct page *from, ++ unsigned long vaddr, struct vm_area_struct *vma) ++{ ++ unsigned long ret; ++ char *vfrom, *vto; ++ ++ vfrom = kmap_local_page(from); ++ vto = kmap_local_page(to); ++ ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); ++ if (!ret) ++ kmsan_unpoison_memory(page_address(to), PAGE_SIZE); ++ kunmap_local(vto); ++ kunmap_local(vfrom); ++ ++ return ret; ++} ++#else ++static inline int copy_mc_user_highpage(struct page *to, struct page *from, ++ unsigned long vaddr, struct vm_area_struct *vma) ++{ ++ copy_user_highpage(to, from, vaddr, vma); ++ return 0; ++} ++#endif ++ + #ifndef __HAVE_ARCH_COPY_HIGHPAGE + + static inline void copy_highpage(struct page *to, struct page *from) +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2843,10 +2843,16 @@ static inline int pte_unmap_same(struct + return same; + } + +-static inline bool __wp_page_copy_user(struct page *dst, struct page *src, +- struct vm_fault *vmf) ++/* ++ * Return: ++ * 0:
copied succeeded ++ * -EHWPOISON: copy failed due to hwpoison in source page ++ * -EAGAIN: copied failed (some other reason) ++ */ ++static inline int __wp_page_copy_user(struct page *dst, struct page *src, ++ struct vm_fault *vmf) + { +- bool ret; ++ int ret; + void *kaddr; + void __user *uaddr; + bool locked = false; +@@ -2855,8 +2861,9 @@ static inline bool __wp_page_copy_user(s + unsigned long addr = vmf->address; + + if (likely(src)) { +- copy_user_highpage(dst, src, addr, vma); +- return true; ++ if (copy_mc_user_highpage(dst, src, addr, vma)) ++ return -EHWPOISON; ++ return 0; + } + + /* +@@ -2883,7 +2890,7 @@ static inline bool __wp_page_copy_user(s + * and update local tlb only + */ + update_mmu_tlb(vma, addr, vmf->pte); +- ret = false; ++ ret = -EAGAIN; + goto pte_unlock; + } + +@@ -2908,7 +2915,7 @@ static inline bool __wp_page_copy_user(s + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* The PTE changed under us, update local tlb */ + update_mmu_tlb(vma, addr, vmf->pte); +- ret = false; ++ ret = -EAGAIN; + goto pte_unlock; + } + +@@ -2927,7 +2934,7 @@ warn: + } + } + +- ret = true; ++ ret = 0; + + pte_unlock: + if (locked) +@@ -3099,6 +3106,7 @@ static vm_fault_t wp_page_copy(struct vm + pte_t entry; + int page_copied = 0; + struct mmu_notifier_range range; ++ int ret; + + delayacct_wpcopy_start(); + +@@ -3116,19 +3124,21 @@ static vm_fault_t wp_page_copy(struct vm + if (!new_page) + goto oom; + +- if (!__wp_page_copy_user(new_page, old_page, vmf)) { ++ ret = __wp_page_copy_user(new_page, old_page, vmf); ++ if (ret) { + /* + * COW failed, if the fault was solved by other, + * it's fine. If not, userspace would re-fault on + * the same address and we will handle the fault + * from the second attempt. ++ * The -EHWPOISON case will not be retried. + */ + put_page(new_page); + if (old_page) + put_page(old_page); + + delayacct_wpcopy_end(); +- return 0; ++ return ret == -EHWPOISON ? 
VM_FAULT_HWPOISON : 0; + } + kmsan_copy_page_meta(new_page, old_page); + } diff --git a/queue-6.1/mm-hwpoison-when-copy-on-write-hits-poison-take-page-offline.patch b/queue-6.1/mm-hwpoison-when-copy-on-write-hits-poison-take-page-offline.patch new file mode 100644 index 00000000000..e7fb68e1fb2 --- /dev/null +++ b/queue-6.1/mm-hwpoison-when-copy-on-write-hits-poison-take-page-offline.patch @@ -0,0 +1,83 @@ +From d302c2398ba269e788a4f37ae57c07a7fcabaa42 Mon Sep 17 00:00:00 2001 +From: Tony Luck +Date: Fri, 21 Oct 2022 13:01:20 -0700 +Subject: mm, hwpoison: when copy-on-write hits poison, take page offline + +From: Tony Luck + +commit d302c2398ba269e788a4f37ae57c07a7fcabaa42 upstream. + +Cannot call memory_failure() directly from the fault handler because +mmap_lock (and others) are held. + +It is important, but not urgent, to mark the source page as h/w poisoned +and unmap it from other tasks. + +Use memory_failure_queue() to request a call to memory_failure() for the +page with the error. + +Also provide a stub version for CONFIG_MEMORY_FAILURE=n + +Link: https://lkml.kernel.org/r/20221021200120.175753-3-tony.luck@intel.com +Signed-off-by: Tony Luck +Reviewed-by: Miaohe Lin +Cc: Christophe Leroy +Cc: Dan Williams +Cc: Matthew Wilcox (Oracle) +Cc: Michael Ellerman +Cc: Naoya Horiguchi +Cc: Nicholas Piggin +Cc: Shuai Xue +Signed-off-by: Andrew Morton +[ Due to missing commits + e591ef7d96d6e ("mm,hwpoison,hugetlb,memory_hotplug: hotremove memory section with hwpoisoned hugepage") + 5033091de814a ("mm/hwpoison: introduce per-memory_block hwpoison counter") + The impact of e591ef7d96d6e is its introduction of an additional flag in + __get_huge_page_for_hwpoison() that serves as an indication a hwpoisoned + hugetlb page should have its migratable bit cleared. + The impact of 5033091de814a is contexual. + Resolve by ignoring both missing commits. 
- jane] +Signed-off-by: Jane Chu +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 5 ++++- + mm/memory.c | 4 +++- + 2 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -3295,7 +3295,6 @@ enum mf_flags { + int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, + unsigned long count, int mf_flags); + extern int memory_failure(unsigned long pfn, int flags); +-extern void memory_failure_queue(unsigned long pfn, int flags); + extern void memory_failure_queue_kick(int cpu); + extern int unpoison_memory(unsigned long pfn); + extern int sysctl_memory_failure_early_kill; +@@ -3304,8 +3303,12 @@ extern void shake_page(struct page *p); + extern atomic_long_t num_poisoned_pages __read_mostly; + extern int soft_offline_page(unsigned long pfn, int flags); + #ifdef CONFIG_MEMORY_FAILURE ++extern void memory_failure_queue(unsigned long pfn, int flags); + extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags); + #else ++static inline void memory_failure_queue(unsigned long pfn, int flags) ++{ ++} + static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags) + { + return 0; +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2861,8 +2861,10 @@ static inline int __wp_page_copy_user(st + unsigned long addr = vmf->address; + + if (likely(src)) { +- if (copy_mc_user_highpage(dst, src, addr, vma)) ++ if (copy_mc_user_highpage(dst, src, addr, vma)) { ++ memory_failure_queue(page_to_pfn(src), 0); + return -EHWPOISON; ++ } + return 0; + } + diff --git a/queue-6.1/mptcp-ensure-listener-is-unhashed-before-updating-the-sk-status.patch b/queue-6.1/mptcp-ensure-listener-is-unhashed-before-updating-the-sk-status.patch new file mode 100644 index 00000000000..e04bcc90684 --- /dev/null +++ b/queue-6.1/mptcp-ensure-listener-is-unhashed-before-updating-the-sk-status.patch @@ -0,0 +1,98 @@ +From 57fc0f1ceaa4016354cf6f88533e20b56190e41a Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Tue, 20 Jun 
2023 18:24:23 +0200 +Subject: mptcp: ensure listener is unhashed before updating the sk status + +From: Paolo Abeni + +commit 57fc0f1ceaa4016354cf6f88533e20b56190e41a upstream. + +The MPTCP protocol access the listener subflow in a lockless +manner in a couple of places (poll, diag). That works only if +the msk itself leaves the listener status only after that the +subflow itself has been closed/disconnected. Otherwise we risk +deadlock in diag, as reported by Christoph. + +Address the issue ensuring that the first subflow (the listener +one) is always disconnected before updating the msk socket status. + +Reported-by: Christoph Paasch +Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/407 +Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Signed-off-by: Matthieu Baerts +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/pm_netlink.c | 1 + + net/mptcp/protocol.c | 26 ++++++++++++++++++++------ + 2 files changed, 21 insertions(+), 6 deletions(-) + +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -1039,6 +1039,7 @@ static int mptcp_pm_nl_create_listen_soc + return err; + } + ++ inet_sk_state_store(newsk, TCP_LISTEN); + err = kernel_listen(ssock, backlog); + if (err) { + pr_warn("kernel_listen error, err=%d", err); +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2400,12 +2400,6 @@ static void __mptcp_close_ssk(struct soc + kfree_rcu(subflow, rcu); + } else { + /* otherwise tcp will dispose of the ssk and subflow ctx */ +- if (ssk->sk_state == TCP_LISTEN) { +- tcp_set_state(ssk, TCP_CLOSE); +- mptcp_subflow_queue_clean(sk, ssk); +- inet_csk_listen_stop(ssk); +- } +- + __tcp_close(ssk, 0); + + /* close acquired an extra ref */ +@@ -2939,6 +2933,24 @@ static __poll_t mptcp_check_readable(str + return EPOLLIN | EPOLLRDNORM; + } + ++static void mptcp_check_listen_stop(struct sock *sk) ++{ ++ struct sock 
*ssk; ++ ++ if (inet_sk_state_load(sk) != TCP_LISTEN) ++ return; ++ ++ ssk = mptcp_sk(sk)->first; ++ if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) ++ return; ++ ++ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); ++ mptcp_subflow_queue_clean(sk, ssk); ++ inet_csk_listen_stop(ssk); ++ tcp_set_state(ssk, TCP_CLOSE); ++ release_sock(ssk); ++} ++ + bool __mptcp_close(struct sock *sk, long timeout) + { + struct mptcp_subflow_context *subflow; +@@ -2949,6 +2961,7 @@ bool __mptcp_close(struct sock *sk, long + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { ++ mptcp_check_listen_stop(sk); + inet_sk_state_store(sk, TCP_CLOSE); + goto cleanup; + } +@@ -3062,6 +3075,7 @@ static int mptcp_disconnect(struct sock + if (msk->fastopening) + return -EBUSY; + ++ mptcp_check_listen_stop(sk); + inet_sk_state_store(sk, TCP_CLOSE); + + mptcp_stop_timer(sk); diff --git a/queue-6.1/series b/queue-6.1/series index eb7f63bed40..64f8a0aa3ae 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -1,2 +1,5 @@ mm-mmap-fix-error-path-in-do_vmi_align_munmap.patch mm-mmap-fix-error-return-in-do_vmi_align_munmap.patch +mptcp-ensure-listener-is-unhashed-before-updating-the-sk-status.patch +mm-hwpoison-try-to-recover-from-copy-on-write-faults.patch +mm-hwpoison-when-copy-on-write-hits-poison-take-page-offline.patch