git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
mm/memory-failure: send SIGBUS in the event of thp split fail
author Jane Chu <jane.chu@oracle.com>
Fri, 24 May 2024 21:53:06 +0000 (15:53 -0600)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 4 Jul 2024 02:29:58 +0000 (19:29 -0700)
While handling hwpoison in a THP page, it is possible that
try_to_split_thp_page() fails, for example when the THP page has been
RDMA pinned.  At this point the kernel cannot isolate the poisoned THP
page; all it can do is send a SIGBUS to the user process with a
meaningful payload, to give user-level recovery a chance.

Link: https://lkml.kernel.org/r/20240524215306.2705454-6-jane.chu@oracle.com
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/memory-failure.c

index 7e568838fb329c8484f675649d9da3c55440b611..fc9ce331eb16734f8772843f39bd2c70ac6749d5 100644 (file)
@@ -1706,7 +1706,12 @@ static int identify_page_state(unsigned long pfn, struct page *p,
        return page_action(ps, p, pfn);
 }
 
-static int try_to_split_thp_page(struct page *page)
+/*
+ * When 'release' is 'false', it means that if thp split has failed,
+ * there is still more to do, hence the page refcount we took earlier
+ * is still needed.
+ */
+static int try_to_split_thp_page(struct page *page, bool release)
 {
        int ret;
 
@@ -1714,7 +1719,7 @@ static int try_to_split_thp_page(struct page *page)
        ret = split_huge_page(page);
        unlock_page(page);
 
-       if (unlikely(ret))
+       if (ret && release)
                put_page(page);
 
        return ret;
@@ -2186,6 +2191,22 @@ out:
        return rc;
 }
 
+/*
+ * The calling condition is as such: thp split failed, page might have
+ * been RDMA pinned, not much can be done for recovery.
+ * But a SIGBUS should be delivered with vaddr provided so that the user
+ * application has a chance to recover. Also, application processes'
+ * election for MCE early killed will be honored.
+ */
+static void kill_procs_now(struct page *p, unsigned long pfn, int flags,
+                               struct folio *folio)
+{
+       LIST_HEAD(tokill);
+
+       collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
+       kill_procs(&tokill, true, pfn, flags);
+}
+
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -2327,8 +2348,11 @@ try_again:
                 * page is a valid handlable page.
                 */
                folio_set_has_hwpoisoned(folio);
-               if (try_to_split_thp_page(p) < 0) {
-                       res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
+               if (try_to_split_thp_page(p, false) < 0) {
+                       res = -EHWPOISON;
+                       kill_procs_now(p, pfn, flags, folio);
+                       put_page(p);
+                       action_result(pfn, MF_MSG_UNSPLIT_THP, MF_FAILED);
                        goto unlock_mutex;
                }
                VM_BUG_ON_PAGE(!page_count(p), p);
@@ -2709,7 +2733,7 @@ static int soft_offline_in_use_page(struct page *page)
        };
 
        if (!huge && folio_test_large(folio)) {
-               if (try_to_split_thp_page(page)) {
+               if (try_to_split_thp_page(page, true)) {
                        pr_info("soft offline: %#lx: thp split failed\n", pfn);
                        return -EBUSY;
                }