]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
authorJane Chu <jane.chu@oracle.com>
Tue, 20 Jan 2026 23:22:33 +0000 (16:22 -0700)
committerAndrew Morton <akpm@linux-foundation.org>
Tue, 27 Jan 2026 03:03:46 +0000 (19:03 -0800)
When a newly poisoned subpage ends up in an already poisoned hugetlb
folio, 'num_poisoned_pages' is incremented, but the per node ->mf_stats is
not.  Fix the inconsistency by designating action_result() to update them
both.

While at it, define __get_huge_page_for_hwpoison() return values in terms
of symbol names for better readability.  Also rename
folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison() since the
function does more than the conventional bit setting and the fact that
three possible return values are expected.

Link: https://lkml.kernel.org/r/20260120232234.3462258-1-jane.chu@oracle.com
Fixes: 18f41fa616ee ("mm: memory-failure: bump memory failure stats to pglist_data")
Signed-off-by: Jane Chu <jane.chu@oracle.com>
Acked-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Chris Mason <clm@meta.com>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jiaqi Yan <jiaqiyan@google.com>
Cc: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: William Roche <william.roche@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/memory-failure.c

index c80c2907da3332e0210da7bbb1888634b2afd280..473204359e1f6287b6828f05a41c66b7d09c132c 100644 (file)
@@ -1883,12 +1883,22 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
        return count;
 }
 
-static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
+#define        MF_HUGETLB_FREED                0       /* freed hugepage */
+#define        MF_HUGETLB_IN_USED              1       /* in-use hugepage */
+#define        MF_HUGETLB_NON_HUGEPAGE         2       /* not a hugepage */
+#define        MF_HUGETLB_FOLIO_PRE_POISONED   3       /* folio already poisoned */
+#define        MF_HUGETLB_PAGE_PRE_POISONED    4       /* exact page already poisoned */
+#define        MF_HUGETLB_RETRY                5       /* hugepage is busy, retry */
+/*
+ * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
+ * to keep track of the poisoned pages.
+ */
+static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
 {
        struct llist_head *head;
        struct raw_hwp_page *raw_hwp;
        struct raw_hwp_page *p;
-       int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
+       int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0;
 
        /*
         * Once the hwpoison hugepage has lost reliable raw error info,
@@ -1896,20 +1906,17 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
         * so skip to add additional raw error info.
         */
        if (folio_test_hugetlb_raw_hwp_unreliable(folio))
-               return -EHWPOISON;
+               return MF_HUGETLB_FOLIO_PRE_POISONED;
        head = raw_hwp_list_head(folio);
        llist_for_each_entry(p, head->first, node) {
                if (p->page == page)
-                       return -EHWPOISON;
+                       return MF_HUGETLB_PAGE_PRE_POISONED;
        }
 
        raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
        if (raw_hwp) {
                raw_hwp->page = page;
                llist_add(&raw_hwp->node, head);
-               /* the first error event will be counted in action_result(). */
-               if (ret)
-                       num_poisoned_pages_inc(page_to_pfn(page));
        } else {
                /*
                 * Failed to save raw error info.  We no longer trace all
@@ -1957,42 +1964,39 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
 
 /*
  * Called from hugetlb code with hugetlb_lock held.
- *
- * Return values:
- *   0             - free hugepage
- *   1             - in-use hugepage
- *   2             - not a hugepage
- *   -EBUSY        - the hugepage is busy (try to retry)
- *   -EHWPOISON    - the hugepage is already hwpoisoned
  */
 int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
                                 bool *migratable_cleared)
 {
        struct page *page = pfn_to_page(pfn);
        struct folio *folio = page_folio(page);
-       int ret = 2;    /* fallback to normal page handling */
        bool count_increased = false;
+       int ret, rc;
 
-       if (!folio_test_hugetlb(folio))
+       if (!folio_test_hugetlb(folio)) {
+               ret = MF_HUGETLB_NON_HUGEPAGE;
                goto out;
-
-       if (flags & MF_COUNT_INCREASED) {
-               ret = 1;
+       } else if (flags & MF_COUNT_INCREASED) {
+               ret = MF_HUGETLB_IN_USED;
                count_increased = true;
        } else if (folio_test_hugetlb_freed(folio)) {
-               ret = 0;
+               ret = MF_HUGETLB_FREED;
        } else if (folio_test_hugetlb_migratable(folio)) {
-               ret = folio_try_get(folio);
-               if (ret)
+               if (folio_try_get(folio)) {
+                       ret = MF_HUGETLB_IN_USED;
                        count_increased = true;
+               } else {
+                       ret = MF_HUGETLB_FREED;
+               }
        } else {
-               ret = -EBUSY;
+               ret = MF_HUGETLB_RETRY;
                if (!(flags & MF_NO_RETRY))
                        goto out;
        }
 
-       if (folio_set_hugetlb_hwpoison(folio, page)) {
-               ret = -EHWPOISON;
+       rc = hugetlb_update_hwpoison(folio, page);
+       if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
+               ret = rc;
                goto out;
        }
 
@@ -2017,10 +2021,16 @@ out:
  * with basic operations like hugepage allocation/free/demotion.
  * So some of prechecks for hwpoison (pinning, and testing/setting
  * PageHWPoison) should be done in single hugetlb_lock range.
+ * Returns:
+ *     0               - not hugetlb, or recovered
+ *     -EBUSY          - not recovered
+ *     -EOPNOTSUPP     - hwpoison_filter'ed
+ *     -EHWPOISON      - folio or exact page already poisoned
+ *     -EFAULT         - kill_accessing_process finds current->mm null
  */
 static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
 {
-       int res;
+       int res, rv;
        struct page *p = pfn_to_page(pfn);
        struct folio *folio;
        unsigned long page_flags;
@@ -2029,22 +2039,31 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
        *hugetlb = 1;
 retry:
        res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
-       if (res == 2) { /* fallback to normal page handling */
+       switch (res) {
+       case MF_HUGETLB_NON_HUGEPAGE:   /* fallback to normal page handling */
                *hugetlb = 0;
                return 0;
-       } else if (res == -EHWPOISON) {
-               if (flags & MF_ACTION_REQUIRED) {
-                       folio = page_folio(p);
-                       res = kill_accessing_process(current, folio_pfn(folio), flags);
-               }
-               action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
-               return res;
-       } else if (res == -EBUSY) {
+       case MF_HUGETLB_RETRY:
                if (!(flags & MF_NO_RETRY)) {
                        flags |= MF_NO_RETRY;
                        goto retry;
                }
                return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
+       case MF_HUGETLB_FOLIO_PRE_POISONED:
+       case MF_HUGETLB_PAGE_PRE_POISONED:
+               rv = -EHWPOISON;
+               if (flags & MF_ACTION_REQUIRED) {
+                       folio = page_folio(p);
+                       rv = kill_accessing_process(current, folio_pfn(folio), flags);
+               }
+               if (res == MF_HUGETLB_PAGE_PRE_POISONED)
+                       action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
+               else
+                       action_result(pfn, MF_MSG_HUGE, MF_FAILED);
+               return rv;
+       default:
+               WARN_ON((res != MF_HUGETLB_FREED) && (res != MF_HUGETLB_IN_USED));
+               break;
        }
 
        folio = page_folio(p);
@@ -2055,7 +2074,7 @@ retry:
                if (migratable_cleared)
                        folio_set_hugetlb_migratable(folio);
                folio_unlock(folio);
-               if (res == 1)
+               if (res == MF_HUGETLB_IN_USED)
                        folio_put(folio);
                return -EOPNOTSUPP;
        }
@@ -2064,7 +2083,7 @@ retry:
         * Handling free hugepage.  The possible race with hugepage allocation
         * or demotion can be prevented by PageHWPoison flag.
         */
-       if (res == 0) {
+       if (res == MF_HUGETLB_FREED) {
                folio_unlock(folio);
                if (__page_handle_poison(p) > 0) {
                        page_ref_inc(p);