git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
mm/khugepaged: retry with sync writeback for MADV_COLLAPSE
author: Shivank Garg <shivankg@amd.com>
Sun, 18 Jan 2026 19:09:43 +0000 (19:09 +0000)
committer: Andrew Morton <akpm@linux-foundation.org>
Tue, 27 Jan 2026 04:02:12 +0000 (20:02 -0800)
When MADV_COLLAPSE is called on file-backed mappings (e.g., executable
text sections), the pages may still be dirty from recent writes.
collapse_file() will trigger async writeback and fail with
SCAN_PAGE_DIRTY_OR_WRITEBACK (-EAGAIN).

MADV_COLLAPSE is a synchronous operation where userspace expects immediate
results.  If the collapse fails due to dirty pages, perform synchronous
writeback on the specific range and retry once.

This avoids spurious failures for freshly written executables while
avoiding unnecessary synchronous I/O for mappings that are already clean.

Link: https://lkml.kernel.org/r/20260118190939.8986-7-shivankg@amd.com
Signed-off-by: Shivank Garg <shivankg@amd.com>
Reported-by: Branden Moore <Branden.Moore@amd.com>
Closes: https://lore.kernel.org/all/4e26fe5e-7374-467c-a333-9dd48f85d7cc@amd.com
Fixes: 34488399fa08 ("mm/madvise: add file and shmem support to MADV_COLLAPSE")
Suggested-by: David Hildenbrand <david@kernel.org>
Tested-by: Lance Yang <lance.yang@linux.dev>
Acked-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: wang lian <lianux.mm@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/khugepaged.c

index 219dfa2e523c852b4f4a9407fbe9f52b226866c9..16582bdcb6ffd7b09bb9515dbfd3fbfa73abf643 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/dax.h>
 #include <linux/ksm.h>
 #include <linux/pgalloc.h>
+#include <linux/backing-dev.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -2788,7 +2789,9 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 
        for (addr = hstart; addr < hend; addr += HPAGE_PMD_SIZE) {
                int result = SCAN_FAIL;
+               bool triggered_wb = false;
 
+retry:
                if (!mmap_locked) {
                        cond_resched();
                        mmap_read_lock(mm);
@@ -2809,8 +2812,20 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
 
                        mmap_read_unlock(mm);
                        mmap_locked = false;
+                       *lock_dropped = true;
                        result = hpage_collapse_scan_file(mm, addr, file, pgoff,
                                                          cc);
+
+                       if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb &&
+                           mapping_can_writeback(file->f_mapping)) {
+                               loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
+                               loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+
+                               filemap_write_and_wait_range(file->f_mapping, lstart, lend);
+                               triggered_wb = true;
+                               fput(file);
+                               goto retry;
+                       }
                        fput(file);
                } else {
                        result = hpage_collapse_scan_pmd(mm, vma, addr,