Merge tag 'mm-stable-2025-12-03-21-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Dec 2025 21:52:43 +0000 (13:52 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Dec 2025 21:52:43 +0000 (13:52 -0800)
Pull MM updates from Andrew Morton:

  "__vmalloc()/kvmalloc() and no-block support" (Uladzislau Rezki)
     Rework the vmalloc() code to support non-blocking allocations
     (GFP_ATOMIC, GFP_NOWAIT)
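
     As a minimal caller-side sketch of what this enables (the error
     handling is illustrative, not taken from the series itself):

         /*
          * With this series, kvmalloc() may be called from contexts
          * that cannot sleep. A NULL return must be handled, e.g. by
          * deferring the work rather than retrying with GFP_KERNEL.
          */
         void *buf = kvmalloc(size, GFP_NOWAIT | __GFP_NOWARN);
         if (!buf)
                 return -ENOMEM;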

  "ksm: fix exec/fork inheritance" (xu xin)
     Fix a rare case where the KSM MMF_VM_MERGE_ANY prctl state is not
     inherited across fork/exec

  "mm/zswap: misc cleanup of code and documentations" (SeongJae Park)
     Some light maintenance work on the zswap code

  "mm/page_owner: add debugfs files 'show_handles' and 'show_stacks_handles'" (Mauricio Faria de Oliveira)
     Enhance the /sys/kernel/debug/page_owner debug feature by adding
     unique identifiers to differentiate the various stack traces so
     that userspace monitoring tools can better match stack traces over
     time

  "mm/page_alloc: pcp->batch cleanups" (Joshua Hahn)
     Minor alterations to the page allocator's per-cpu-pages feature

  "Improve UFFDIO_MOVE scalability by removing anon_vma lock" (Lokesh Gidra)
     Address a scalability issue in userfaultfd's UFFDIO_MOVE operation

  "kasan: cleanups for kasan_enabled() checks" (Sabyrzhan Tasbolatov)

  "drivers/base/node: fold node register and unregister functions" (Donet Tom)
     Clean up the NUMA node handling code a little

  "mm: some optimizations for prot numa" (Kefeng Wang)
     Cleanups and small optimizations to the NUMA allocation hinting
     code

  "mm/page_alloc: Batch callers of free_pcppages_bulk" (Joshua Hahn)
     Address long lock hold times at boot on large machines. These were
     causing (harmless) softlockup warnings

  "optimize the logic for handling dirty file folios during reclaim" (Baolin Wang)
     Remove some now-unnecessary work from page reclaim

  "mm/damon: allow DAMOS auto-tuned for per-memcg per-node memory usage" (SeongJae Park)
     Enhance the DAMOS auto-tuning feature

  "mm/damon: fixes for address alignment issues in DAMON_LRU_SORT and DAMON_RECLAIM" (Quanmin Yan)
     Fix DAMON_LRU_SORT and DAMON_RECLAIM under certain userspace
     configurations

  "expand mmap_prepare functionality, port more users" (Lorenzo Stoakes)
     Enhance the new(ish) file_operations.mmap_prepare() method and port
     additional callsites from the old ->mmap() over to ->mmap_prepare()

  "Fix stale IOTLB entries for kernel address space" (Lu Baolu)
     Fix a bug (and possible security issue on non-x86) in the IOMMU
     code. In some situations the IOMMU could be left hanging onto a
     stale kernel pagetable entry

  "mm/huge_memory: cleanup __split_unmapped_folio()" (Wei Yang)
     Clean up and optimize the folio splitting code

  "mm, swap: misc cleanup and bugfix" (Kairui Song)
     Some cleanups and a minor fix in the swap discard code

  "mm/damon: misc documentation fixups" (SeongJae Park)

  "mm/damon: support pin-point targets removal" (SeongJae Park)
     Permit userspace to remove a specific monitoring target in the
     middle of the current targets list

  "mm: MISC follow-up patches for linux/pgalloc.h" (Harry Yoo)
     A couple of cleanups related to mm header file inclusion

  "mm/swapfile.c: select swap devices of default priority round robin" (Baoquan He)
     Improve the selection of swap devices for NUMA machines

  "mm: Convert memory block states (MEM_*) macros to enums" (Israel Batista)
     Change the memory block labels from macros to enums so they will
     appear in kernel debug info

  "ksm: perform a range-walk to jump over holes in break_ksm" (Pedro Demarchi Gomes)
     Address an inefficiency when KSM unmerges an address range

  "mm/damon/tests: fix memory bugs in kunit tests" (SeongJae Park)
     Fix leaks and unhandled malloc() failures in DAMON userspace unit
     tests

  "some cleanups for pageout()" (Baolin Wang)
     Clean up a couple of minor things in the page scanner's
     writeback-for-eviction code

  "mm/hugetlb: refactor sysfs/sysctl interfaces" (Hui Zhu)
     Move hugetlb's sysfs/sysctl handling code into a new file

  "introduce VM_MAYBE_GUARD and make it sticky" (Lorenzo Stoakes)
     Make the VMA guard regions available in /proc/pid/smaps and
     improve the mergeability of guarded VMAs

  "mm: perform guard region install/remove under VMA lock" (Lorenzo Stoakes)
     Reduce mmap lock contention for callers performing VMA guard region
     operations

  "vma_start_write_killable" (Matthew Wilcox)
     Start work on permitting applications to be killed when they are
     waiting on a read_lock on the VMA lock
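
     A sketch of the intended call pattern; the return convention here
     (0 on success, -EINTR on a fatal signal) is an assumption modeled
     on other *_killable kernel primitives, not something this log
     spells out:

         err = vma_start_write_killable(vma);
         if (err)
                 return err;     /* fatal signal while waiting */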

  "mm/damon/tests: add more tests for online parameters commit" (SeongJae Park)
     Add additional userspace testing of DAMON's "commit" feature

  "mm/damon: misc cleanups" (SeongJae Park)

  "make VM_SOFTDIRTY a sticky VMA flag" (Lorenzo Stoakes)
     Address the possible loss of a VMA's VM_SOFTDIRTY flag when that
     VMA is merged with another

  "mm: support device-private THP" (Balbir Singh)
     Introduce support for Transparent Huge Page (THP) migration in zone
     device-private memory

  "Optimize folio split in memory failure" (Zi Yan)

  "mm/huge_memory: Define split_type and consolidate split support checks" (Wei Yang)
     Some more cleanups in the folio splitting code

  "mm: remove is_swap_[pte, pmd]() + non-swap entries, introduce leaf entries" (Lorenzo Stoakes)
     Clean up our handling of pagetable leaf entries by introducing the
     concept of 'software leaf entries', of type softleaf_t
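
     A short sketch of the flavour of the new API, using only names
     that appear verbatim in the copy_huge_pmd() hunk further down
     this page:

         /* Decode a non-present PMD into a typed software leaf entry. */
         softleaf_t entry = softleaf_from_pmd(pmd);

         if (softleaf_is_migration_write(entry))
                 entry = make_readable_migration_entry(swp_offset(entry));
         else if (softleaf_is_device_private(entry))
                 src_folio = softleaf_to_folio(entry);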

  "reparent the THP split queue" (Muchun Song)
     Reparent the THP split queue to its parent memcg. This is in
     preparation for addressing the long-standing "dying memcg" problem,
     wherein dead memcgs linger for too long, consuming memory
     resources

  "unify PMD scan results and remove redundant cleanup" (Wei Yang)
     A little cleanup in the hugepage collapse code

  "zram: introduce writeback bio batching" (Sergey Senozhatsky)
     Improve zram writeback efficiency by introducing batched bio
     writeback support

  "memcg: cleanup the memcg stats interfaces" (Shakeel Butt)
     Clean up our handling of the interrupt safety of some memcg stats

  "make vmalloc gfp flags usage more apparent" (Vishal Moola)
     Clean up vmalloc's handling of incoming GFP flags

  "mm: Add soft-dirty and uffd-wp support for RISC-V" (Chunyan Zhang)
     Teach soft dirty and userfaultfd write protect tracking to use
     RISC-V's Svrsw60t59b extension

  "mm: swap: small fixes and comment cleanups" (Youngjun Park)
     Fix a small bug and clean up some of the swap code

  "initial work on making VMA flags a bitmap" (Lorenzo Stoakes)
     Start work on converting the vma struct's flags to a bitmap, so we
     stop running out of them, especially on 32-bit

  "mm/swapfile: fix and cleanup swap list iterations" (Youngjun Park)
     Address a possible bug in the swap discard code and clean things
     up a little

[ This merge also reverts commit ebb9aeb980e5 ("vfio/nvgrace-gpu:
  register device memory for poison handling") because it looks
  broken to me, I've asked for clarification   - Linus ]

* tag 'mm-stable-2025-12-03-21-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (321 commits)
  mm: fix vma_start_write_killable() signal handling
  mm/swapfile: use plist_for_each_entry in __folio_throttle_swaprate
  mm/swapfile: fix list iteration when next node is removed during discard
  fs/proc/task_mmu.c: fix make_uffd_wp_huge_pte() huge pte handling
  mm/kfence: add reboot notifier to disable KFENCE on shutdown
  memcg: remove inc/dec_lruvec_kmem_state helpers
  selftests/mm/uffd: initialize char variable to Null
  mm: fix DEBUG_RODATA_TEST indentation in Kconfig
  mm: introduce VMA flags bitmap type
  tools/testing/vma: eliminate dependency on vma->__vm_flags
  mm: simplify and rename mm flags function for clarity
  mm: declare VMA flags by bit
  zram: fix a spelling mistake
  mm/page_alloc: optimize lowmem_reserve max lookup using its semantic monotonicity
  mm/vmscan: skip increasing kswapd_failures when reclaim was boosted
  pagemap: update BUDDY flag documentation
  mm: swap: remove scan_swap_map_slots() references from comments
  mm: swap: change swap_alloc_slow() to void
  mm, swap: remove redundant comment for read_swap_cache_async
  mm, swap: use SWP_SOLIDSTATE to determine if swap is rotational
  ...

53 files changed:
.clang-format
Documentation/admin-guide/cgroup-v2.rst
Documentation/filesystems/porting.rst
MAINTAINERS
arch/arm64/mm/mmu.c
arch/riscv/Kconfig
arch/s390/mm/gmap.c
arch/s390/mm/pgtable.c
arch/x86/Kconfig
arch/x86/kernel/cpu/sgx/driver.c
arch/x86/mm/init_64.c
drivers/acpi/apei/ghes.c
drivers/base/memory.c
drivers/firmware/efi/riscv-runtime.c
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/pci/p2pdma.c
drivers/s390/char/sclp_sd.c
fs/dax.c
fs/ntfs3/file.c
fs/userfaultfd.c
include/linux/fs.h
include/linux/huge_mm.h
include/linux/iommu.h
include/linux/memory.h
include/linux/memremap.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/pgtable.h
include/linux/shmem_fs.h
include/net/sock.h
include/ras/ras_event.h
io_uring/memmap.c
kernel/bpf/syscall.c
kernel/cgroup/cgroup.c
kernel/fork.c
mm/filemap.c
mm/hmm.c
mm/huge_memory.c
mm/internal.h
mm/kasan/common.c
mm/kfence/core.c
mm/memcontrol.c
mm/memory.c
mm/memory_hotplug.c
mm/page-writeback.c
mm/page_alloc.c
mm/secretmem.c
mm/shmem.c
mm/slab_common.c
mm/slub.c
mm/vmscan.c
mm/workingset.c
rust/bindings/bindings_helper.h

diff --cc .clang-format
Simple merge
Simple merge
diff --cc MAINTAINERS
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 981a76b6b7c0cc586e008b7f29ecd7febcdf201a,ee74b75d3e1f2c00fba5f33905fa493136e0ff82..4a2fc7ab42c349a15d397c5194bf3d732dd65658
@@@ -200,12 -200,13 +200,13 @@@ static const struct attribute_group p2p
        .name = "p2pmem",
  };
  
- static void p2pdma_page_free(struct page *page)
+ static void p2pdma_folio_free(struct folio *folio)
  {
+       struct page *page = &folio->page;
        struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page));
        /* safe to dereference while a reference is held to the percpu ref */
 -      struct pci_p2pdma *p2pdma =
 -              rcu_dereference_protected(pgmap->provider->p2pdma, 1);
 +      struct pci_p2pdma *p2pdma = rcu_dereference_protected(
 +              to_pci_dev(pgmap->mem->owner)->p2pdma, 1);
        struct percpu_ref *ref;
  
        gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page),
Simple merge
diff --cc fs/dax.c
Simple merge
diff --cc fs/ntfs3/file.c
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index ba1515160894e303c608470358c47cb3ed242115,ca3eb1db6cc87e56ca63692a59b1cab3da3209b8..faeaa921e55b270cb410f6078f453d7328ccacdc
@@@ -64,9 -64,21 +64,19 @@@ struct memory_group 
        };
  };
  
 -      MEM_PREPARE_ONLINE,
 -      MEM_FINISH_OFFLINE,
+ enum memory_block_state {
+       /* These states are exposed to userspace as text strings in sysfs */
+       MEM_ONLINE,             /* exposed to userspace */
+       MEM_GOING_OFFLINE,      /* exposed to userspace */
+       MEM_OFFLINE,            /* exposed to userspace */
+       MEM_GOING_ONLINE,
+       MEM_CANCEL_ONLINE,
+       MEM_CANCEL_OFFLINE,
+ };
  struct memory_block {
        unsigned long start_section_nr;
-       unsigned long state;            /* serialized by the dev->lock */
+       enum memory_block_state state;  /* serialized by the dev->lock */
        int online_type;                /* for passing data to online routine */
        int nid;                        /* NID for this memory block */
        /*
@@@ -89,15 -101,14 +99,7 @@@ int arch_get_memory_phys_device(unsigne
  unsigned long memory_block_size_bytes(void);
  int set_memory_block_size_order(unsigned int order);
  
- /* These states are exposed to userspace as text strings in sysfs */
- #define       MEM_ONLINE              (1<<0) /* exposed to userspace */
- #define       MEM_GOING_OFFLINE       (1<<1) /* exposed to userspace */
- #define       MEM_OFFLINE             (1<<2) /* exposed to userspace */
- #define       MEM_GOING_ONLINE        (1<<3)
- #define       MEM_CANCEL_ONLINE       (1<<4)
- #define       MEM_CANCEL_OFFLINE      (1<<5)
  struct memory_notify {
 -      /*
 -       * The altmap_start_pfn and altmap_nr_pages fields are designated for
 -       * specifying the altmap range and are exclusively intended for use in
 -       * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
 -       */
 -      unsigned long altmap_start_pfn;
 -      unsigned long altmap_nr_pages;
        unsigned long start_pfn;
        unsigned long nr_pages;
  };
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc kernel/fork.c
Simple merge
diff --cc mm/filemap.c
Simple merge
diff --cc mm/hmm.c
Simple merge
index 1192e62531cd53b946d6c251dc9c16009b052c88,041b554c71158e398e21e6c1dc54858569aeb2af..f7c565f11a985ab1cbb3db96424f1fb12117e523
@@@ -1642,32 -1761,75 +1761,88 @@@ vm_fault_t vmf_insert_folio_pud(struct 
  EXPORT_SYMBOL_GPL(vmf_insert_folio_pud);
  #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
  
 -void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 +/**
 + * touch_pmd - Mark page table pmd entry as accessed and dirty (for write)
 + * @vma: The VMA covering @addr
 + * @addr: The virtual address
 + * @pmd: pmd pointer into the page table mapping @addr
 + * @write: Whether it's a write access
 + *
 + * Return: whether the pmd entry is changed
 + */
 +bool touch_pmd(struct vm_area_struct *vma, unsigned long addr,
               pmd_t *pmd, bool write)
  {
 -      pmd_t _pmd;
 +      pmd_t entry;
  
 -      _pmd = pmd_mkyoung(*pmd);
 +      entry = pmd_mkyoung(*pmd);
        if (write)
 -              _pmd = pmd_mkdirty(_pmd);
 +              entry = pmd_mkdirty(entry);
        if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
 -                                pmd, _pmd, write))
 +                                pmd, entry, write)) {
                update_mmu_cache_pmd(vma, addr, pmd);
 +              return true;
 +      }
 +
 +      return false;
  }
  
+ static void copy_huge_non_present_pmd(
+               struct mm_struct *dst_mm, struct mm_struct *src_mm,
+               pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
+               struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
+               pmd_t pmd, pgtable_t pgtable)
+ {
+       softleaf_t entry = softleaf_from_pmd(pmd);
+       struct folio *src_folio;
+       VM_WARN_ON_ONCE(!pmd_is_valid_softleaf(pmd));
+       if (softleaf_is_migration_write(entry) ||
+           softleaf_is_migration_read_exclusive(entry)) {
+               entry = make_readable_migration_entry(swp_offset(entry));
+               pmd = swp_entry_to_pmd(entry);
+               if (pmd_swp_soft_dirty(*src_pmd))
+                       pmd = pmd_swp_mksoft_dirty(pmd);
+               if (pmd_swp_uffd_wp(*src_pmd))
+                       pmd = pmd_swp_mkuffd_wp(pmd);
+               set_pmd_at(src_mm, addr, src_pmd, pmd);
+       } else if (softleaf_is_device_private(entry)) {
+               /*
+                * For device private entries, since there are no
+                * read exclusive entries, writable = !readable
+                */
+               if (softleaf_is_device_private_write(entry)) {
+                       entry = make_readable_device_private_entry(swp_offset(entry));
+                       pmd = swp_entry_to_pmd(entry);
+                       if (pmd_swp_soft_dirty(*src_pmd))
+                               pmd = pmd_swp_mksoft_dirty(pmd);
+                       if (pmd_swp_uffd_wp(*src_pmd))
+                               pmd = pmd_swp_mkuffd_wp(pmd);
+                       set_pmd_at(src_mm, addr, src_pmd, pmd);
+               }
+               src_folio = softleaf_to_folio(entry);
+               VM_WARN_ON(!folio_test_large(src_folio));
+               folio_get(src_folio);
+               /*
+                * folio_try_dup_anon_rmap_pmd does not fail for
+                * device private entries.
+                */
+               folio_try_dup_anon_rmap_pmd(src_folio, &src_folio->page,
+                                           dst_vma, src_vma);
+       }
+       add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+       mm_inc_nr_ptes(dst_mm);
+       pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+       if (!userfaultfd_wp(dst_vma))
+               pmd = pmd_swp_clear_uffd_wp(pmd);
+       set_pmd_at(dst_mm, addr, dst_pmd, pmd);
+ }
  int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
                  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
diff --cc mm/internal.h
Simple merge
Simple merge
Simple merge
diff --cc mm/memcontrol.c
Simple merge
diff --cc mm/memory.c
index aad432e71251c3a7e9bec44367d075c72b2ceca4,6675e87eb7dd74100aa447a86d7ca646bef3d232..2a55edc48a6562c7af235121eb6fb9abdd183697
@@@ -6332,34 -6344,36 +6371,39 @@@ retry_pud
        if (pmd_none(*vmf.pmd) &&
            thp_vma_allowable_order(vma, vm_flags, TVA_PAGEFAULT, PMD_ORDER)) {
                ret = create_huge_pmd(&vmf);
-               if (!(ret & VM_FAULT_FALLBACK))
+               if (ret & VM_FAULT_FALLBACK)
+                       goto fallback;
+               else
                        return ret;
-       } else {
-               vmf.orig_pmd = pmdp_get_lockless(vmf.pmd);
+       }
  
-               if (unlikely(is_swap_pmd(vmf.orig_pmd))) {
-                       VM_BUG_ON(thp_migration_supported() &&
-                                         !is_pmd_migration_entry(vmf.orig_pmd));
-                       if (is_pmd_migration_entry(vmf.orig_pmd))
-                               pmd_migration_entry_wait(mm, vmf.pmd);
-                       return 0;
-               }
-               if (pmd_trans_huge(vmf.orig_pmd)) {
-                       if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
-                               return do_huge_pmd_numa_page(&vmf);
+       vmf.orig_pmd = pmdp_get_lockless(vmf.pmd);
+       if (pmd_none(vmf.orig_pmd))
+               goto fallback;
  
-                       if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
-                           !pmd_write(vmf.orig_pmd)) {
-                               ret = wp_huge_pmd(&vmf);
-                               if (!(ret & VM_FAULT_FALLBACK))
-                                       return ret;
-                       } else {
-                               vmf.ptl = pmd_lock(mm, vmf.pmd);
-                               if (!huge_pmd_set_accessed(&vmf))
-                                       fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD);
-                               spin_unlock(vmf.ptl);
-                               return 0;
-                       }
+       if (unlikely(!pmd_present(vmf.orig_pmd))) {
+               if (pmd_is_device_private_entry(vmf.orig_pmd))
+                       return do_huge_pmd_device_private(&vmf);
+               if (pmd_is_migration_entry(vmf.orig_pmd))
+                       pmd_migration_entry_wait(mm, vmf.pmd);
+               return 0;
+       }
+       if (pmd_trans_huge(vmf.orig_pmd)) {
+               if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
+                       return do_huge_pmd_numa_page(&vmf);
+               if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
+                   !pmd_write(vmf.orig_pmd)) {
+                       ret = wp_huge_pmd(&vmf);
+                       if (!(ret & VM_FAULT_FALLBACK))
+                               return ret;
+               } else {
 -                      huge_pmd_set_accessed(&vmf);
++                      vmf.ptl = pmd_lock(mm, vmf.pmd);
++                      if (!huge_pmd_set_accessed(&vmf))
++                              fix_spurious_fault(&vmf, PGTABLE_LEVEL_PMD);
++                      spin_unlock(vmf.ptl);
+                       return 0;
                }
        }
  
Simple merge
Simple merge
diff --cc mm/page_alloc.c
Simple merge
diff --cc mm/secretmem.c
index f0ef4e198884377a64c2c2682b63eec19f29945c,37f6d1097853f7a74e646cf309738c41d1728fba..edf111e0a1bbba9d33278f991cdf1108d8fb0c4f
@@@ -224,7 -224,10 +224,7 @@@ err_free_inode
  
  SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
  {
-       /* make sure local flags do not confict with global fcntl.h */
 -      struct file *file;
 -      int fd, err;
 -
+       /* make sure local flags do not conflict with global fcntl.h */
        BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
  
        if (!secretmem_enable || !can_set_direct_map())
diff --cc mm/shmem.c
Simple merge
Simple merge
diff --cc mm/slub.c
Simple merge
diff --cc mm/vmscan.c
Simple merge
diff --cc mm/workingset.c
Simple merge
Simple merge