From: Usama Arif
Date: Fri, 30 Aug 2024 10:03:40 +0000 (+0100)
Subject: mm: add sysfs entry to disable splitting underused THPs
X-Git-Tag: v6.12-rc1~115^2~92
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=81d3ff3c6f76306b22138d69e980634f53bf17fa;p=thirdparty%2Fkernel%2Flinux.git

mm: add sysfs entry to disable splitting underused THPs

If disabled, THPs faulted in or collapsed will not be added to
_deferred_list, and therefore won't be considered for splitting under
memory pressure if underused.

Link: https://lkml.kernel.org/r/20240830100438.3623486-7-usamaarif642@gmail.com
Signed-off-by: Usama Arif
Cc: Alexander Zhu
Cc: Barry Song
Cc: David Hildenbrand
Cc: Domenico Cerasuolo
Cc: Johannes Weiner
Cc: Jonathan Corbet
Cc: Kairui Song
Cc: Matthew Wilcox
Cc: Mike Rapoport
Cc: Nico Pache
Cc: Rik van Riel
Cc: Roman Gushchin
Cc: Ryan Roberts
Cc: Shakeel Butt
Cc: Shuang Zhai
Cc: Shuang Zhai
Cc: Yu Zhao
Cc: Hugh Dickins
Signed-off-by: Andrew Morton
---

diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index aca0cff852b85..cfdd16a52e39f 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -202,6 +202,16 @@ PMD-mappable transparent hugepage::
 
 	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
 
+All THPs at fault and collapse time will be added to _deferred_list,
+and will therefore be split under memory pressure if they are considered
+"underused". A THP is underused if the number of zero-filled pages in
+the THP is above max_ptes_none (see below). It is possible to disable
+this behaviour by writing 0 to shrink_underused, and enable it by writing
+1 to it::
+
+	echo 0 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+	echo 1 > /sys/kernel/mm/transparent_hugepage/shrink_underused
+
 khugepaged will be automatically started when PMD-sized THP is enabled
 (either of the per-size anon control or the top-level control are set to
 "always" or "madvise"), and it'll be automatically shutdown when
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cec5bce046a0d..691702e39f851 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -74,6 +74,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
 					  struct shrink_control *sc);
 static unsigned long deferred_split_scan(struct shrinker *shrink,
 					 struct shrink_control *sc);
+static bool split_underused_thp = true;
 
 static atomic_t huge_zero_refcount;
 struct folio *huge_zero_folio __read_mostly;
@@ -440,6 +441,27 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj,
 static struct kobj_attribute hpage_pmd_size_attr =
 	__ATTR_RO(hpage_pmd_size);
 
+static ssize_t split_underused_thp_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", split_underused_thp);
+}
+
+static ssize_t split_underused_thp_store(struct kobject *kobj,
+					 struct kobj_attribute *attr,
+					 const char *buf, size_t count)
+{
+	int err = kstrtobool(buf, &split_underused_thp);
+
+	if (err < 0)
+		return err;
+
+	return count;
+}
+
+static struct kobj_attribute split_underused_thp_attr = __ATTR(
+	shrink_underused, 0644, split_underused_thp_show, split_underused_thp_store);
+
 static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
 	&defrag_attr.attr,
@@ -448,6 +470,7 @@ static struct attribute *hugepage_attr[] = {
 #ifdef CONFIG_SHMEM
 	&shmem_enabled_attr.attr,
 #endif
+	&split_underused_thp_attr.attr,
 	NULL,
 };
 
@@ -3557,6 +3580,9 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
 	if (folio_order(folio) <= 1)
 		return;
 
+	if (!partially_mapped && !split_underused_thp)
+		return;
+
 	/*
 	 * The try_to_unmap() in page reclaim path might reach here too,
 	 * this may cause a race condition to corrupt deferred split queue.
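
As an illustrative aside, not part of the patch itself: a minimal userspace sketch of
driving the new knob programmatically, assuming only the sysfs path and the 0/1
semantics introduced above; the helper name and the error handling are hypothetical.

	#include <stdio.h>

	#define SHRINK_UNDERUSED \
		"/sys/kernel/mm/transparent_hugepage/shrink_underused"

	/* Write "0" to stop queueing underused THPs for splitting, "1" to resume. */
	static int set_shrink_underused(int enable)
	{
		FILE *f = fopen(SHRINK_UNDERUSED, "w");

		if (!f)
			return -1;
		fprintf(f, "%d\n", enable);
		return fclose(f);
	}

	int main(void)
	{
		char cur[8] = "";
		FILE *f = fopen(SHRINK_UNDERUSED, "r");

		/* Read back the current setting; the knob reports "0" or "1". */
		if (f) {
			if (fgets(cur, sizeof(cur), f))
				printf("shrink_underused: %s", cur);
			fclose(f);
		}

		return set_shrink_underused(0);	/* disable splitting of underused THPs */
	}

Run as root. Since the store handler parses input with kstrtobool(), "y"/"n" style
input is also accepted; the echo commands in the documentation hunk are just the
canonical form.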