mm/ksm: refactor out try_to_merge_with_zero_page()
author    Chengming Zhou <chengming.zhou@linux.dev>
          Fri, 21 Jun 2024 07:54:29 +0000 (15:54 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
          Fri, 5 Jul 2024 01:05:50 +0000 (18:05 -0700)
Patch series "mm/ksm: cmp_and_merge_page() optimizations and cleanup", v2.

This series mainly optimizes cmp_and_merge_page() to have more efficient
separate code flow for ksm page and non-ksm anon page.

- ksm page: obviously doesn't need a checksum calculation.
- anon page: skip the stable tree search if the page is changing fast,
  and try to merge with the zero page before searching for a ksm page
  on the stable tree.

Please see patch 2 for details.
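
As a rough illustration of where the series is heading (a simplified
sketch only, not the actual patch-2 diff: the stable/unstable tree
steps are collapsed into placeholder comments, and locking is
omitted), the separated flow looks like this:

	/* sketch only: real tree search/append helpers are elided */
	static void cmp_and_merge_page(struct page *page,
				       struct ksm_rmap_item *rmap_item)
	{
		unsigned int checksum;

		if (PageKsm(page)) {
			/* ksm page: contents are stable, skip the checksum */
			/* ... search/append on the stable tree ... */
			return;
		}

		/* anon page still changing fast: don't search the trees yet */
		checksum = calc_checksum(page);
		if (rmap_item->oldchecksum != checksum) {
			rmap_item->oldchecksum = checksum;
			return;
		}

		/* cheap zero-page merge attempt before the stable tree */
		if (!try_to_merge_with_zero_page(rmap_item, page))
			return;

		/* ... stable tree search, then unstable tree ... */
	}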

Patch 3 is a cleanup and also a little optimization for the
chain()/chain_prune() interfaces, which had made stable_tree_search()
and stable_tree_insert() overly complex.

I have done simple testing using "hackbench -g 1 -l 300000" (maybe I
need to use a better workload) on my machine, and have seen a little
decrease in ksmd CPU usage and some improvement in cmp_and_merge_page()
latency. In particular, the latency of cmp_and_merge_page() when
handling non-ksm anon pages has improved.

This patch (of 3):

In preparation for later changes, refactor out a new function,
try_to_merge_with_zero_page(), which tries to merge the page with the
zero page.

Link: https://lkml.kernel.org/r/20240621-b4-ksm-scan-optimize-v2-0-1c328aa9e30b@linux.dev
Link: https://lkml.kernel.org/r/20240621-b4-ksm-scan-optimize-v2-1-1c328aa9e30b@linux.dev
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Stefan Roesch <shr@devkernel.io>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/hugetlb.c
mm/ksm.c

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c52810d82ee3f0eb780ede8592611d889bca886d..9691624fcb7925a2c139fcb95c2194bc96f5862c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2666,7 +2666,6 @@ static int gather_surplus_pages(struct hstate *h, long delta)
 retry:
        spin_unlock_irq(&hugetlb_lock);
        for (i = 0; i < needed; i++) {
-               folio = NULL;
                for_each_node_mask(node, cpuset_current_mems_allowed) {
                        if (!mbind_nodemask || node_isset(node, *mbind_nodemask)) {
                                folio = alloc_surplus_hugetlb_folio(h, htlb_alloc_mask(h),
diff --git a/mm/ksm.c b/mm/ksm.c
index acf250791b4e198732107f82204dd0f334f9aa56..dd9ed0bdb9f6936d7c5c3e000f8f5eb9c9c2fb36 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1527,6 +1527,44 @@ out:
        return err;
 }
 
+/*
+ * This function returns 0 if the pages were merged or if they are
+ * no longer merging candidates (e.g., VMA stale), -EFAULT otherwise.
+ */
+static int try_to_merge_with_zero_page(struct ksm_rmap_item *rmap_item,
+                                      struct page *page)
+{
+       struct mm_struct *mm = rmap_item->mm;
+       int err = -EFAULT;
+
+       /*
+        * Same checksum as an empty page. We attempt to merge it with the
+        * appropriate zero page if the user enabled this via sysfs.
+        */
+       if (ksm_use_zero_pages && (rmap_item->oldchecksum == zero_checksum)) {
+               struct vm_area_struct *vma;
+
+               mmap_read_lock(mm);
+               vma = find_mergeable_vma(mm, rmap_item->address);
+               if (vma) {
+                       err = try_to_merge_one_page(vma, page,
+                                       ZERO_PAGE(rmap_item->address));
+                       trace_ksm_merge_one_page(
+                               page_to_pfn(ZERO_PAGE(rmap_item->address)),
+                               rmap_item, mm, err);
+               } else {
+                       /*
+                        * If the vma is out of date, we do not need to
+                        * continue.
+                        */
+                       err = 0;
+               }
+               mmap_read_unlock(mm);
+       }
+
+       return err;
+}
+
 /*
  * try_to_merge_with_ksm_page - like try_to_merge_two_pages,
  * but no new kernel page is allocated: kpage must already be a ksm page.
@@ -2302,7 +2340,6 @@ static void stable_tree_append(struct ksm_rmap_item *rmap_item,
  */
 static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item)
 {
-       struct mm_struct *mm = rmap_item->mm;
        struct ksm_rmap_item *tree_rmap_item;
        struct page *tree_page = NULL;
        struct ksm_stable_node *stable_node;
@@ -2371,36 +2408,9 @@ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_ite
                return;
        }
 
-       /*
-        * Same checksum as an empty page. We attempt to merge it with the
-        * appropriate zero page if the user enabled this via sysfs.
-        */
-       if (ksm_use_zero_pages && (checksum == zero_checksum)) {
-               struct vm_area_struct *vma;
+       if (!try_to_merge_with_zero_page(rmap_item, page))
+               return;
 
-               mmap_read_lock(mm);
-               vma = find_mergeable_vma(mm, rmap_item->address);
-               if (vma) {
-                       err = try_to_merge_one_page(vma, page,
-                                       ZERO_PAGE(rmap_item->address));
-                       trace_ksm_merge_one_page(
-                               page_to_pfn(ZERO_PAGE(rmap_item->address)),
-                               rmap_item, mm, err);
-               } else {
-                       /*
-                        * If the vma is out of date, we do not need to
-                        * continue.
-                        */
-                       err = 0;
-               }
-               mmap_read_unlock(mm);
-               /*
-                * In case of failure, the page was not really empty, so we
-                * need to continue. Otherwise we're done.
-                */
-               if (!err)
-                       return;
-       }
        tree_rmap_item =
                unstable_tree_search_insert(rmap_item, page, &tree_page);
        if (tree_rmap_item) {