git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'
author: Yunsheng Lin <linyunsheng@huawei.com>
Mon, 28 Oct 2024 11:53:41 +0000 (19:53 +0800)
committer: Jakub Kicinski <kuba@kernel.org>
Mon, 11 Nov 2024 18:56:27 +0000 (10:56 -0800)
Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', we are about to replace the
'struct page_frag' with 'struct page_frag_cache' for them.
Before beginning the replacement, we need to ensure the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc with lower bits of
'va' instead of using 'u16' or 'u32' for page size and 'u8'
for pfmemalloc, we are able to avoid 3 or 5 bytes of space waste.
And since the page address, pfmemalloc flag and order are unchanged
for the same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://patch.msgid.link/20241028115343.3405838-7-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/mm_types_task.h
include/linux/page_frag_cache.h
mm/page_frag_cache.c

index 0ac6daebdd5cd3bd65ab7e746ff8f2f66d411036..a82aa80c0ba46e52de7fd463304dc99a2e62160d 100644 (file)
@@ -47,18 +47,21 @@ struct page_frag {
 #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 struct page_frag_cache {
-       void *va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+       /* encoded_page consists of the virtual address, pfmemalloc bit and
+        * order of a page.
+        */
+       unsigned long encoded_page;
+
+       /* we maintain a pagecount bias, so that we dont dirty cache line
+        * containing page->_refcount every time we allocate a fragment.
+        */
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
        __u16 offset;
-       __u16 size;
+       __u16 pagecnt_bias;
 #else
        __u32 offset;
+       __u32 pagecnt_bias;
 #endif
-       /* we maintain a pagecount bias, so that we dont dirty cache line
-        * containing page->_refcount every time we allocate a fragment.
-        */
-       unsigned int            pagecnt_bias;
-       bool pfmemalloc;
 };
 
 /* Track pages that require TLB flushes */
index 0a52f7a179c8f9c1400fb92990aa07142b5b0548..41a91df826310ee1e8b8794b10818d72309d74ed 100644 (file)
@@ -3,18 +3,38 @@
 #ifndef _LINUX_PAGE_FRAG_CACHE_H
 #define _LINUX_PAGE_FRAG_CACHE_H
 
+#include <linux/bits.h>
 #include <linux/log2.h>
 #include <linux/mm_types_task.h>
 #include <linux/types.h>
 
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+/* Use a full byte here to enable assembler optimization as the shift
+ * operation is usually expecting a byte.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
+#else
+/* Compiler should be able to figure out we don't read things as any value
+ * ANDed with 0 is 0.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK             0
+#endif
+
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         (PAGE_FRAG_CACHE_ORDER_MASK + 1)
+
+static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
+{
+       return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
 {
-       nc->va = NULL;
+       nc->encoded_page = 0;
 }
 
 static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 {
-       return !!nc->pfmemalloc;
+       return encoded_page_decode_pfmemalloc(nc->encoded_page);
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc);
index 4c8e04379cb3e11ea1563b4c6ab7fc7753046a0e..a36fd09bf275ab56d717d78ecdb791ff2fde8697 100644 (file)
@@ -12,6 +12,7 @@
  * be used in the "frags" portion of skb_shared_info.
  */
 
+#include <linux/build_bug.h>
 #include <linux/export.h>
 #include <linux/gfp_types.h>
 #include <linux/init.h>
 #include <linux/page_frag_cache.h>
 #include "internal.h"
 
+static unsigned long encoded_page_create(struct page *page, unsigned int order,
+                                        bool pfmemalloc)
+{
+       BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
+       BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
+
+       return (unsigned long)page_address(page) |
+               (order & PAGE_FRAG_CACHE_ORDER_MASK) |
+               ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
+static unsigned long encoded_page_decode_order(unsigned long encoded_page)
+{
+       return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
+}
+
+static void *encoded_page_decode_virt(unsigned long encoded_page)
+{
+       return (void *)(encoded_page & PAGE_MASK);
+}
+
+static struct page *encoded_page_decode_page(unsigned long encoded_page)
+{
+       return virt_to_page((void *)encoded_page);
+}
+
 static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
                                             gfp_t gfp_mask)
 {
+       unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
        struct page *page = NULL;
        gfp_t gfp = gfp_mask;
 
@@ -30,23 +58,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
                   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
        page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
                                PAGE_FRAG_CACHE_MAX_ORDER);
-       nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
 #endif
-       if (unlikely(!page))
+       if (unlikely(!page)) {
                page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+               order = 0;
+       }
 
-       nc->va = page ? page_address(page) : NULL;
+       nc->encoded_page = page ?
+               encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0;
 
        return page;
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
-       if (!nc->va)
+       if (!nc->encoded_page)
                return;
 
-       __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-       nc->va = NULL;
+       __page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page),
+                               nc->pagecnt_bias);
+       nc->encoded_page = 0;
 }
 EXPORT_SYMBOL(page_frag_cache_drain);
 
@@ -63,35 +94,29 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
                              unsigned int fragsz, gfp_t gfp_mask,
                              unsigned int align_mask)
 {
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-       unsigned int size = nc->size;
-#else
-       unsigned int size = PAGE_SIZE;
-#endif
-       unsigned int offset;
+       unsigned long encoded_page = nc->encoded_page;
+       unsigned int size, offset;
        struct page *page;
 
-       if (unlikely(!nc->va)) {
+       if (unlikely(!encoded_page)) {
 refill:
                page = __page_frag_cache_refill(nc, gfp_mask);
                if (!page)
                        return NULL;
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-               /* if size can vary use size else just use PAGE_SIZE */
-               size = nc->size;
-#endif
+               encoded_page = nc->encoded_page;
+
                /* Even if we own the page, we do not use atomic_set().
                 * This would break get_page_unless_zero() users.
                 */
                page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
 
                /* reset page count bias and offset to start of new frag */
-               nc->pfmemalloc = page_is_pfmemalloc(page);
                nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
                nc->offset = 0;
        }
 
+       size = PAGE_SIZE << encoded_page_decode_order(encoded_page);
        offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
        if (unlikely(offset + fragsz > size)) {
                if (unlikely(fragsz > PAGE_SIZE)) {
@@ -107,13 +132,14 @@ refill:
                        return NULL;
                }
 
-               page = virt_to_page(nc->va);
+               page = encoded_page_decode_page(encoded_page);
 
                if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
                        goto refill;
 
-               if (unlikely(nc->pfmemalloc)) {
-                       free_unref_page(page, compound_order(page));
+               if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) {
+                       free_unref_page(page,
+                                       encoded_page_decode_order(encoded_page));
                        goto refill;
                }
 
@@ -128,7 +154,7 @@ refill:
        nc->pagecnt_bias--;
        nc->offset = offset + fragsz;
 
-       return nc->va + offset;
+       return encoded_page_decode_virt(encoded_page) + offset;
 }
 EXPORT_SYMBOL(__page_frag_alloc_align);