git.ipfire.org Git - thirdparty/linux.git/commitdiff
lib: add allocation tagging support for memory allocation profiling
author Suren Baghdasaryan <surenb@google.com>
Thu, 21 Mar 2024 16:36:35 +0000 (09:36 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 26 Apr 2024 03:55:52 +0000 (20:55 -0700)
Introduce CONFIG_MEM_ALLOC_PROFILING which provides definitions to easily
instrument memory allocators.  It registers an "alloc_tags" codetag type
with /proc/allocinfo interface to output allocation tag information when
the feature is enabled.

CONFIG_MEM_ALLOC_PROFILING_DEBUG is provided for debugging the memory
allocation profiling instrumentation.

Memory allocation profiling can be enabled or disabled at runtime using
/proc/sys/vm/mem_profiling sysctl when CONFIG_MEM_ALLOC_PROFILING_DEBUG=n.
CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT enables memory allocation
profiling by default.

[surenb@google.com: Documentation/filesystems/proc.rst: fix allocinfo title]
Link: https://lkml.kernel.org/r/20240326073813.727090-1-surenb@google.com
[surenb@google.com: do limited memory accounting for modules with ARCH_NEEDS_WEAK_PER_CPU]
Link: https://lkml.kernel.org/r/20240402180933.1663992-2-surenb@google.com
[klarasmodin@gmail.com: explicitly include irqflags.h in alloc_tag.h]
Link: https://lkml.kernel.org/r/20240407133252.173636-1-klarasmodin@gmail.com
[surenb@google.com: fix alloc_tag_init() to prevent passing NULL to PTR_ERR()]
Link: https://lkml.kernel.org/r/20240417003349.2520094-1-surenb@google.com
Link: https://lkml.kernel.org/r/20240321163705.3067592-14-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Co-developed-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Klara Modin <klarasmodin@gmail.com>
Tested-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alex Gaynor <alex.gaynor@gmail.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andreas Hindborg <a.hindborg@samsung.com>
Cc: Benno Lossin <benno.lossin@proton.me>
Cc: "Björn Roy Baron" <bjorn3_gh@protonmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Gary Guo <gary@garyguo.net>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wedson Almeida Filho <wedsonaf@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/admin-guide/sysctl/vm.rst
Documentation/filesystems/proc.rst
include/asm-generic/codetag.lds.h [new file with mode: 0644]
include/asm-generic/vmlinux.lds.h
include/linux/alloc_tag.h [new file with mode: 0644]
include/linux/sched.h
lib/Kconfig.debug
lib/Makefile
lib/alloc_tag.c [new file with mode: 0644]
scripts/module.lds.S

index c59889de122b9f928f2d515c2950bfea16bad4fb..e86c968a7a0ece58837b836e4196f69faadaed38 100644 (file)
@@ -43,6 +43,7 @@ Currently, these files are in /proc/sys/vm:
 - legacy_va_layout
 - lowmem_reserve_ratio
 - max_map_count
+- mem_profiling         (only if CONFIG_MEM_ALLOC_PROFILING=y)
 - memory_failure_early_kill
 - memory_failure_recovery
 - min_free_kbytes
@@ -425,6 +426,21 @@ e.g., up to one or two maps per allocation.
 The default value is 65530.
 
 
+mem_profiling
+==============
+
+Enable memory profiling (when CONFIG_MEM_ALLOC_PROFILING=y)
+
+1: Enable memory profiling.
+
+0: Disable memory profiling.
+
+Enabling memory profiling introduces a small performance overhead for all
+memory allocations.
+
+The default value depends on CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT.
+
+
 memory_failure_early_kill:
 ==========================
 
index c6a6b9df210497de6f2ad8e8a7dffc1bd974016a..245269dd6e02899dbaaaf0f5146f60116640fa71 100644 (file)
@@ -688,6 +688,7 @@ files are there, and which are missing.
  ============ ===============================================================
  File         Content
  ============ ===============================================================
+ allocinfo    Memory allocation profiling information
  apm          Advanced power management info
  bootconfig   Kernel command line obtained from boot config,
              and, if there were kernel parameters from the
@@ -953,6 +954,34 @@ also be allocatable although a lot of filesystem metadata may have to be
 reclaimed to achieve this.
 
 
+allocinfo
+~~~~~~~~~
+
+Provides information about memory allocations at all locations in the code
+base. Each allocation in the code is identified by its source file, line
+number, module (if it originates from a loadable module) and the function calling
+the allocation. The number of bytes allocated and number of calls at each
+location are reported.
+
+Example output.
+
+::
+
+    > sort -rn /proc/allocinfo
+   127664128    31168 mm/page_ext.c:270 func:alloc_page_ext
+    56373248     4737 mm/slub.c:2259 func:alloc_slab_page
+    14880768     3633 mm/readahead.c:247 func:page_cache_ra_unbounded
+    14417920     3520 mm/mm_init.c:2530 func:alloc_large_system_hash
+    13377536      234 block/blk-mq.c:3421 func:blk_mq_alloc_rqs
+    11718656     2861 mm/filemap.c:1919 func:__filemap_get_folio
+     9192960     2800 kernel/fork.c:307 func:alloc_thread_stack_node
+     4206592        4 net/netfilter/nf_conntrack_core.c:2567 func:nf_ct_alloc_hashtable
+     4136960     1010 drivers/staging/ctagmod/ctagmod.c:20 [ctagmod] func:ctagmod_start
+     3940352      962 mm/memory.c:4214 func:alloc_anon_folio
+     2894464    22613 fs/kernfs/dir.c:615 func:__kernfs_new_node
+     ...
+
+
 meminfo
 ~~~~~~~
 
diff --git a/include/asm-generic/codetag.lds.h b/include/asm-generic/codetag.lds.h
new file mode 100644 (file)
index 0000000..64f536b
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GENERIC_CODETAG_LDS_H
+#define __ASM_GENERIC_CODETAG_LDS_H
+
+#define SECTION_WITH_BOUNDARIES(_name) \
+       . = ALIGN(8);                   \
+       __start_##_name = .;            \
+       KEEP(*(_name))                  \
+       __stop_##_name = .;
+
+#define CODETAG_SECTIONS()             \
+       SECTION_WITH_BOUNDARIES(alloc_tags)
+
+#endif /* __ASM_GENERIC_CODETAG_LDS_H */
index f7749d0f2562f11fd8ca4112e0e7d99e79305780..3e4497b5135a1650153b3a5ae726d97335c8eec7 100644 (file)
@@ -50,6 +50,8 @@
  *               [__nosave_begin, __nosave_end] for the nosave data
  */
 
+#include <asm-generic/codetag.lds.h>
+
 #ifndef LOAD_OFFSET
 #define LOAD_OFFSET 0
 #endif
        . = ALIGN(8);                                                   \
        BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes)         \
        BOUNDED_SECTION_BY(__dyndbg, ___dyndbg)                         \
+       CODETAG_SECTIONS()                                              \
        LIKELY_PROFILE()                                                \
        BRANCH_PROFILE()                                                \
        TRACE_PRINTKS()                                                 \
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
new file mode 100644 (file)
index 0000000..1356122
--- /dev/null
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * allocation tagging
+ */
+#ifndef _LINUX_ALLOC_TAG_H
+#define _LINUX_ALLOC_TAG_H
+
+#include <linux/bug.h>
+#include <linux/codetag.h>
+#include <linux/container_of.h>
+#include <linux/preempt.h>
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/static_key.h>
+#include <linux/irqflags.h>
+
+struct alloc_tag_counters {
+       u64 bytes;
+       u64 calls;
+};
+
+/*
+ * An instance of this structure is created in a special ELF section at every
+ * allocation callsite. At runtime, the special section is treated as
+ * an array of these. Embedded codetag utilizes codetag framework.
+ */
+struct alloc_tag {
+       struct codetag                  ct;
+       struct alloc_tag_counters __percpu      *counters;
+} __aligned(8);
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+/* Map an embedded codetag back to the alloc_tag that contains it. */
+static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
+{
+       struct alloc_tag *tag = container_of(ct, struct alloc_tag, ct);
+
+       return tag;
+}
+
+#ifdef ARCH_NEEDS_WEAK_PER_CPU
+/*
+ * When percpu variables are required to be defined as weak, static percpu
+ * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION).
+ * Instead we will account all module allocations to a single counter.
+ */
+DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)                                           \
+       static struct alloc_tag _alloc_tag __used __aligned(8)                  \
+       __section("alloc_tags") = {                                             \
+               .ct = CODE_TAG_INIT,                                            \
+               .counters = &_shared_alloc_tag };
+
+#else /* ARCH_NEEDS_WEAK_PER_CPU */
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)                                           \
+       static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr);      \
+       static struct alloc_tag _alloc_tag __used __aligned(8)                  \
+       __section("alloc_tags") = {                                             \
+               .ct = CODE_TAG_INIT,                                            \
+               .counters = &_alloc_tag_cntr };
+
+#endif /* ARCH_NEEDS_WEAK_PER_CPU */
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+                       mem_alloc_profiling_key);
+
+static inline bool mem_alloc_profiling_enabled(void)
+{
+       return static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+                                  &mem_alloc_profiling_key);
+}
+
+/*
+ * Sum the per-CPU counters of @tag over every possible CPU and return the
+ * totals by value.
+ */
+static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
+{
+       struct alloc_tag_counters total = { .bytes = 0, .calls = 0 };
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct alloc_tag_counters *pcpu = per_cpu_ptr(tag->counters, cpu);
+
+               total.calls += pcpu->calls;
+               total.bytes += pcpu->bytes;
+       }
+
+       return total;
+}
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+/*
+ * Debug-only sanity checks run before a tag is attached to @ref: warn once if
+ * @ref still carries a tag, and warn once if no @tag was supplied.
+ */
+static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
+{
+       WARN_ONCE(ref && ref->ct,
+                 "alloc_tag was not cleared (got tag for %s:%u)\n",
+                 ref->ct->filename, ref->ct->lineno);
+
+       /* Terminate the message with a newline like the other warnings here. */
+       WARN_ONCE(!tag, "current->alloc_tag not set\n");
+}
+
+static inline void alloc_tag_sub_check(union codetag_ref *ref)
+{
+       WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
+}
+#else
+static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
+static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
+#endif
+
+/*
+ * Point @ref at @tag's codetag and bump the tag's call counter.
+ * Caller should verify both ref and tag to be valid.
+ */
+static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+{
+       ref->ct = &tag->ct;
+       /*
+        * We need to increment the call counter every time we have a new
+        * allocation or when we split a large allocation into smaller ones.
+        * Each new reference for every sub-allocation needs to increment call
+        * counter because when we free each part the counter will be decremented.
+        */
+       this_cpu_inc(tag->counters->calls);
+}
+
+/*
+ * Account @bytes to @tag and make @ref refer to it.  Nothing is recorded
+ * when either @ref or @tag is NULL.
+ */
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
+{
+       alloc_tag_add_check(ref, tag);
+
+       if (ref && tag) {
+               __alloc_tag_ref_set(ref, tag);
+               this_cpu_add(tag->counters->bytes, bytes);
+       }
+}
+
+/*
+ * Undo the accounting for an allocation of @bytes referenced by @ref and
+ * clear the reference.  A NULL @ref or an unset reference is ignored.
+ */
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+{
+       struct alloc_tag *owner;
+
+       alloc_tag_sub_check(ref);
+       if (!ref || !ref->ct)
+               return;
+
+       owner = ct_to_alloc_tag(ref->ct);
+       ref->ct = NULL;
+
+       this_cpu_dec(owner->counters->calls);
+       this_cpu_sub(owner->counters->bytes, bytes);
+}
+
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)
+static inline bool mem_alloc_profiling_enabled(void) { return false; }
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
+                                size_t bytes) {}
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
+#endif /* _LINUX_ALLOC_TAG_H */
index 3c2abbc587b49c308835ad05aac7b81d7a223831..4118b3f959c324eadd2032e51341fd74cfc35ef5 100644 (file)
@@ -770,6 +770,10 @@ struct task_struct {
        unsigned int                    flags;
        unsigned int                    ptrace;
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+       struct alloc_tag                *alloc_tag;
+#endif
+
 #ifdef CONFIG_SMP
        int                             on_cpu;
        struct __call_single_node       wake_entry;
@@ -810,6 +814,7 @@ struct task_struct {
        struct task_group               *sched_task_group;
 #endif
 
+
 #ifdef CONFIG_UCLAMP_TASK
        /*
         * Clamp values requested for a scheduling entity.
@@ -2187,4 +2192,23 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
 
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+/* Install @tag as current->alloc_tag and return the tag it replaces. */
+static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
+{
+       struct alloc_tag *prev = current->alloc_tag;
+
+       current->alloc_tag = tag;
+       return prev;
+}
+
+static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
+{
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+       WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
+#endif
+       current->alloc_tag = old;
+}
+#else
+#define alloc_tag_save(_tag)                   NULL
+#define alloc_tag_restore(_tag, _old)          do {} while (0)
+#endif
+
 #endif
index 015fc6ee9849a81db85da37f4090b96d8ad512ba..fa7aa32ba11a2d6878613d91be08fbb2881723f9 100644 (file)
@@ -972,6 +972,31 @@ config CODE_TAGGING
        bool
        select KALLSYMS
 
+config MEM_ALLOC_PROFILING
+       bool "Enable memory allocation profiling"
+       default n
+       depends on PROC_FS
+       depends on !DEBUG_FORCE_WEAK_PER_CPU
+       select CODE_TAGGING
+       help
+         Track allocation source code and record total allocation size
+         initiated at that code location. The mechanism can be used to track
+         memory leaks with a low performance and memory impact.
+
+config MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+       bool "Enable memory allocation profiling by default"
+       default y
+       depends on MEM_ALLOC_PROFILING
+
+config MEM_ALLOC_PROFILING_DEBUG
+       bool "Memory allocation profiler debugging"
+       default n
+       depends on MEM_ALLOC_PROFILING
+       select MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
+       help
+         Adds warnings with helpful error messages for memory allocation
+         profiling.
+
 source "lib/Kconfig.kasan"
 source "lib/Kconfig.kfence"
 source "lib/Kconfig.kmsan"
index 910335da8f1334dc9109fc1ffac9cbb51641aae0..2f4e17bfb2990d87a3a8a42e8608783be4cbd7f1 100644 (file)
@@ -234,6 +234,8 @@ obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
 obj-$(CONFIG_CODE_TAGGING) += codetag.o
+obj-$(CONFIG_MEM_ALLOC_PROFILING) += alloc_tag.o
+
 lib-$(CONFIG_GENERIC_BUG) += bug.o
 
 obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
new file mode 100644 (file)
index 0000000..331dd17
--- /dev/null
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/alloc_tag.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+
+static struct codetag_type *alloc_tag_cttype;
+
+DEFINE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
+EXPORT_SYMBOL(_shared_alloc_tag);
+
+DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+                       mem_alloc_profiling_key);
+
+/*
+ * seq_file ->start: allocate a fresh iterator, stash it in m->private, lock
+ * the codetag module list and advance to the entry at *pos.  Returns the
+ * iterator, or NULL if allocation failed or *pos is past the last tag.
+ */
+static void *allocinfo_start(struct seq_file *m, loff_t *pos)
+{
+       struct codetag_iterator *it = kzalloc(sizeof(*it), GFP_KERNEL);
+       loff_t skip = *pos;
+       struct codetag *ct;
+
+       m->private = it;
+       if (!it)
+               return NULL;
+
+       codetag_lock_module_list(alloc_tag_cttype, true);
+       *it = codetag_get_ct_iter(alloc_tag_cttype);
+
+       /* Fetch the first tag, then one more for each position to skip. */
+       for (ct = codetag_next_ct(it); ct && skip; ct = codetag_next_ct(it))
+               skip--;
+
+       return ct ? it : NULL;
+}
+
+/*
+ * seq_file ->next: move the iterator to the following tag and bump *pos.
+ * Returns the iterator, or NULL once there are no more tags.
+ */
+static void *allocinfo_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+       struct codetag_iterator *it = arg;
+       struct codetag *ct;
+
+       ct = codetag_next_ct(it);
+       ++*pos;
+
+       return ct ? it : NULL;
+}
+
+/*
+ * seq_file ->stop: if ->start managed to allocate an iterator, unlock the
+ * codetag module list and free the iterator.
+ */
+static void allocinfo_stop(struct seq_file *m, void *arg)
+{
+       struct codetag_iterator *it = m->private;
+
+       if (!it)
+               return;
+
+       codetag_lock_module_list(alloc_tag_cttype, false);
+       kfree(it);
+}
+
+/*
+ * Format one allocinfo line for @ct into @out: total bytes, total calls,
+ * then the codetag text, followed by a space and a newline.
+ */
+static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
+{
+       struct alloc_tag_counters totals = alloc_tag_read(ct_to_alloc_tag(ct));
+       s64 nbytes = totals.bytes;
+
+       seq_buf_printf(out, "%12lli %8llu ", nbytes, totals.calls);
+       codetag_to_text(out, ct);
+       seq_buf_putc(out, ' ');
+       seq_buf_putc(out, '\n');
+}
+
+/*
+ * seq_file ->show: format the iterator's current tag directly into the
+ * seq_file's buffer and commit however many bytes were produced.
+ */
+static int allocinfo_show(struct seq_file *m, void *arg)
+{
+       struct codetag_iterator *it = arg;
+       struct seq_buf sbuf;
+       char *space;
+       size_t avail;
+
+       avail = seq_get_buf(m, &space);
+       seq_buf_init(&sbuf, space, avail);
+       alloc_tag_to_text(&sbuf, it->ct);
+       seq_commit(m, seq_buf_used(&sbuf));
+
+       return 0;
+}
+
+static const struct seq_operations allocinfo_seq_op = {
+       .start  = allocinfo_start,
+       .next   = allocinfo_next,
+       .stop   = allocinfo_stop,
+       .show   = allocinfo_show,
+};
+
+static void __init procfs_init(void)
+{
+       proc_create_seq("allocinfo", 0444, NULL, &allocinfo_seq_op);
+}
+
+/*
+ * codetag ->module_unload callback: walk the tags of @cttype, look only at
+ * those that belong to @cmod, and warn about each one that still has bytes
+ * accounted.  Returns true when none of the module's tags has bytes left.
+ */
+static bool alloc_tag_module_unload(struct codetag_type *cttype,
+                                   struct codetag_module *cmod)
+{
+       struct codetag_iterator iter = codetag_get_ct_iter(cttype);
+       bool module_unused = true;
+       struct codetag *ct;
+
+       while ((ct = codetag_next_ct(&iter)) != NULL) {
+               struct alloc_tag_counters counter;
+
+               /* Skip tags that belong to a different module. */
+               if (iter.cmod != cmod)
+                       continue;
+
+               counter = alloc_tag_read(ct_to_alloc_tag(ct));
+               if (WARN(counter.bytes,
+                        "%s:%u module %s func:%s has %llu allocated at module unload",
+                        ct->filename, ct->lineno, ct->modname, ct->function, counter.bytes))
+                       module_unused = false;
+       }
+
+       return module_unused;
+}
+
+static struct ctl_table memory_allocation_profiling_sysctls[] = {
+       {
+               .procname       = "mem_profiling",
+               .data           = &mem_alloc_profiling_key,
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+               .mode           = 0444,
+#else
+               .mode           = 0644,
+#endif
+               .proc_handler   = proc_do_static_key,
+       },
+       { }
+};
+
+/*
+ * Register the "alloc_tags" codetag type, then hook up the vm sysctl table
+ * and the allocinfo proc file.  Returns 0, or the error encoded in the
+ * pointer returned by codetag_register_type().
+ */
+static int __init alloc_tag_init(void)
+{
+       const struct codetag_type_desc desc = {
+               .section        = "alloc_tags",
+               .tag_size       = sizeof(struct alloc_tag),
+               .module_unload  = alloc_tag_module_unload,
+       };
+       struct codetag_type *cttype = codetag_register_type(&desc);
+
+       alloc_tag_cttype = cttype;
+       if (IS_ERR(cttype))
+               return PTR_ERR(cttype);
+
+       register_sysctl_init("vm", memory_allocation_profiling_sysctls);
+       procfs_init();
+
+       return 0;
+}
+module_init(alloc_tag_init);
index bf5bcf2836d8152ec8a940e7c7f59baabe0da1b5..45c67a0994f3e38a141c0e32e48658361566978c 100644 (file)
@@ -9,6 +9,8 @@
 #define DISCARD_EH_FRAME       *(.eh_frame)
 #endif
 
+#include <asm-generic/codetag.lds.h>
+
 SECTIONS {
        /DISCARD/ : {
                *(.discard)
@@ -47,12 +49,17 @@ SECTIONS {
        .data : {
                *(.data .data.[0-9a-zA-Z_]*)
                *(.data..L*)
+               CODETAG_SECTIONS()
        }
 
        .rodata : {
                *(.rodata .rodata.[0-9a-zA-Z_]*)
                *(.rodata..L*)
        }
+#else
+       .data : {
+               CODETAG_SECTIONS()
+       }
 #endif
 }