missing annotation
Boot parameter:
- sysctl.vm.mem_profiling=0|1|never
+ sysctl.vm.mem_profiling={0|1|never}[,compressed]
When set to "never", memory allocation profiling overhead is minimized and it
cannot be enabled at runtime (sysctl becomes read-only).
When CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=y, default value is "1".
When CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=n, default value is "never".
+ "compressed" optional parameter will try to store page tag references in a
+ compact format, avoiding page extensions. This results in improved performance
+ and memory consumption, however it might fail depending on system configuration.
+ If compression fails, a warning is issued and memory allocation profiling gets
+ disabled.
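+
+ For example, to enable profiling with compressed page tag references at boot
+ (illustrative command line following the syntax above):
+
+   sysctl.vm.mem_profiling=1,compressed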
sysctl:
/proc/sys/vm/mem_profiling
struct alloc_tag_counters __percpu *counters;
} __aligned(8);
+struct alloc_tag_kernel_section {
+ struct alloc_tag *first_tag;
+ unsigned long count;
+};
+
struct alloc_tag_module_section {
- unsigned long start_addr;
+ union {
+ unsigned long start_addr;
+ struct alloc_tag *first_tag;
+ };
unsigned long end_addr;
/* used size */
unsigned long size;
struct seq_buf;
struct module;
+#define CODETAG_SECTION_START_PREFIX "__start_"
+#define CODETAG_SECTION_STOP_PREFIX "__stop_"
+
/*
* An instance of this structure is created in a special ELF section at every
* code location being tagged. At runtime, the special section is treated as
ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \
NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH)
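+/* Bits in page->flags consumed by fields other than the page flags themselves */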
+#define NR_NON_PAGEFLAG_BITS (SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + \
+ LAST_CPUPID_SHIFT + KASAN_TAG_WIDTH + \
+ LRU_GEN_WIDTH + LRU_REFS_WIDTH)
+
+#define NR_UNUSED_PAGEFLAG_BITS (BITS_PER_LONG - \
+ (NR_NON_PAGEFLAG_BITS + NR_PAGEFLAGS))
+
#endif
#endif /* _LINUX_PAGE_FLAGS_LAYOUT */
#include <linux/page_ext.h>
+extern struct page_ext_operations page_alloc_tagging_ops;
+extern unsigned long alloc_tag_ref_mask;
+extern int alloc_tag_ref_offs;
+extern struct alloc_tag_kernel_section kernel_tags;
+
+DECLARE_STATIC_KEY_FALSE(mem_profiling_compressed);
+
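+/* Compressed tag references are indexes stored in unused page flag bits */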
+typedef u16 pgalloc_tag_idx;
+
union pgtag_ref_handle {
union codetag_ref *ref; /* reference in page extension */
+ struct page *page; /* reference in page flags */
};
-extern struct page_ext_operations page_alloc_tagging_ops;
+/* Reserved indexes */
+#define CODETAG_ID_NULL 0
+#define CODETAG_ID_EMPTY 1
+#define CODETAG_ID_FIRST 2
+
+#ifdef CONFIG_MODULES
+
+extern struct alloc_tag_module_section module_tags;
+
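+/*
+ * Module tags are indexed right after the kernel tags. module_idx_to_tag()
+ * expects an index with the reserved ids already stripped (see idx_to_ref()),
+ * while module_tag_to_idx() returns a full index including the reserved ids.
+ */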
+static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx)
+{
+ return &module_tags.first_tag[idx - kernel_tags.count];
+}
+
+static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag)
+{
+ return CODETAG_ID_FIRST + kernel_tags.count + (tag - module_tags.first_tag);
+}
+
+#else /* CONFIG_MODULES */
+
+static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx)
+{
+ pr_warn("invalid page tag reference %lu\n", (unsigned long)idx);
+ return NULL;
+}
+
+static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag)
+{
+ pr_warn("invalid page tag 0x%lx\n", (unsigned long)tag);
+ return CODETAG_ID_NULL;
+}
+
+#endif /* CONFIG_MODULES */
+
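+/*
+ * Convert between a compressed page tag index and a codetag reference.
+ * Indexes CODETAG_ID_NULL and CODETAG_ID_EMPTY are reserved for NULL and
+ * "empty" references; kernel tags follow, then module tags.
+ */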
+static inline void idx_to_ref(pgalloc_tag_idx idx, union codetag_ref *ref)
+{
+ switch (idx) {
+ case (CODETAG_ID_NULL):
+ ref->ct = NULL;
+ break;
+ case (CODETAG_ID_EMPTY):
+ set_codetag_empty(ref);
+ break;
+ default:
+ idx -= CODETAG_ID_FIRST;
+ ref->ct = idx < kernel_tags.count ?
+ &kernel_tags.first_tag[idx].ct :
+ &module_idx_to_tag(idx)->ct;
+ break;
+ }
+}
+
+static inline pgalloc_tag_idx ref_to_idx(union codetag_ref *ref)
+{
+ struct alloc_tag *tag;
+
+ if (!ref->ct)
+ return CODETAG_ID_NULL;
+
+ if (is_codetag_empty(ref))
+ return CODETAG_ID_EMPTY;
+
+ tag = ct_to_alloc_tag(ref->ct);
+ if (tag >= kernel_tags.first_tag && tag < kernel_tags.first_tag + kernel_tags.count)
+ return CODETAG_ID_FIRST + (tag - kernel_tags.first_tag);
+
+ return module_tag_to_idx(tag);
+}
+
/* Should be called only if mem_alloc_profiling_enabled() */
static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref,
union pgtag_ref_handle *handle)
{
- struct page_ext *page_ext;
- union codetag_ref *tmp;
-
if (!page)
return false;
- page_ext = page_ext_get(page);
- if (!page_ext)
- return false;
+ if (static_key_enabled(&mem_profiling_compressed)) {
+ pgalloc_tag_idx idx;
+
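+ /* The tag index is stored directly in the unused page flag bits */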
+ idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask;
+ idx_to_ref(idx, ref);
+ handle->page = page;
+ } else {
+ struct page_ext *page_ext;
+ union codetag_ref *tmp;
+
+ page_ext = page_ext_get(page);
+ if (!page_ext)
+ return false;
+
+ tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
+ ref->ct = tmp->ct;
+ handle->ref = tmp;
+ }
- tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops);
- ref->ct = tmp->ct;
- handle->ref = tmp;
return true;
}
if (WARN_ON(!handle.ref))
return;
- page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset);
+ if (!static_key_enabled(&mem_profiling_compressed))
+ page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset);
}
-static inline void update_page_tag_ref(union pgtag_ref_handle handle,
- union codetag_ref *ref)
+static inline void update_page_tag_ref(union pgtag_ref_handle handle, union codetag_ref *ref)
{
- if (WARN_ON(!handle.ref || !ref))
- return;
-
- handle.ref->ct = ref->ct;
+ if (static_key_enabled(&mem_profiling_compressed)) {
+ struct page *page = handle.page;
+ unsigned long old_flags;
+ unsigned long flags;
+ unsigned long idx;
+
+ if (WARN_ON(!page || !ref))
+ return;
+
+ idx = (unsigned long)ref_to_idx(ref);
+ idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs;
+ do {
+ old_flags = READ_ONCE(page->flags);
+ flags = old_flags;
+ flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs);
+ flags |= idx;
+ } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags)));
+ } else {
+ if (WARN_ON(!handle.ref || !ref))
+ return;
+
+ handle.ref->ct = ref->ct;
+ }
}
static inline void clear_page_tag_ref(struct page *page)
this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
}
+void __init alloc_tag_sec_init(void);
+
#else /* CONFIG_MEM_ALLOC_PROFILING */
static inline void clear_page_tag_ref(struct page *page) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
+static inline void alloc_tag_sec_init(void) {}
#endif /* CONFIG_MEM_ALLOC_PROFILING */
#include <linux/execmem.h>
#include <linux/fs.h>
#include <linux/gfp.h>
+#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
#include <linux/proc_fs.h>
#define ALLOCINFO_FILE_NAME "allocinfo"
#define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
+#define SECTION_START(NAME) (CODETAG_SECTION_START_PREFIX NAME)
+#define SECTION_STOP(NAME) (CODETAG_SECTION_STOP_PREFIX NAME)
#ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT
static bool mem_profiling_support = true;
DEFINE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
mem_alloc_profiling_key);
+DEFINE_STATIC_KEY_FALSE(mem_profiling_compressed);
+
+struct alloc_tag_kernel_section kernel_tags = { NULL, 0 };
+unsigned long alloc_tag_ref_mask;
+int alloc_tag_ref_offs;
struct allocinfo_private {
struct codetag_iterator iter;
return nr;
}
-static void shutdown_mem_profiling(void)
+static void shutdown_mem_profiling(bool remove_file)
{
if (mem_alloc_profiling_enabled())
static_branch_disable(&mem_alloc_profiling_key);
if (!mem_profiling_support)
return;
+ if (remove_file)
+ remove_proc_entry(ALLOCINFO_FILE_NAME, NULL);
mem_profiling_support = false;
}
if (!proc_create_seq(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op)) {
pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
- shutdown_mem_profiling();
+ shutdown_mem_profiling(false);
}
}
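+
+/*
+ * Locate the kernel's alloc_tag section via kallsyms and check that all
+ * kernel tags can be indexed with the unused page flag bits; if not, memory
+ * allocation profiling is shut down.
+ */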
+void __init alloc_tag_sec_init(void)
+{
+ struct alloc_tag *last_codetag;
+
+ if (!mem_profiling_support)
+ return;
+
+ if (!static_key_enabled(&mem_profiling_compressed))
+ return;
+
+ kernel_tags.first_tag = (struct alloc_tag *)kallsyms_lookup_name(
+ SECTION_START(ALLOC_TAG_SECTION_NAME));
+ last_codetag = (struct alloc_tag *)kallsyms_lookup_name(
+ SECTION_STOP(ALLOC_TAG_SECTION_NAME));
+ kernel_tags.count = last_codetag - kernel_tags.first_tag;
+
+ /* Check if kernel tags fit into page flags */
+ if (kernel_tags.count > (1UL << NR_UNUSED_PAGEFLAG_BITS)) {
+ shutdown_mem_profiling(false); /* allocinfo file does not exist yet */
+ pr_err("%lu allocation tags cannot be references using %d available page flag bits. Memory allocation profiling is disabled!\n",
+ kernel_tags.count, NR_UNUSED_PAGEFLAG_BITS);
+ return;
+ }
+
+ alloc_tag_ref_offs = (LRU_REFS_PGOFF - NR_UNUSED_PAGEFLAG_BITS);
+ alloc_tag_ref_mask = ((1UL << NR_UNUSED_PAGEFLAG_BITS) - 1);
+ pr_debug("Memory allocation profiling compression is using %d page flag bits!\n",
+ NR_UNUSED_PAGEFLAG_BITS);
+}
+
#ifdef CONFIG_MODULES
static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE);
/* A dummy object used to indicate a module prepended area */
static struct module prepend_mod;
-static struct alloc_tag_module_section module_tags;
+struct alloc_tag_module_section module_tags;
+
+static inline unsigned long alloc_tag_align(unsigned long val)
+{
+ if (!static_key_enabled(&mem_profiling_compressed)) {
+ /* No alignment requirements when we are not indexing the tags */
+ return val;
+ }
+
+ if (val % sizeof(struct alloc_tag) == 0)
+ return val;
+ return ((val / sizeof(struct alloc_tag)) + 1) * sizeof(struct alloc_tag);
+}
+
+static bool ensure_alignment(unsigned long align, unsigned int *prepend)
+{
+ if (!static_key_enabled(&mem_profiling_compressed)) {
+ /* No alignment requirements when we are not indexing the tags */
+ return true;
+ }
+
+ /*
+ * If alloc_tag size is not a multiple of required alignment, tag
+ * indexing does not work.
+ */
+ if (!IS_ALIGNED(sizeof(struct alloc_tag), align))
+ return false;
+
+ /* Ensure prepend consumes a multiple of alloc_tag-sized blocks */
+ if (*prepend)
+ *prepend = alloc_tag_align(*prepend);
+
+ return true;
+}
+
+static inline bool tags_addressable(void)
+{
+ unsigned long tag_idx_count;
+
+ if (!static_key_enabled(&mem_profiling_compressed))
+ return true; /* with page_ext, tags are always addressable */
+
+ tag_idx_count = CODETAG_ID_FIRST + kernel_tags.count +
+ module_tags.size / sizeof(struct alloc_tag);
+
+ return tag_idx_count < (1UL << NR_UNUSED_PAGEFLAG_BITS);
+}
static bool needs_section_mem(struct module *mod, unsigned long size)
{
+ if (!mem_profiling_support)
+ return false;
+
return size >= sizeof(struct alloc_tag);
}
if (!align)
align = 1;
+ if (!ensure_alignment(align, &prepend)) {
+ shutdown_mem_profiling(true);
+ pr_err("%s: alignment %lu is incompatible with allocation tag indexing. Memory allocation profiling is disabled!\n",
+ mod->name, align);
+ return ERR_PTR(-EINVAL);
+ }
+
mas_lock(&mas);
if (!find_aligned_area(&mas, section_size, size, prepend, align)) {
ret = ERR_PTR(-ENOMEM);
int grow_res;
module_tags.size = offset + size;
+ if (mem_alloc_profiling_enabled() && !tags_addressable()) {
+ shutdown_mem_profiling(true);
+ pr_warn("With module %s there are too many tags to fit in %d page flag bits. Memory allocation profiling is disabled!\n",
+ mod->name, NR_UNUSED_PAGEFLAG_BITS);
+ }
+
grow_res = vm_module_tags_populate();
if (grow_res) {
- shutdown_mem_profiling();
+ shutdown_mem_profiling(true);
pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n",
mod->name);
return ERR_PTR(grow_res);
module_tags.start_addr = (unsigned long)vm_module_tags->addr;
module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE;
+ /* Ensure the base is alloc_tag aligned when required for indexing */
+ module_tags.start_addr = alloc_tag_align(module_tags.start_addr);
return 0;
}
#endif /* CONFIG_MODULES */
+/* See: Documentation/mm/allocation-profiling.rst */
static int __init setup_early_mem_profiling(char *str)
{
+ bool compressed = false;
bool enable;
if (!str || !str[0])
if (!strncmp(str, "never", 5)) {
enable = false;
mem_profiling_support = false;
+ pr_info("Memory allocation profiling is disabled!\n");
} else {
- int res;
+ char *token = strsep(&str, ",");
+
+ if (kstrtobool(token, &enable))
+ return -EINVAL;
- res = kstrtobool(str, &enable);
- if (res)
- return res;
+ if (str) {
+ if (strcmp(str, "compressed"))
+ return -EINVAL;
+
+ compressed = true;
+ }
mem_profiling_support = true;
+ pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
+ compressed ? "with" : "without", enable ? "on" : "off");
}
- if (enable != static_key_enabled(&mem_alloc_profiling_key)) {
+ if (enable != mem_alloc_profiling_enabled()) {
if (enable)
static_branch_enable(&mem_alloc_profiling_key);
else
static_branch_disable(&mem_alloc_profiling_key);
}
+ if (compressed != static_key_enabled(&mem_profiling_compressed)) {
+ if (compressed)
+ static_branch_enable(&mem_profiling_compressed);
+ else
+ static_branch_disable(&mem_profiling_compressed);
+ }
return 0;
}
static __init bool need_page_alloc_tagging(void)
{
+ if (static_key_enabled(&mem_profiling_compressed))
+ return false;
+
return mem_profiling_support;
}
const char *section)
{
return (struct codetag_range) {
- get_symbol(mod, "__start_", section),
- get_symbol(mod, "__stop_", section),
+ get_symbol(mod, CODETAG_SECTION_START_PREFIX, section),
+ get_symbol(mod, CODETAG_SECTION_STOP_PREFIX, section),
};
}
unsigned long or_mask, add_mask;
shift = BITS_PER_LONG;
- width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
+ width = shift - NR_NON_PAGEFLAG_BITS;
mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
"Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
SECTIONS_WIDTH,
BUILD_BUG_ON(MAX_ZONELISTS > 2);
build_all_zonelists(NULL);
page_alloc_init_cpuhp();
-
+ alloc_tag_sec_init();
/*
* page_ext requires contiguous pages,
* bigger than MAX_PAGE_ORDER unless SPARSEMEM.