--- /dev/null
+#!/bin/sh
+
+# Try to find kernel modules with a large total memory allocation during loading.
+# Large slab allocations fall through to the buddy allocator, so they are seen as
+# page allocations as well. "mm_page_free" is not traced: large frees are quite
+# rare during module_init, and leaving them out saves tons of events, which helps
+# to avoid the trace data being overwritten.
+#
+# Therefore, tracing "mm_page_alloc" alone should be enough for this purpose.
+
+# "sys/kernel/tracing" has the priority if exists.
+get_trace_base() {
+ # trace access through debugfs would be obsolete if "/sys/kernel/tracing" is available.
+ if [ -d "/sys/kernel/tracing" ]; then
+ echo "/sys/kernel"
+ else
+ echo "/sys/kernel/debug"
+ fi
+}
+
+# Print the space-separated list of trace events this script enables.
+get_want_events() {
+    printf '%s\n' "module:module_put module:module_load kmem:mm_page_alloc"
+}
+
+get_event_filter() {
+ echo "comm == systemd-udevd || comm == modprobe || comm == insmod"
+}
+
+is_trace_ready() {
+ local trace_base want_events current_events
+
+ trace_base=$(get_trace_base)
+ ! [ -f "$trace_base/tracing/trace" ] && return 1
+
+ [ "$(cat $trace_base/tracing/tracing_on)" -eq 0 ] && return 1
+
+ # Also check if trace events were properly setup.
+ want_events=$(get_want_events)
+ current_events=$(echo $(cat $trace_base/tracing/set_event))
+ [ "$current_events" != "$want_events" ] && return 1
+
+ return 0
+}
+
+# Mount the trace filesystem if needed, enable the wanted events together
+# with the task filter, then switch tracing on and clear the trace buffer.
+# Outputs: a success message, or a WARN message on failure, on stdout.
+# Returns: 0 on success; 1 if the trace file is not available or the wanted
+#          events could not be enabled.
+prepare_trace() {
+    local trace_base tracing_dir event_filter
+
+    trace_base=$(get_trace_base)
+    tracing_dir="$trace_base/tracing"
+    # old debugfs interface case.
+    if ! [ -d "$tracing_dir" ]; then
+        mount none -t debugfs "$trace_base"
+    # new tracefs interface case.
+    elif ! [ -f "$tracing_dir/trace" ]; then
+        mount none -t tracefs "$tracing_dir"
+    fi
+
+    if ! [ -f "$tracing_dir/trace" ]; then
+        echo "WARN: Mount trace failed for kernel module memory analyzing."
+        return 1
+    fi
+
+    # Activate all the wanted trace events. Without them there is nothing to
+    # analyze later, so a failed write must not be reported as success.
+    if ! echo "$(get_want_events)" > "$tracing_dir/set_event"; then
+        echo "WARN: Enable trace events failed for kernel module memory analyzing."
+        return 1
+    fi
+
+    # There are three kinds of known applications for module loading:
+    # "systemd-udevd", "modprobe" and "insmod".
+    # Set them as the global events filter.
+    # NOTE: Some kernel may not support this format of filter, anyway
+    # the operation will fail and it doesn't matter.
+    # stderr is discarded first so that neither a failed open nor a failed
+    # write prints anything; failure here is deliberately ignored.
+    event_filter=$(get_event_filter)
+    echo "$event_filter" 2>/dev/null > "$tracing_dir/events/kmem/filter"
+    echo "$event_filter" 2>/dev/null > "$tracing_dir/events/module/filter"
+
+    # Set the number of comm-pid if supported.
+    if [ -f "$tracing_dir/saved_cmdlines_size" ]; then
+        # Thanks to filters, 4096 is big enough(also well supported).
+        echo 4096 > "$tracing_dir/saved_cmdlines_size"
+    fi
+
+    # Enable and clear trace data for the first time.
+    echo 1 > "$tracing_dir/tracing_on"
+    echo > "$tracing_dir/trace"
+    echo "Prepare trace success."
+    return 0
+}
+
+order_to_pages()
+{
+ local pages=1
+ local order=$1
+
+ while [ "$order" != 0 ]; do
+ order=$((order-1))
+ pages=$(($pages*2))
+ done
+
+ echo $pages
+}
+
+parse_trace_data() {
+ local module_name tmp_eval pages
+
+ cat "$(get_trace_base)/tracing/trace" | while read pid cpu flags ts function args
+ do
+ # Skip comment lines
+ if [ "$pid" = "#" ]; then
+ continue
+ fi
+
+ pid=${pid##*-}
+ function=${function%:}
+ if [ "$function" = "module_load" ]; then
+ # One module is being loaded, save the task pid for tracking.
+ # Remove the trailing after whitespace, there may be the module flags.
+ module_name=${args%% *}
+ # Mark current_module to track the task.
+ eval current_module_$pid="$module_name"
+ tmp_eval=$(eval echo '${module_loaded_'${module_name}'}')
+ if [ -n "$tmp_eval" ]; then
+ echo "WARN: \"$module_name\" was loaded multiple times!"
+ fi
+ eval unset module_loaded_$module_name
+ eval nr_alloc_pages_$module_name=0
+ continue
+ fi
+
+ module_name=$(eval echo '${current_module_'${pid}'}')
+ if [ -z "$module_name" ]; then
+ continue
+ fi
+
+ # Once we get here, the task is being tracked(is loading a module).
+ if [ "$function" = "module_put" ]; then
+ # Mark the module as loaded when the first module_put event happens after module_load.
+ tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
+ echo "$tmp_eval pages consumed by \"$module_name\""
+ eval module_loaded_$module_name=1
+ # Module loading finished, so untrack the task.
+ eval unset current_module_$pid
+ eval unset nr_alloc_pages_$module_name
+ continue
+ fi
+
+ if [ "$function" = "mm_page_alloc" ]; then
+ # Get order first, then convert to actual pages.
+ pages=$(echo $args | sed -e 's/.*order=\([0-9]*\) .*/\1/')
+ pages=$(order_to_pages "$pages")
+ tmp_eval=$(eval echo '${nr_alloc_pages_'${module_name}'}')
+ eval nr_alloc_pages_$module_name="$(($tmp_eval+$pages))"
+ fi
+ done
+}
+
+cleanup_trace() {
+ local trace_base
+
+ if is_trace_ready; then
+ trace_base=$(get_trace_base)
+ echo 0 > $trace_base/tracing/tracing_on
+ echo > $trace_base/tracing/trace
+ echo > $trace_base/tracing/set_event
+ echo 0 > $trace_base/tracing/events/kmem/filter
+ echo 0 > $trace_base/tracing/events/module/filter
+ fi
+}
+
+# Print a short description of the tool and its three usage steps on stdout.
+show_usage() {
+    # One argument per output line.
+    printf '%s\n' \
+        "Find out kernel modules with large memory consumption during loading based on trace." \
+        "Usage:" \
+        "1) run it first to setup trace." \
+        "2) run again to parse the trace data if any." \
+        "3) run with \"--cleanup\" option to cleanup trace after use."
+}
+
+if [ "$1" = "--help" ]; then
+ show_usage
+ exit 0
+fi
+
+if [ "$1" = "--cleanup" ]; then
+ cleanup_trace
+ exit 0
+fi
+
+if is_trace_ready ; then
+ echo "tracekomem - Rough memory consumption by loading kernel modules (larger value with better accuracy)"
+ parse_trace_data
+else
+ prepare_trace
+fi
+
+exit $?