From: Greg Kroah-Hartman Date: Fri, 15 May 2026 14:49:35 +0000 (+0200) Subject: 6.18-stable patches X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=139cbb5b901633bbbfdd4ebf4cdf60512b9e5861;p=thirdparty%2Fkernel%2Fstable-queue.git 6.18-stable patches added patches: batman-adv-tp_meter-fix-tp_num-leak-on-kmalloc-failure.patch btrfs-fix-btrfs_ioctl_space_info-slot_count-toctou-which-can-lead-to-info-leak.patch btrfs-fix-double-free-in-create_space_info_sub_group-error-path.patch btrfs-remove-fs_info-argument-from-btrfs_sysfs_add_space_info_type.patch tracing-fprobe-optimization-for-entry-only-case.patch tracing-fprobe-remove-fprobe-from-hash-in-failure-path.patch tracing-fprobe-unregister-fprobe-even-if-memory-allocation-fails.patch tracing-fprobe-use-rhltable-for-fprobe_ip_table.patch --- diff --git a/queue-6.18/batman-adv-tp_meter-fix-tp_num-leak-on-kmalloc-failure.patch b/queue-6.18/batman-adv-tp_meter-fix-tp_num-leak-on-kmalloc-failure.patch new file mode 100644 index 0000000000..22b597b052 --- /dev/null +++ b/queue-6.18/batman-adv-tp_meter-fix-tp_num-leak-on-kmalloc-failure.patch @@ -0,0 +1,54 @@ +From stable+bounces-247676-greg=kroah.com@vger.kernel.org Fri May 15 14:24:53 2026 +From: Sven Eckelmann +Date: Fri, 15 May 2026 13:44:28 +0200 +Subject: batman-adv: tp_meter: fix tp_num leak on kmalloc failure +To: stable@vger.kernel.org +Cc: Sven Eckelmann , stable@kernel.org +Message-ID: <20260515114428.385372-1-sven@narfation.org> + +From: Sven Eckelmann + +commit ce425dd05d0fe7594930a0fb103634f35ac47bb6 upstream. + +When batadv_tp_start() or batadv_tp_init_recv() fail to allocate a new +tp_vars object, the previously incremented bat_priv->tp_num counter is +never decremented. This causes tp_num to drift upward on each allocation +failure. Since only BATADV_TP_MAX_NUM sessions can be started and the count +is never reduced for these failed allocations, it causes to an exhaustion +of throughput meter sessions. 
In the worst case, no new throughput meter +session can be started until the mesh interface is removed. + +The error handling must decrement tp_num before releasing the lock and aborting +the creation of a throughput meter session. + +Cc: stable@kernel.org +Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") +[ Context ] +Signed-off-by: Sven Eckelmann +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/tp_meter.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/batman-adv/tp_meter.c ++++ b/net/batman-adv/tp_meter.c +@@ -994,6 +994,7 @@ void batadv_tp_start(struct batadv_priv + + tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC); + if (!tp_vars) { ++ atomic_dec(&bat_priv->tp_num); + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: %s cannot allocate list elements\n", +@@ -1366,8 +1367,10 @@ batadv_tp_init_recv(struct batadv_priv * + } + + tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC); +- if (!tp_vars) ++ if (!tp_vars) { ++ atomic_dec(&bat_priv->tp_num); + goto out_unlock; ++ } + + ether_addr_copy(tp_vars->other_end, icmp->orig); + tp_vars->role = BATADV_TP_RECEIVER; diff --git a/queue-6.18/btrfs-fix-btrfs_ioctl_space_info-slot_count-toctou-which-can-lead-to-info-leak.patch b/queue-6.18/btrfs-fix-btrfs_ioctl_space_info-slot_count-toctou-which-can-lead-to-info-leak.patch new file mode 100644 index 0000000000..6be3108c11 --- /dev/null +++ b/queue-6.18/btrfs-fix-btrfs_ioctl_space_info-slot_count-toctou-which-can-lead-to-info-leak.patch @@ -0,0 +1,59 @@ +From stable+bounces-247683-greg=kroah.com@vger.kernel.org Fri May 15 14:22:40 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 07:45:37 -0400 +Subject: btrfs: fix btrfs_ioctl_space_info() slot_count TOCTOU which can lead to info-leak +To: stable@vger.kernel.org +Cc: Yochai Eisenrich , Yochai Eisenrich , David Sterba , Sasha Levin +Message-ID: <20260515114537.3024682-1-sashal@kernel.org> + +From: Yochai Eisenrich + +[ Upstream commit 
973e57c726c1f8e77259d1c8e519519f1e9aea77 ] + +btrfs_ioctl_space_info() has a TOCTOU race between two passes over the +block group RAID type lists. The first pass counts entries to determine +the allocation size, then the second pass fills the buffer. The +groups_sem rwlock is released between passes, allowing concurrent block +group removal to reduce the entry count. + +When the second pass fills fewer entries than the first pass counted, +copy_to_user() copies the full alloc_size bytes including trailing +uninitialized kmalloc bytes to userspace. + +Fix by copying only total_spaces entries (the actually-filled count from +the second pass) instead of alloc_size bytes, and switch to kzalloc so +any future copy size mismatch cannot leak heap data. + +Fixes: 7fde62bffb57 ("Btrfs: buffer results in the space_info ioctl") +CC: stable@vger.kernel.org # 3.0 +Signed-off-by: Yochai Eisenrich +Reviewed-by: David Sterba +Signed-off-by: David Sterba +[ adapted upstream's `return -EFAULT;` to stable's `ret = -EFAULT;` fall-through to existing `out:` cleanup label ] +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3025,7 +3025,7 @@ static long btrfs_ioctl_space_info(struc + return -ENOMEM; + + space_args.total_spaces = 0; +- dest = kmalloc(alloc_size, GFP_KERNEL); ++ dest = kzalloc(alloc_size, GFP_KERNEL); + if (!dest) + return -ENOMEM; + dest_orig = dest; +@@ -3081,7 +3081,8 @@ static long btrfs_ioctl_space_info(struc + user_dest = (struct btrfs_ioctl_space_info __user *) + (arg + sizeof(struct btrfs_ioctl_space_args)); + +- if (copy_to_user(user_dest, dest_orig, alloc_size)) ++ if (copy_to_user(user_dest, dest_orig, ++ space_args.total_spaces * sizeof(*dest_orig))) + ret = -EFAULT; + + kfree(dest_orig); diff --git a/queue-6.18/btrfs-fix-double-free-in-create_space_info_sub_group-error-path.patch 
b/queue-6.18/btrfs-fix-double-free-in-create_space_info_sub_group-error-path.patch new file mode 100644 index 0000000000..90d4b0e236 --- /dev/null +++ b/queue-6.18/btrfs-fix-double-free-in-create_space_info_sub_group-error-path.patch @@ -0,0 +1,58 @@ +From stable+bounces-247727-greg=kroah.com@vger.kernel.org Fri May 15 14:11:57 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 08:06:15 -0400 +Subject: btrfs: fix double free in create_space_info_sub_group() error path +To: stable@vger.kernel.org +Cc: Guangshuo Li , Qu Wenruo , David Sterba , Sasha Levin +Message-ID: <20260515120615.3073340-2-sashal@kernel.org> + +From: Guangshuo Li + +[ Upstream commit a7449edf96143f192606ec8647e3167e1ecbd728 ] + +When kobject_init_and_add() fails, the call chain is: + +create_space_info_sub_group() +-> btrfs_sysfs_add_space_info_type() +-> kobject_init_and_add() +-> failure +-> kobject_put(&sub_group->kobj) +-> space_info_release() +-> kfree(sub_group) + +Then control returns to create_space_info_sub_group(), where: + +btrfs_sysfs_add_space_info_type() returns error +-> kfree(sub_group) + +Thus, sub_group is freed twice. + +Keep parent->sub_group[index] = NULL for the failure path, but after +btrfs_sysfs_add_space_info_type() has called kobject_put(), let the +kobject release callback handle the cleanup. 
+ +Fixes: f92ee31e031c ("btrfs: introduce btrfs_space_info sub-group") +CC: stable@vger.kernel.org # 6.18+ +Reviewed-by: Qu Wenruo +Signed-off-by: Guangshuo Li +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/space-info.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -276,10 +276,8 @@ static int create_space_info_sub_group(s + sub_group->subgroup_id = id; + + ret = btrfs_sysfs_add_space_info_type(sub_group); +- if (ret) { +- kfree(sub_group); ++ if (ret) + parent->sub_group[index] = NULL; +- } + return ret; + } + diff --git a/queue-6.18/btrfs-remove-fs_info-argument-from-btrfs_sysfs_add_space_info_type.patch b/queue-6.18/btrfs-remove-fs_info-argument-from-btrfs_sysfs_add_space_info_type.patch new file mode 100644 index 0000000000..ff451114b3 --- /dev/null +++ b/queue-6.18/btrfs-remove-fs_info-argument-from-btrfs_sysfs_add_space_info_type.patch @@ -0,0 +1,78 @@ +From stable+bounces-247726-greg=kroah.com@vger.kernel.org Fri May 15 14:40:08 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 08:06:14 -0400 +Subject: btrfs: remove fs_info argument from btrfs_sysfs_add_space_info_type() +To: stable@vger.kernel.org +Cc: Filipe Manana , Johannes Thumshirn , David Sterba , Sasha Levin +Message-ID: <20260515120615.3073340-1-sashal@kernel.org> + +From: Filipe Manana + +[ Upstream commit 771af6ff72e0ed0eb8bf97e5ae4fa5094e0c5d1d ] + +We don't need it since we can grab fs_info from the given space_info. +So remove the fs_info argument. 
+ +Reviewed-by: Johannes Thumshirn +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: a7449edf9614 ("btrfs: fix double free in create_space_info_sub_group() error path") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/space-info.c | 4 ++-- + fs/btrfs/sysfs.c | 5 ++--- + fs/btrfs/sysfs.h | 3 +-- + 3 files changed, 5 insertions(+), 7 deletions(-) + +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -275,7 +275,7 @@ static int create_space_info_sub_group(s + sub_group->parent = parent; + sub_group->subgroup_id = id; + +- ret = btrfs_sysfs_add_space_info_type(fs_info, sub_group); ++ ret = btrfs_sysfs_add_space_info_type(sub_group); + if (ret) { + kfree(sub_group); + parent->sub_group[index] = NULL; +@@ -309,7 +309,7 @@ static int create_space_info(struct btrf + goto out_free; + } + +- ret = btrfs_sysfs_add_space_info_type(info, space_info); ++ ret = btrfs_sysfs_add_space_info_type(space_info); + if (ret) + return ret; + +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -1981,13 +1981,12 @@ static const char *alloc_name(struct btr + * Create a sysfs entry for a space info type at path + * /sys/fs/btrfs/UUID/allocation/TYPE + */ +-int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, +- struct btrfs_space_info *space_info) ++int btrfs_sysfs_add_space_info_type(struct btrfs_space_info *space_info) + { + int ret; + + ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype, +- fs_info->space_info_kobj, "%s", ++ space_info->fs_info->space_info_kobj, "%s", + alloc_name(space_info)); + if (ret) { + kobject_put(&space_info->kobj); +--- a/fs/btrfs/sysfs.h ++++ b/fs/btrfs/sysfs.h +@@ -37,8 +37,7 @@ void __cold btrfs_exit_sysfs(void); + int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info); + void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info); + void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache); +-int 
btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, +- struct btrfs_space_info *space_info); ++int btrfs_sysfs_add_space_info_type(struct btrfs_space_info *space_info); + void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info); + void btrfs_sysfs_update_devid(struct btrfs_device *device); + diff --git a/queue-6.18/series b/queue-6.18/series index 28c32c2d03..7964a51a65 100644 --- a/queue-6.18/series +++ b/queue-6.18/series @@ -171,3 +171,11 @@ sched_ext-read-scx_root-under-scx_cgroup_ops_rwsem-in-cgroup-setters.patch usb-dwc3-remove-of-dep-regs.patch usb-dwc3-add-dwc-pointer-to-dwc3_readl-writel.patch usb-dwc3-move-guid-programming-after-phy-initialization.patch +btrfs-remove-fs_info-argument-from-btrfs_sysfs_add_space_info_type.patch +btrfs-fix-double-free-in-create_space_info_sub_group-error-path.patch +btrfs-fix-btrfs_ioctl_space_info-slot_count-toctou-which-can-lead-to-info-leak.patch +tracing-fprobe-use-rhltable-for-fprobe_ip_table.patch +tracing-fprobe-optimization-for-entry-only-case.patch +tracing-fprobe-unregister-fprobe-even-if-memory-allocation-fails.patch +tracing-fprobe-remove-fprobe-from-hash-in-failure-path.patch +batman-adv-tp_meter-fix-tp_num-leak-on-kmalloc-failure.patch diff --git a/queue-6.18/tracing-fprobe-optimization-for-entry-only-case.patch b/queue-6.18/tracing-fprobe-optimization-for-entry-only-case.patch new file mode 100644 index 0000000000..a11b7cc751 --- /dev/null +++ b/queue-6.18/tracing-fprobe-optimization-for-entry-only-case.patch @@ -0,0 +1,228 @@ +From stable+bounces-247670-greg=kroah.com@vger.kernel.org Fri May 15 13:41:37 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 07:32:24 -0400 +Subject: tracing: fprobe: optimization for entry only case +To: stable@vger.kernel.org +Cc: Menglong Dong , Menglong Dong , "Masami Hiramatsu (Google)" , Sasha Levin +Message-ID: <20260515113226.2979191-2-sashal@kernel.org> + +From: Menglong Dong + +[ Upstream commit 2c67dc457bc67367dc8fcd8f471ce2d5bb5f7b2b ] + +For now, 
fgraph is used for the fprobe, even if we need trace the entry +only. However, the performance of ftrace is better than fgraph, and we +can use ftrace_ops for this case. + +Then performance of kprobe-multi increases from 54M to 69M. Before this +commit: + + $ ./benchs/run_bench_trigger.sh kprobe-multi + kprobe-multi : 54.663 ± 0.493M/s + +After this commit: + + $ ./benchs/run_bench_trigger.sh kprobe-multi + kprobe-multi : 69.447 ± 0.143M/s + +Mitigation is disable during the bench testing above. + +Link: https://lore.kernel.org/all/20251015083238.2374294-2-dongml2@chinatelecom.cn/ + +Signed-off-by: Menglong Dong +Signed-off-by: Masami Hiramatsu (Google) +Stable-dep-of: 845947aca681 ("tracing/fprobe: Remove fprobe from hash in failure path") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/fprobe.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 119 insertions(+), 9 deletions(-) + +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -252,8 +252,106 @@ static inline int __fprobe_kprobe_handle + return ret; + } + +-static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, +- struct ftrace_regs *fregs) ++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++/* ftrace_ops callback, this processes fprobes which have only entry_handler. */ ++static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip, ++ struct ftrace_ops *ops, struct ftrace_regs *fregs) ++{ ++ struct fprobe_hlist_node *node; ++ struct rhlist_head *head, *pos; ++ struct fprobe *fp; ++ int bit; ++ ++ bit = ftrace_test_recursion_trylock(ip, parent_ip); ++ if (bit < 0) ++ return; ++ ++ /* ++ * ftrace_test_recursion_trylock() disables preemption, but ++ * rhltable_lookup() checks whether rcu_read_lcok is held. ++ * So we take rcu_read_lock() here. 
++ */ ++ rcu_read_lock(); ++ head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params); ++ ++ rhl_for_each_entry_rcu(node, pos, head, hlist) { ++ if (node->addr != ip) ++ break; ++ fp = READ_ONCE(node->fp); ++ if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler)) ++ continue; ++ ++ if (fprobe_shared_with_kprobes(fp)) ++ __fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL); ++ else ++ __fprobe_handler(ip, parent_ip, fp, fregs, NULL); ++ } ++ rcu_read_unlock(); ++ ftrace_test_recursion_unlock(bit); ++} ++NOKPROBE_SYMBOL(fprobe_ftrace_entry); ++ ++static struct ftrace_ops fprobe_ftrace_ops = { ++ .func = fprobe_ftrace_entry, ++ .flags = FTRACE_OPS_FL_SAVE_REGS, ++}; ++static int fprobe_ftrace_active; ++ ++static int fprobe_ftrace_add_ips(unsigned long *addrs, int num) ++{ ++ int ret; ++ ++ lockdep_assert_held(&fprobe_mutex); ++ ++ ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0); ++ if (ret) ++ return ret; ++ ++ if (!fprobe_ftrace_active) { ++ ret = register_ftrace_function(&fprobe_ftrace_ops); ++ if (ret) { ++ ftrace_free_filter(&fprobe_ftrace_ops); ++ return ret; ++ } ++ } ++ fprobe_ftrace_active++; ++ return 0; ++} ++ ++static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num) ++{ ++ lockdep_assert_held(&fprobe_mutex); ++ ++ fprobe_ftrace_active--; ++ if (!fprobe_ftrace_active) ++ unregister_ftrace_function(&fprobe_ftrace_ops); ++ if (num) ++ ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0); ++} ++ ++static bool fprobe_is_ftrace(struct fprobe *fp) ++{ ++ return !fp->exit_handler; ++} ++#else ++static int fprobe_ftrace_add_ips(unsigned long *addrs, int num) ++{ ++ return -ENOENT; ++} ++ ++static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num) ++{ ++} ++ ++static bool fprobe_is_ftrace(struct fprobe *fp) ++{ ++ return false; ++} ++#endif ++ ++/* fgraph_ops callback, this processes fprobes which have exit_handler. 
*/ ++static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, ++ struct ftrace_regs *fregs) + { + unsigned long *fgraph_data = NULL; + unsigned long func = trace->func; +@@ -289,7 +387,7 @@ static int fprobe_entry(struct ftrace_gr + if (node->addr != func) + continue; + fp = READ_ONCE(node->fp); +- if (fp && !fprobe_disabled(fp)) ++ if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp)) + fp->nmissed++; + } + return 0; +@@ -309,7 +407,7 @@ static int fprobe_entry(struct ftrace_gr + if (node->addr != func) + continue; + fp = READ_ONCE(node->fp); +- if (!fp || fprobe_disabled(fp)) ++ if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp))) + continue; + + data_size = fp->entry_data_size; +@@ -337,7 +435,7 @@ static int fprobe_entry(struct ftrace_gr + /* If any exit_handler is set, data must be used. */ + return used != 0; + } +-NOKPROBE_SYMBOL(fprobe_entry); ++NOKPROBE_SYMBOL(fprobe_fgraph_entry); + + static void fprobe_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops, +@@ -376,7 +474,7 @@ static void fprobe_return(struct ftrace_ + NOKPROBE_SYMBOL(fprobe_return); + + static struct fgraph_ops fprobe_graph_ops = { +- .entryfunc = fprobe_entry, ++ .entryfunc = fprobe_fgraph_entry, + .retfunc = fprobe_return, + }; + static int fprobe_graph_active; +@@ -498,9 +596,14 @@ static int fprobe_module_callback(struct + } while (node == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + +- if (alist.index > 0) ++ if (alist.index > 0) { + ftrace_set_filter_ips(&fprobe_graph_ops.ops, + alist.addrs, alist.index, 1, 0); ++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++ ftrace_set_filter_ips(&fprobe_ftrace_ops, ++ alist.addrs, alist.index, 1, 0); ++#endif ++ } + mutex_unlock(&fprobe_mutex); + + kfree(alist.addrs); +@@ -735,7 +838,11 @@ int register_fprobe_ips(struct fprobe *f + return ret; + + hlist_array = fp->hlist_array; +- ret = fprobe_graph_add_ips(addrs, num); ++ if (fprobe_is_ftrace(fp)) ++ ret = fprobe_ftrace_add_ips(addrs, 
num); ++ else ++ ret = fprobe_graph_add_ips(addrs, num); ++ + if (!ret) { + add_fprobe_hash(fp); + for (i = 0; i < hlist_array->size; i++) { +@@ -830,7 +937,10 @@ int unregister_fprobe(struct fprobe *fp) + } + del_fprobe_hash(fp); + +- fprobe_graph_remove_ips(addrs, count); ++ if (fprobe_is_ftrace(fp)) ++ fprobe_ftrace_remove_ips(addrs, count); ++ else ++ fprobe_graph_remove_ips(addrs, count); + + kfree_rcu(hlist_array, rcu); + fp->hlist_array = NULL; diff --git a/queue-6.18/tracing-fprobe-remove-fprobe-from-hash-in-failure-path.patch b/queue-6.18/tracing-fprobe-remove-fprobe-from-hash-in-failure-path.patch new file mode 100644 index 0000000000..a1b5e86ce9 --- /dev/null +++ b/queue-6.18/tracing-fprobe-remove-fprobe-from-hash-in-failure-path.patch @@ -0,0 +1,187 @@ +From stable+bounces-247672-greg=kroah.com@vger.kernel.org Fri May 15 13:42:00 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 07:32:26 -0400 +Subject: tracing/fprobe: Remove fprobe from hash in failure path +To: stable@vger.kernel.org +Cc: "Masami Hiramatsu (Google)" , Sasha Levin +Message-ID: <20260515113226.2979191-4-sashal@kernel.org> + +From: "Masami Hiramatsu (Google)" + +[ Upstream commit 845947aca6814f5723ed65e556eb5ee09493f05b ] + +When register_fprobe_ips() fails, it tries to remove a list of +fprobe_hash_node from fprobe_ip_table, but it missed to remove +fprobe itself from fprobe_table. Moreover, when removing +the fprobe_hash_node which is added to rhltable once, it must +use kfree_rcu() after removing from rhltable. + +To fix these issues, this reuses unregister_fprobe() internal +code to rollback the half-way registered fprobe. 
+ +Link: https://lore.kernel.org/all/177669366417.132053.17874946321744910456.stgit@mhiramat.tok.corp.google.com/ + +Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") +Cc: stable@vger.kernel.org +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/fprobe.c | 90 ++++++++++++++++++++++++++------------------------ + 1 file changed, 47 insertions(+), 43 deletions(-) + +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -78,20 +78,27 @@ static const struct rhashtable_params fp + }; + + /* Node insertion and deletion requires the fprobe_mutex */ +-static int insert_fprobe_node(struct fprobe_hlist_node *node) ++static int insert_fprobe_node(struct fprobe_hlist_node *node, struct fprobe *fp) + { ++ int ret; ++ + lockdep_assert_held(&fprobe_mutex); + +- return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params); ++ ret = rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params); ++ /* Set the fprobe pointer if insertion was successful. 
*/ ++ if (!ret) ++ WRITE_ONCE(node->fp, fp); ++ return ret; + } + + /* Return true if there are synonims */ + static bool delete_fprobe_node(struct fprobe_hlist_node *node) + { +- lockdep_assert_held(&fprobe_mutex); + bool ret; + +- /* Avoid double deleting */ ++ lockdep_assert_held(&fprobe_mutex); ++ ++ /* Avoid double deleting and non-inserted nodes */ + if (READ_ONCE(node->fp) != NULL) { + WRITE_ONCE(node->fp, NULL); + rhltable_remove(&fprobe_ip_table, &node->hlist, +@@ -748,7 +755,6 @@ static int fprobe_init(struct fprobe *fp + fp->hlist_array = hlist_array; + hlist_array->fp = fp; + for (i = 0; i < num; i++) { +- hlist_array->array[i].fp = fp; + addr = ftrace_location(addrs[i]); + if (!addr) { + fprobe_fail_cleanup(fp); +@@ -812,6 +818,8 @@ int register_fprobe(struct fprobe *fp, c + } + EXPORT_SYMBOL_GPL(register_fprobe); + ++static int unregister_fprobe_nolock(struct fprobe *fp); ++ + /** + * register_fprobe_ips() - Register fprobe to ftrace by address. + * @fp: A fprobe data structure to be registered. +@@ -838,28 +846,25 @@ int register_fprobe_ips(struct fprobe *f + if (ret) + return ret; + +- hlist_array = fp->hlist_array; + if (fprobe_is_ftrace(fp)) + ret = fprobe_ftrace_add_ips(addrs, num); + else + ret = fprobe_graph_add_ips(addrs, num); +- +- if (!ret) { +- add_fprobe_hash(fp); +- for (i = 0; i < hlist_array->size; i++) { +- ret = insert_fprobe_node(&hlist_array->array[i]); +- if (ret) +- break; +- } +- /* fallback on insert error */ +- if (ret) { +- for (i--; i >= 0; i--) +- delete_fprobe_node(&hlist_array->array[i]); +- } ++ if (ret) { ++ fprobe_fail_cleanup(fp); ++ return ret; + } + +- if (ret) +- fprobe_fail_cleanup(fp); ++ hlist_array = fp->hlist_array; ++ ret = add_fprobe_hash(fp); ++ for (i = 0; i < hlist_array->size && !ret; i++) ++ ret = insert_fprobe_node(&hlist_array->array[i], fp); ++ ++ if (ret) { ++ unregister_fprobe_nolock(fp); ++ /* In error case, wait for clean up safely. 
*/ ++ synchronize_rcu(); ++ } + + return ret; + } +@@ -903,27 +908,12 @@ bool fprobe_is_registered(struct fprobe + return true; + } + +-/** +- * unregister_fprobe() - Unregister fprobe. +- * @fp: A fprobe data structure to be unregistered. +- * +- * Unregister fprobe (and remove ftrace hooks from the function entries). +- * +- * Return 0 if @fp is unregistered successfully, -errno if not. +- */ +-int unregister_fprobe(struct fprobe *fp) ++static int unregister_fprobe_nolock(struct fprobe *fp) + { +- struct fprobe_hlist *hlist_array; ++ struct fprobe_hlist *hlist_array = fp->hlist_array; + unsigned long *addrs = NULL; +- int ret = 0, i, count; ++ int i, count; + +- mutex_lock(&fprobe_mutex); +- if (!fp || !fprobe_registered(fp)) { +- ret = -EINVAL; +- goto out; +- } +- +- hlist_array = fp->hlist_array; + addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL); + /* + * This will remove fprobe_hash_node from the hash table even if +@@ -949,12 +939,26 @@ int unregister_fprobe(struct fprobe *fp) + + kfree_rcu(hlist_array, rcu); + fp->hlist_array = NULL; ++ kfree(addrs); + +-out: +- mutex_unlock(&fprobe_mutex); ++ return 0; ++} + +- kfree(addrs); +- return ret; ++/** ++ * unregister_fprobe() - Unregister fprobe. ++ * @fp: A fprobe data structure to be unregistered. ++ * ++ * Unregister fprobe (and remove ftrace hooks from the function entries). ++ * ++ * Return 0 if @fp is unregistered successfully, -errno if not. 
++ */ ++int unregister_fprobe(struct fprobe *fp) ++{ ++ guard(mutex)(&fprobe_mutex); ++ if (!fp || !fprobe_registered(fp)) ++ return -EINVAL; ++ ++ return unregister_fprobe_nolock(fp); + } + EXPORT_SYMBOL_GPL(unregister_fprobe); + diff --git a/queue-6.18/tracing-fprobe-unregister-fprobe-even-if-memory-allocation-fails.patch b/queue-6.18/tracing-fprobe-unregister-fprobe-even-if-memory-allocation-fails.patch new file mode 100644 index 0000000000..da11e27d56 --- /dev/null +++ b/queue-6.18/tracing-fprobe-unregister-fprobe-even-if-memory-allocation-fails.patch @@ -0,0 +1,87 @@ +From stable+bounces-247671-greg=kroah.com@vger.kernel.org Fri May 15 13:59:12 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 07:32:25 -0400 +Subject: tracing/fprobe: Unregister fprobe even if memory allocation fails +To: stable@vger.kernel.org +Cc: "Masami Hiramatsu (Google)" , Sasha Levin +Message-ID: <20260515113226.2979191-3-sashal@kernel.org> + +From: "Masami Hiramatsu (Google)" + +[ Upstream commit 1aec9e5c3e31ce1e28f914427fb7f90b91d310df ] + +unregister_fprobe() can fail under memory pressure because of memory +allocation failure, but this may be called from module unloading, and +usually there is no way to retry it. Moreover, trace_fprobe does not +check the return value. + +To fix this problem, unregister fprobe and fprobe_hash_node even if +working memory allocation fails. +Anyway, if the last fprobe is removed, the filter will be freed. 
+ +Link: https://lore.kernel.org/all/177669365629.132053.8433032896213721288.stgit@mhiramat.tok.corp.google.com/ + +Fixes: 4346ba160409 ("fprobe: Rewrite fprobe on function-graph tracer") +Cc: stable@vger.kernel.org +Signed-off-by: Masami Hiramatsu (Google) +Stable-dep-of: 845947aca681 ("tracing/fprobe: Remove fprobe from hash in failure path") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/fprobe.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -323,9 +323,10 @@ static void fprobe_ftrace_remove_ips(uns + lockdep_assert_held(&fprobe_mutex); + + fprobe_ftrace_active--; +- if (!fprobe_ftrace_active) ++ if (!fprobe_ftrace_active) { + unregister_ftrace_function(&fprobe_ftrace_ops); +- if (num) ++ ftrace_free_filter(&fprobe_ftrace_ops); ++ } else if (num) + ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0); + } + +@@ -508,10 +509,10 @@ static void fprobe_graph_remove_ips(unsi + + fprobe_graph_active--; + /* Q: should we unregister it ? */ +- if (!fprobe_graph_active) ++ if (!fprobe_graph_active) { + unregister_ftrace_graph(&fprobe_graph_ops); +- +- if (num) ++ ftrace_free_filter(&fprobe_graph_ops.ops); ++ } else if (num) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); + } + +@@ -924,15 +925,19 @@ int unregister_fprobe(struct fprobe *fp) + + hlist_array = fp->hlist_array; + addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL); +- if (!addrs) { +- ret = -ENOMEM; /* TODO: Fallback to one-by-one loop */ +- goto out; +- } ++ /* ++ * This will remove fprobe_hash_node from the hash table even if ++ * memory allocation fails. However, ftrace_ops will not be updated. ++ * Anyway, when the last fprobe is unregistered, ftrace_ops is also ++ * unregistered. ++ */ ++ if (!addrs) ++ pr_warn("Failed to allocate working array. 
ftrace_ops may not sync.\n"); + + /* Remove non-synonim ips from table and hash */ + count = 0; + for (i = 0; i < hlist_array->size; i++) { +- if (!delete_fprobe_node(&hlist_array->array[i])) ++ if (!delete_fprobe_node(&hlist_array->array[i]) && addrs) + addrs[count++] = hlist_array->array[i].addr; + } + del_fprobe_hash(fp); diff --git a/queue-6.18/tracing-fprobe-use-rhltable-for-fprobe_ip_table.patch b/queue-6.18/tracing-fprobe-use-rhltable-for-fprobe_ip_table.patch new file mode 100644 index 0000000000..478c9f4813 --- /dev/null +++ b/queue-6.18/tracing-fprobe-use-rhltable-for-fprobe_ip_table.patch @@ -0,0 +1,324 @@ +From stable+bounces-247669-greg=kroah.com@vger.kernel.org Fri May 15 14:11:05 2026 +From: Sasha Levin +Date: Fri, 15 May 2026 07:32:23 -0400 +Subject: tracing: fprobe: use rhltable for fprobe_ip_table +To: stable@vger.kernel.org +Cc: Menglong Dong , Menglong Dong , "Masami Hiramatsu (Google)" , Sasha Levin +Message-ID: <20260515113226.2979191-1-sashal@kernel.org> + +From: Menglong Dong + +[ Upstream commit 0de4c70d04a46a3c266547dd4275ce25f623796a ] + +For now, all the kernel functions who are hooked by the fprobe will be +added to the hash table "fprobe_ip_table". The key of it is the function +address, and the value of it is "struct fprobe_hlist_node". + +The budget of the hash table is FPROBE_IP_TABLE_SIZE, which is 256. And +this means the overhead of the hash table lookup will grow linearly if +the count of the functions in the fprobe more than 256. When we try to +hook all the kernel functions, the overhead will be huge. + +Therefore, replace the hash table with rhltable to reduce the overhead. 
+ +Link: https://lore.kernel.org/all/20250819031825.55653-1-dongml2@chinatelecom.cn/ + +Signed-off-by: Menglong Dong +Signed-off-by: Masami Hiramatsu (Google) +Stable-dep-of: 845947aca681 ("tracing/fprobe: Remove fprobe from hash in failure path") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/fprobe.h | 3 + kernel/trace/fprobe.c | 157 ++++++++++++++++++++++++++++--------------------- + 2 files changed, 93 insertions(+), 67 deletions(-) + +--- a/include/linux/fprobe.h ++++ b/include/linux/fprobe.h +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include + #include + + struct fprobe; +@@ -26,7 +27,7 @@ typedef void (*fprobe_exit_cb)(struct fp + * @fp: The fprobe which owns this. + */ + struct fprobe_hlist_node { +- struct hlist_node hlist; ++ struct rhlist_head hlist; + unsigned long addr; + struct fprobe *fp; + }; +--- a/kernel/trace/fprobe.c ++++ b/kernel/trace/fprobe.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -42,60 +43,67 @@ + * - RCU hlist traversal under disabling preempt + */ + static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE]; +-static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE]; ++static struct rhltable fprobe_ip_table; + static DEFINE_MUTEX(fprobe_mutex); + +-/* +- * Find first fprobe in the hlist. It will be iterated twice in the entry +- * probe, once for correcting the total required size, the second time is +- * calling back the user handlers. +- * Thus the hlist in the fprobe_table must be sorted and new probe needs to +- * be added *before* the first fprobe. 
+- */ +-static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip) ++static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed) + { +- struct fprobe_hlist_node *node; +- struct hlist_head *head; ++ return hash_ptr(*(unsigned long **)data, 32); ++} + +- head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; +- hlist_for_each_entry_rcu(node, head, hlist, +- lockdep_is_held(&fprobe_mutex)) { +- if (node->addr == ip) +- return node; +- } +- return NULL; ++static int fprobe_node_cmp(struct rhashtable_compare_arg *arg, ++ const void *ptr) ++{ ++ unsigned long key = *(unsigned long *)arg->key; ++ const struct fprobe_hlist_node *n = ptr; ++ ++ return n->addr != key; + } +-NOKPROBE_SYMBOL(find_first_fprobe_node); + +-/* Node insertion and deletion requires the fprobe_mutex */ +-static void insert_fprobe_node(struct fprobe_hlist_node *node) ++static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed) + { +- unsigned long ip = node->addr; +- struct fprobe_hlist_node *next; +- struct hlist_head *head; ++ const struct fprobe_hlist_node *n = data; ++ ++ return hash_ptr((void *)n->addr, 32); ++} ++ ++static const struct rhashtable_params fprobe_rht_params = { ++ .head_offset = offsetof(struct fprobe_hlist_node, hlist), ++ .key_offset = offsetof(struct fprobe_hlist_node, addr), ++ .key_len = sizeof_field(struct fprobe_hlist_node, addr), ++ .hashfn = fprobe_node_hashfn, ++ .obj_hashfn = fprobe_node_obj_hashfn, ++ .obj_cmpfn = fprobe_node_cmp, ++ .automatic_shrinking = true, ++}; + ++/* Node insertion and deletion requires the fprobe_mutex */ ++static int insert_fprobe_node(struct fprobe_hlist_node *node) ++{ + lockdep_assert_held(&fprobe_mutex); + +- next = find_first_fprobe_node(ip); +- if (next) { +- hlist_add_before_rcu(&node->hlist, &next->hlist); +- return; +- } +- head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)]; +- hlist_add_head_rcu(&node->hlist, head); ++ return rhltable_insert(&fprobe_ip_table, 
&node->hlist, fprobe_rht_params); + } + + /* Return true if there are synonims */ + static bool delete_fprobe_node(struct fprobe_hlist_node *node) + { + lockdep_assert_held(&fprobe_mutex); ++ bool ret; + + /* Avoid double deleting */ + if (READ_ONCE(node->fp) != NULL) { + WRITE_ONCE(node->fp, NULL); +- hlist_del_rcu(&node->hlist); ++ rhltable_remove(&fprobe_ip_table, &node->hlist, ++ fprobe_rht_params); + } +- return !!find_first_fprobe_node(node->addr); ++ ++ rcu_read_lock(); ++ ret = !!rhltable_lookup(&fprobe_ip_table, &node->addr, ++ fprobe_rht_params); ++ rcu_read_unlock(); ++ ++ return ret; + } + + /* Check existence of the fprobe */ +@@ -247,9 +255,10 @@ static inline int __fprobe_kprobe_handle + static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, + struct ftrace_regs *fregs) + { +- struct fprobe_hlist_node *node, *first; + unsigned long *fgraph_data = NULL; + unsigned long func = trace->func; ++ struct fprobe_hlist_node *node; ++ struct rhlist_head *head, *pos; + unsigned long ret_ip; + int reserved_words; + struct fprobe *fp; +@@ -258,14 +267,11 @@ static int fprobe_entry(struct ftrace_gr + if (WARN_ON_ONCE(!fregs)) + return 0; + +- first = node = find_first_fprobe_node(func); +- if (unlikely(!first)) +- return 0; +- ++ head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params); + reserved_words = 0; +- hlist_for_each_entry_from_rcu(node, hlist) { ++ rhl_for_each_entry_rcu(node, pos, head, hlist) { + if (node->addr != func) +- break; ++ continue; + fp = READ_ONCE(node->fp); + if (!fp || !fp->exit_handler) + continue; +@@ -276,13 +282,12 @@ static int fprobe_entry(struct ftrace_gr + reserved_words += + FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size); + } +- node = first; + if (reserved_words) { + fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long)); + if (unlikely(!fgraph_data)) { +- hlist_for_each_entry_from_rcu(node, hlist) { ++ rhl_for_each_entry_rcu(node, pos, head, hlist) { + if 
(node->addr != func) +- break; ++ continue; + fp = READ_ONCE(node->fp); + if (fp && !fprobe_disabled(fp)) + fp->nmissed++; +@@ -297,12 +302,12 @@ static int fprobe_entry(struct ftrace_gr + */ + ret_ip = ftrace_regs_get_return_address(fregs); + used = 0; +- hlist_for_each_entry_from_rcu(node, hlist) { ++ rhl_for_each_entry_rcu(node, pos, head, hlist) { + int data_size; + void *data; + + if (node->addr != func) +- break; ++ continue; + fp = READ_ONCE(node->fp); + if (!fp || fprobe_disabled(fp)) + continue; +@@ -447,25 +452,21 @@ static int fprobe_addr_list_add(struct f + return 0; + } + +-static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head, +- struct fprobe_addr_list *alist) ++static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node, ++ struct fprobe_addr_list *alist) + { +- struct fprobe_hlist_node *node; + int ret = 0; + +- hlist_for_each_entry_rcu(node, head, hlist, +- lockdep_is_held(&fprobe_mutex)) { +- if (!within_module(node->addr, mod)) +- continue; +- if (delete_fprobe_node(node)) +- continue; +- /* +- * If failed to update alist, just continue to update hlist. +- * Therefore, at list user handler will not hit anymore. +- */ +- if (!ret) +- ret = fprobe_addr_list_add(alist, node->addr); +- } ++ if (!within_module(node->addr, mod)) ++ return; ++ if (delete_fprobe_node(node)) ++ return; ++ /* ++ * If failed to update alist, just continue to update hlist. ++ * Therefore, at list user handler will not hit anymore. ++ */ ++ if (!ret) ++ ret = fprobe_addr_list_add(alist, node->addr); + } + + /* Handle module unloading to manage fprobe_ip_table. 
*/ +@@ -473,8 +474,9 @@ static int fprobe_module_callback(struct + unsigned long val, void *data) + { + struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT}; ++ struct fprobe_hlist_node *node; ++ struct rhashtable_iter iter; + struct module *mod = data; +- int i; + + if (val != MODULE_STATE_GOING) + return NOTIFY_DONE; +@@ -485,8 +487,16 @@ static int fprobe_module_callback(struct + return NOTIFY_DONE; + + mutex_lock(&fprobe_mutex); +- for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++) +- fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist); ++ rhltable_walk_enter(&fprobe_ip_table, &iter); ++ do { ++ rhashtable_walk_start(&iter); ++ ++ while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node)) ++ fprobe_remove_node_in_module(mod, node, &alist); ++ ++ rhashtable_walk_stop(&iter); ++ } while (node == ERR_PTR(-EAGAIN)); ++ rhashtable_walk_exit(&iter); + + if (alist.index > 0) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, +@@ -728,8 +738,16 @@ int register_fprobe_ips(struct fprobe *f + ret = fprobe_graph_add_ips(addrs, num); + if (!ret) { + add_fprobe_hash(fp); +- for (i = 0; i < hlist_array->size; i++) +- insert_fprobe_node(&hlist_array->array[i]); ++ for (i = 0; i < hlist_array->size; i++) { ++ ret = insert_fprobe_node(&hlist_array->array[i]); ++ if (ret) ++ break; ++ } ++ /* fallback on insert error */ ++ if (ret) { ++ for (i--; i >= 0; i--) ++ delete_fprobe_node(&hlist_array->array[i]); ++ } + } + + if (ret) +@@ -824,3 +842,10 @@ out: + return ret; + } + EXPORT_SYMBOL_GPL(unregister_fprobe); ++ ++static int __init fprobe_initcall(void) ++{ ++ rhltable_init(&fprobe_ip_table, &fprobe_rht_params); ++ return 0; ++} ++late_initcall(fprobe_initcall);