From: Greg Kroah-Hartman Date: Mon, 24 Feb 2025 14:08:07 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v6.6.80~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=239cfd9e2fe7696a7d0c8805f1e99b186b82c551;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: batman-adv-drop-unmanaged-elp-metric-worker.patch batman-adv-ignore-neighbor-throughput-metrics-in-error-case.patch --- diff --git a/queue-5.4/batman-adv-drop-unmanaged-elp-metric-worker.patch b/queue-5.4/batman-adv-drop-unmanaged-elp-metric-worker.patch new file mode 100644 index 0000000000..b948e7f42f --- /dev/null +++ b/queue-5.4/batman-adv-drop-unmanaged-elp-metric-worker.patch @@ -0,0 +1,256 @@ +From 8c8ecc98f5c65947b0070a24bac11e12e47cc65d Mon Sep 17 00:00:00 2001 +From: Sven Eckelmann +Date: Mon, 20 Jan 2025 00:06:11 +0100 +Subject: batman-adv: Drop unmanaged ELP metric worker + +From: Sven Eckelmann + +commit 8c8ecc98f5c65947b0070a24bac11e12e47cc65d upstream. + +The ELP worker needs to calculate new metric values for all neighbors +"reachable" over an interface. Some of the used metric sources require +locks which might need to sleep. This sleep is incompatible with the RCU +list iterator used for the recorded neighbors. The initial approach to work +around this problem was to queue another work item per neighbor and then +run this in a new context. + +Even when this solved the RCU vs might_sleep() conflict, it has a major +problem: Nothing was stopping the work item in case it is not needed +anymore - for example because one of the related interfaces was removed or +the batman-adv module was unloaded - resulting in potential invalid memory +accesses. + +Directly canceling the metric worker also has various problems: + +* cancel_work_sync for a to-be-deactivated interface is called with + rtnl_lock held. But the code in the ELP metric worker also tries to use + rtnl_lock() - which will never return in this case. 
This also means that + cancel_work_sync would never return because it is waiting for the worker + to finish. +* iterating over the neighbor list for the to-be-deactivated interface is + currently done using the RCU specific methods. Which means that it is + possible to miss items when iterating over it without the associated + spinlock - a behaviour which is acceptable for a periodic metric check + but not for a cleanup routine (which must "stop" all still running + workers) + +The better approach is to get rid of the per interface neighbor metric +worker and handle everything in the interface worker. The original problems +are solved by: + +* creating a list of neighbors which require new metric information inside + the RCU protected context, gathering the metric according to the new list + outside the RCU protected context +* only use rtnl_trylock inside metric gathering code to avoid a deadlock + when the cancel_delayed_work_sync is called in the interface removal code + (which is called with the rtnl_lock held) + +Cc: stable@vger.kernel.org +Fixes: c833484e5f38 ("batman-adv: ELP - compute the metric based on the estimated throughput") +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sven Eckelmann +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_v.c | 2 - + net/batman-adv/bat_v_elp.c | 74 ++++++++++++++++++++++++++++++--------------- + net/batman-adv/bat_v_elp.h | 2 - + net/batman-adv/types.h | 3 - + 4 files changed, 50 insertions(+), 31 deletions(-) + +--- a/net/batman-adv/bat_v.c ++++ b/net/batman-adv/bat_v.c +@@ -115,8 +115,6 @@ static void + batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) + { + ewma_throughput_init(&hardif_neigh->bat_v.throughput); +- INIT_WORK(&hardif_neigh->bat_v.metric_work, +- batadv_v_elp_throughput_metric_update); + } + + #ifdef CONFIG_BATMAN_ADV_DEBUGFS +--- a/net/batman-adv/bat_v_elp.c ++++ b/net/batman-adv/bat_v_elp.c +@@ -18,6 +18,7 @@ + #include + #include + 
#include ++#include + #include + #include + #include +@@ -25,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -41,6 +43,18 @@ + #include "send.h" + + /** ++ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require ++ * and metric update ++ */ ++struct batadv_v_metric_queue_entry { ++ /** @hardif_neigh: hardif neighbor scheduled for metric update */ ++ struct batadv_hardif_neigh_node *hardif_neigh; ++ ++ /** @list: list node for metric_queue */ ++ struct list_head list; ++}; ++ ++/** + * batadv_v_elp_start_timer() - restart timer for ELP periodic work + * @hard_iface: the interface for which the timer has to be reset + */ +@@ -126,11 +140,19 @@ static bool batadv_v_elp_get_throughput( + return true; + } + ++ memset(&link_settings, 0, sizeof(link_settings)); ++ ++ /* only use rtnl_trylock because the elp worker will be cancelled while ++ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise ++ * wait forever when the elp work_item was started and it is then also ++ * trying to rtnl_lock ++ */ ++ if (!rtnl_trylock()) ++ return false; ++ + /* if not a wifi interface, check if this device provides data via + * ethtool (e.g. 
an Ethernet adapter) + */ +- memset(&link_settings, 0, sizeof(link_settings)); +- rtnl_lock(); + ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); + rtnl_unlock(); + if (ret == 0) { +@@ -165,31 +187,19 @@ default_throughput: + /** + * batadv_v_elp_throughput_metric_update() - worker updating the throughput + * metric of a single hop neighbour +- * @work: the work queue item ++ * @neigh: the neighbour to probe + */ +-void batadv_v_elp_throughput_metric_update(struct work_struct *work) ++static void ++batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh) + { +- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; +- struct batadv_hardif_neigh_node *neigh; + u32 throughput; + bool valid; + +- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, +- metric_work); +- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, +- bat_v); +- + valid = batadv_v_elp_get_throughput(neigh, &throughput); + if (!valid) +- goto put_neigh; ++ return; + + ewma_throughput_add(&neigh->bat_v.throughput, throughput); +- +-put_neigh: +- /* decrement refcounter to balance increment performed before scheduling +- * this task +- */ +- batadv_hardif_neigh_put(neigh); + } + + /** +@@ -263,14 +273,16 @@ batadv_v_elp_wifi_neigh_probe(struct bat + */ + static void batadv_v_elp_periodic_work(struct work_struct *work) + { ++ struct batadv_v_metric_queue_entry *metric_entry; ++ struct batadv_v_metric_queue_entry *metric_safe; + struct batadv_hardif_neigh_node *hardif_neigh; + struct batadv_hard_iface *hard_iface; + struct batadv_hard_iface_bat_v *bat_v; + struct batadv_elp_packet *elp_packet; ++ struct list_head metric_queue; + struct batadv_priv *bat_priv; + struct sk_buff *skb; + u32 elp_interval; +- bool ret; + + bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); + hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); +@@ -306,6 +318,8 @@ static void batadv_v_elp_periodic_work(s + + 
atomic_inc(&hard_iface->bat_v.elp_seqno); + ++ INIT_LIST_HEAD(&metric_queue); ++ + /* The throughput metric is updated on each sent packet. This way, if a + * node is dead and no longer sends packets, batman-adv is still able to + * react timely to its death. +@@ -330,16 +344,28 @@ static void batadv_v_elp_periodic_work(s + + /* Reading the estimated throughput from cfg80211 is a task that + * may sleep and that is not allowed in an rcu protected +- * context. Therefore schedule a task for that. ++ * context. Therefore add it to metric_queue and process it ++ * outside rcu protected context. + */ +- ret = queue_work(batadv_event_workqueue, +- &hardif_neigh->bat_v.metric_work); +- +- if (!ret) ++ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC); ++ if (!metric_entry) { + batadv_hardif_neigh_put(hardif_neigh); ++ continue; ++ } ++ ++ metric_entry->hardif_neigh = hardif_neigh; ++ list_add(&metric_entry->list, &metric_queue); + } + rcu_read_unlock(); + ++ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) { ++ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh); ++ ++ batadv_hardif_neigh_put(metric_entry->hardif_neigh); ++ list_del(&metric_entry->list); ++ kfree(metric_entry); ++ } ++ + restart_timer: + batadv_v_elp_start_timer(hard_iface); + out: +--- a/net/batman-adv/bat_v_elp.h ++++ b/net/batman-adv/bat_v_elp.h +@@ -10,7 +10,6 @@ + #include "main.h" + + #include +-#include + + int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); + void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); +@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct + void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); + int batadv_v_elp_packet_recv(struct sk_buff *skb, + struct batadv_hard_iface *if_incoming); +-void batadv_v_elp_throughput_metric_update(struct work_struct *work); + + #endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */ +--- a/net/batman-adv/types.h ++++ b/net/batman-adv/types.h 
+@@ -603,9 +603,6 @@ struct batadv_hardif_neigh_node_bat_v { + * neighbor + */ + unsigned long last_unicast_tx; +- +- /** @metric_work: work queue callback item for metric update */ +- struct work_struct metric_work; + }; + + /** diff --git a/queue-5.4/batman-adv-ignore-neighbor-throughput-metrics-in-error-case.patch b/queue-5.4/batman-adv-ignore-neighbor-throughput-metrics-in-error-case.patch new file mode 100644 index 0000000000..c63680ef6b --- /dev/null +++ b/queue-5.4/batman-adv-ignore-neighbor-throughput-metrics-in-error-case.patch @@ -0,0 +1,127 @@ +From e7e34ffc976aaae4f465b7898303241b81ceefc3 Mon Sep 17 00:00:00 2001 +From: Sven Eckelmann +Date: Mon, 20 Jan 2025 20:35:28 +0100 +Subject: batman-adv: Ignore neighbor throughput metrics in error case + +From: Sven Eckelmann + +commit e7e34ffc976aaae4f465b7898303241b81ceefc3 upstream. + +If a temporary error happened in the evaluation of the neighbor throughput +information, then the invalid throughput result should not be stored in the +throughput EWMA. + +Cc: stable@vger.kernel.org +Signed-off-by: Sven Eckelmann +Signed-off-by: Simon Wunderlich +Signed-off-by: Sven Eckelmann +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/bat_v_elp.c | 41 ++++++++++++++++++++++++++++------------- + 1 file changed, 28 insertions(+), 13 deletions(-) + +--- a/net/batman-adv/bat_v_elp.c ++++ b/net/batman-adv/bat_v_elp.c +@@ -58,11 +58,13 @@ static void batadv_v_elp_start_timer(str + /** + * batadv_v_elp_get_throughput() - get the throughput towards a neighbour + * @neigh: the neighbour for which the throughput has to be obtained ++ * @pthroughput: calculated throughput towards the given neighbour in multiples ++ * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). + * +- * Return: The throughput towards the given neighbour in multiples of 100kpbs +- * (a value of '1' equals to 0.1Mbps, '10' equals 1Mbps, etc). 
++ * Return: true when value behind @pthroughput was set + */ +-static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ++static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, ++ u32 *pthroughput) + { + struct batadv_hard_iface *hard_iface = neigh->if_incoming; + struct net_device *soft_iface = hard_iface->soft_iface; +@@ -76,14 +78,16 @@ static u32 batadv_v_elp_get_throughput(s + * batman-adv interface + */ + if (!soft_iface) +- return BATADV_THROUGHPUT_DEFAULT_VALUE; ++ return false; + + /* if the user specified a customised value for this interface, then + * return it directly + */ + throughput = atomic_read(&hard_iface->bat_v.throughput_override); +- if (throughput != 0) +- return throughput; ++ if (throughput != 0) { ++ *pthroughput = throughput; ++ return true; ++ } + + /* if this is a wireless device, then ask its throughput through + * cfg80211 API +@@ -110,14 +114,16 @@ static u32 batadv_v_elp_get_throughput(s + * possible to delete this neighbor. For now set + * the throughput metric to 0. 
+ */ +- return 0; ++ *pthroughput = 0; ++ return true; + } + if (ret) + goto default_throughput; + if (!(sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT))) + goto default_throughput; + +- return sinfo.expected_throughput / 100; ++ *pthroughput = sinfo.expected_throughput / 100; ++ return true; + } + + /* if not a wifi interface, check if this device provides data via +@@ -135,8 +141,10 @@ static u32 batadv_v_elp_get_throughput(s + hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; + + throughput = link_settings.base.speed; +- if (throughput && throughput != SPEED_UNKNOWN) +- return throughput * 10; ++ if (throughput && throughput != SPEED_UNKNOWN) { ++ *pthroughput = throughput * 10; ++ return true; ++ } + } + + default_throughput: +@@ -150,7 +158,8 @@ default_throughput: + } + + /* if none of the above cases apply, return the base_throughput */ +- return BATADV_THROUGHPUT_DEFAULT_VALUE; ++ *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE; ++ return true; + } + + /** +@@ -162,15 +171,21 @@ void batadv_v_elp_throughput_metric_upda + { + struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; + struct batadv_hardif_neigh_node *neigh; ++ u32 throughput; ++ bool valid; + + neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, + metric_work); + neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, + bat_v); + +- ewma_throughput_add(&neigh->bat_v.throughput, +- batadv_v_elp_get_throughput(neigh)); ++ valid = batadv_v_elp_get_throughput(neigh, &throughput); ++ if (!valid) ++ goto put_neigh; ++ ++ ewma_throughput_add(&neigh->bat_v.throughput, throughput); + ++put_neigh: + /* decrement refcounter to balance increment performed before scheduling + * this task + */ diff --git a/queue-5.4/series b/queue-5.4/series index d404ededea..637f42eb55 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -245,3 +245,5 @@ tee-optee-fix-supplicant-wait-loop.patch nfp-bpf-add-check-for-nfp_app_ctrl_msg_alloc.patch 
alsa-hda-conexant-add-quirk-for-hp-probook-450-g4-mute-led.patch acct-block-access-to-kernel-internal-filesystems.patch +batman-adv-ignore-neighbor-throughput-metrics-in-error-case.patch +batman-adv-drop-unmanaged-elp-metric-worker.patch