From: Greg Kroah-Hartman Date: Fri, 20 Oct 2023 20:53:35 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v4.14.328~77 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=94b0ee7a9cad4151c4928e7901299e94ad99672e;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: bonding-return-pointer-to-data-after-pull-on-skb.patch i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch net-avoid-uaf-on-deleted-altname.patch net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch net-fix-ifname-in-netlink-ntf-during-netns-move.patch net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch net-pktgen-fix-interface-flags-printing.patch net-rfkill-gpio-prevent-value-glitch-during-probe.patch net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch netlink-correct-offload_xstats-size.patch octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch tcp-fix-listen-warning-with-v4-mapped-v6-address.patch tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch tun-prevent-negative-ifindex.patch wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch 
xfrm-fix-a-data-race-in-xfrm_gen_index.patch xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch xfrm-interface-use-dev_stats_inc.patch --- diff --git a/queue-6.1/bonding-return-pointer-to-data-after-pull-on-skb.patch b/queue-6.1/bonding-return-pointer-to-data-after-pull-on-skb.patch new file mode 100644 index 00000000000..45835c373a6 --- /dev/null +++ b/queue-6.1/bonding-return-pointer-to-data-after-pull-on-skb.patch @@ -0,0 +1,37 @@ +From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001 +From: Jiri Wiesner +Date: Tue, 10 Oct 2023 18:39:33 +0200 +Subject: bonding: Return pointer to data after pull on skb + +From: Jiri Wiesner + +commit d93f3f992780af4a21e6c1ab86946b7c5602f1b9 upstream. + +Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet +headers"), header offsets used to compute a hash in bond_xmit_hash() are +relative to skb->data and not skb->head. If the tail of the header buffer +of an skb really needs to be advanced and the operation is successful, the +pointer to the data must be returned (and not a pointer to the head of the +buffer). + +Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers") +Signed-off-by: Jiri Wiesner +Acked-by: Jay Vosburgh +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3990,7 +3990,7 @@ static inline const void *bond_pull_data + if (likely(n <= hlen)) + return data; + else if (skb && likely(pskb_may_pull(skb, n))) +- return skb->head; ++ return skb->data; + + return NULL; + } diff --git a/queue-6.1/i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch b/queue-6.1/i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch new file mode 100644 index 00000000000..18094ad15ce --- /dev/null +++ b/queue-6.1/i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch @@ -0,0 +1,57 @@ +From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001 +From: Michal Schmidt +Date: Wed, 11 Oct 2023 16:33:32 -0700 +Subject: i40e: prevent crash on probe if hw registers have invalid values + +From: Michal Schmidt + +commit fc6f716a5069180c40a8c9b63631e97da34f64a3 upstream. + +The hardware provides the indexes of the first and the last available +queue and VF. From the indexes, the driver calculates the numbers of +queues and VFs. In theory, a faulty device might say the last index is +smaller than the first index. In that case, the driver's calculation +would underflow, it would attempt to write to non-existent registers +outside of the ioremapped range and crash. + +I ran into this not by having a faulty device, but by an operator error. +I accidentally ran a QE test meant for i40e devices on an ice device. +The test used 'echo i40e > /sys/...ice PCI device.../driver_override', +bound the driver to the device and crashed in one of the wr32 calls in +i40e_clear_hw. + +Add checks to prevent underflows in the calculations of num_queues and +num_vfs. With this fix, the wrong device probing reports errors and +returns a failure without crashing. 
+ +Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts") +Signed-off-by: Michal Schmidt +Reviewed-by: Simon Horman +Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) +Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/intel/i40e/i40e_common.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c +@@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw) + I40E_PFLAN_QALLOC_FIRSTQ_SHIFT; + j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >> + I40E_PFLAN_QALLOC_LASTQ_SHIFT; +- if (val & I40E_PFLAN_QALLOC_VALID_MASK) ++ if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue) + num_queues = (j - base_queue) + 1; + else + num_queues = 0; +@@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw) + I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT; + j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >> + I40E_PF_VT_PFALLOC_LASTVF_SHIFT; +- if (val & I40E_PF_VT_PFALLOC_VALID_MASK) ++ if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i) + num_vfs = (j - i) + 1; + else + num_vfs = 0; diff --git a/queue-6.1/ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch b/queue-6.1/ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch new file mode 100644 index 00000000000..dbc539e1af7 --- /dev/null +++ b/queue-6.1/ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch @@ -0,0 +1,110 @@ +From 195374d893681da43a39796e53b30ac4f20400c4 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 17 Oct 2023 19:23:04 +0000 +Subject: ipv4: fib: annotate races around nh->nh_saddr_genid and nh->nh_saddr + +From: Eric Dumazet + +commit 195374d893681da43a39796e53b30ac4f20400c4 upstream. + +syzbot reported a data-race while accessing nh->nh_saddr_genid [1] + +Add annotations, but leave the code lazy as intended. 
+ +[1] +BUG: KCSAN: data-race in fib_select_path / fib_select_path + +write to 0xffff8881387166f0 of 4 bytes by task 6778 on cpu 1: +fib_info_update_nhc_saddr net/ipv4/fib_semantics.c:1334 [inline] +fib_result_prefsrc net/ipv4/fib_semantics.c:1354 [inline] +fib_select_path+0x292/0x330 net/ipv4/fib_semantics.c:2269 +ip_route_output_key_hash_rcu+0x659/0x12c0 net/ipv4/route.c:2810 +ip_route_output_key_hash net/ipv4/route.c:2644 [inline] +__ip_route_output_key include/net/route.h:134 [inline] +ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2872 +send4+0x1f5/0x520 drivers/net/wireguard/socket.c:61 +wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175 +wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 +wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] +wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 +process_one_work kernel/workqueue.c:2630 [inline] +process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 +worker_thread+0x525/0x730 kernel/workqueue.c:2784 +kthread+0x1d7/0x210 kernel/kthread.c:388 +ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 +ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 + +read to 0xffff8881387166f0 of 4 bytes by task 6759 on cpu 0: +fib_result_prefsrc net/ipv4/fib_semantics.c:1350 [inline] +fib_select_path+0x1cb/0x330 net/ipv4/fib_semantics.c:2269 +ip_route_output_key_hash_rcu+0x659/0x12c0 net/ipv4/route.c:2810 +ip_route_output_key_hash net/ipv4/route.c:2644 [inline] +__ip_route_output_key include/net/route.h:134 [inline] +ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2872 +send4+0x1f5/0x520 drivers/net/wireguard/socket.c:61 +wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175 +wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 +wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] +wg_packet_handshake_send_worker+0x10c/0x150 
drivers/net/wireguard/send.c:51 +process_one_work kernel/workqueue.c:2630 [inline] +process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 +worker_thread+0x525/0x730 kernel/workqueue.c:2784 +kthread+0x1d7/0x210 kernel/kthread.c:388 +ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 +ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 + +value changed: 0x959d3217 -> 0x959d3218 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 6759 Comm: kworker/u4:15 Not tainted 6.6.0-rc4-syzkaller-00029-gcbf3a2cb156a #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 +Workqueue: wg-kex-wg1 wg_packet_handshake_send_worker + +Fixes: 436c3b66ec98 ("ipv4: Invalidate nexthop cache nh_saddr more correctly.") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Reviewed-by: Simon Horman +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20231017192304.82626-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1325,15 +1325,18 @@ __be32 fib_info_update_nhc_saddr(struct + unsigned char scope) + { + struct fib_nh *nh; ++ __be32 saddr; + + if (nhc->nhc_family != AF_INET) + return inet_select_addr(nhc->nhc_dev, 0, scope); + + nh = container_of(nhc, struct fib_nh, nh_common); +- nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope); +- nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); ++ saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope); + +- return nh->nh_saddr; ++ WRITE_ONCE(nh->nh_saddr, saddr); ++ WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid)); ++ ++ return saddr; + } + + __be32 fib_result_prefsrc(struct net *net, struct fib_result *res) +@@ -1347,8 +1350,9 @@ __be32 fib_result_prefsrc(struct net *ne + struct fib_nh *nh; + + nh = container_of(nhc, 
struct fib_nh, nh_common); +- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) +- return nh->nh_saddr; ++ if (READ_ONCE(nh->nh_saddr_genid) == ++ atomic_read(&net->ipv4.dev_addr_genid)) ++ return READ_ONCE(nh->nh_saddr); + } + + return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope); diff --git a/queue-6.1/neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch b/queue-6.1/neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch new file mode 100644 index 00000000000..da07a78531c --- /dev/null +++ b/queue-6.1/neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch @@ -0,0 +1,99 @@ +From 2915240eddba96b37de4c7e9a3d0ac6f9548454b Mon Sep 17 00:00:00 2001 +From: Geert Uytterhoeven +Date: Mon, 16 Oct 2023 14:49:04 +0200 +Subject: neighbor: tracing: Move pin6 inside CONFIG_IPV6=y section +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Geert Uytterhoeven + +commit 2915240eddba96b37de4c7e9a3d0ac6f9548454b upstream. 
+ +When CONFIG_IPV6=n, and building with W=1: + + In file included from include/trace/define_trace.h:102, + from include/trace/events/neigh.h:255, + from net/core/net-traces.c:51: + include/trace/events/neigh.h: In function ‘trace_event_raw_event_neigh_create’: + include/trace/events/neigh.h:42:34: error: variable ‘pin6’ set but not used [-Werror=unused-but-set-variable] + 42 | struct in6_addr *pin6; + | ^~~~ + include/trace/trace_events.h:402:11: note: in definition of macro ‘DECLARE_EVENT_CLASS’ + 402 | { assign; } \ + | ^~~~~~ + include/trace/trace_events.h:44:30: note: in expansion of macro ‘PARAMS’ + 44 | PARAMS(assign), \ + | ^~~~~~ + include/trace/events/neigh.h:23:1: note: in expansion of macro ‘TRACE_EVENT’ + 23 | TRACE_EVENT(neigh_create, + | ^~~~~~~~~~~ + include/trace/events/neigh.h:41:9: note: in expansion of macro ‘TP_fast_assign’ + 41 | TP_fast_assign( + | ^~~~~~~~~~~~~~ + In file included from include/trace/define_trace.h:103, + from include/trace/events/neigh.h:255, + from net/core/net-traces.c:51: + include/trace/events/neigh.h: In function ‘perf_trace_neigh_create’: + include/trace/events/neigh.h:42:34: error: variable ‘pin6’ set but not used [-Werror=unused-but-set-variable] + 42 | struct in6_addr *pin6; + | ^~~~ + include/trace/perf.h:51:11: note: in definition of macro ‘DECLARE_EVENT_CLASS’ + 51 | { assign; } \ + | ^~~~~~ + include/trace/trace_events.h:44:30: note: in expansion of macro ‘PARAMS’ + 44 | PARAMS(assign), \ + | ^~~~~~ + include/trace/events/neigh.h:23:1: note: in expansion of macro ‘TRACE_EVENT’ + 23 | TRACE_EVENT(neigh_create, + | ^~~~~~~~~~~ + include/trace/events/neigh.h:41:9: note: in expansion of macro ‘TP_fast_assign’ + 41 | TP_fast_assign( + | ^~~~~~~~~~~~~~ + +Indeed, the variable pin6 is declared and initialized unconditionally, +while it is only used and needlessly re-initialized when support for +IPv6 is enabled. 
+ +Fix this by dropping the unused variable initialization, and moving the +variable declaration inside the existing section protected by a check +for CONFIG_IPV6. + +Fixes: fc651001d2c5ca4f ("neighbor: Add tracepoint to __neigh_create") +Signed-off-by: Geert Uytterhoeven +Reviewed-by: Simon Horman +Tested-by: Simon Horman # build-tested +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/trace/events/neigh.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/include/trace/events/neigh.h ++++ b/include/trace/events/neigh.h +@@ -39,7 +39,6 @@ TRACE_EVENT(neigh_create, + ), + + TP_fast_assign( +- struct in6_addr *pin6; + __be32 *p32; + + __entry->family = tbl->family; +@@ -47,7 +46,6 @@ TRACE_EVENT(neigh_create, + __entry->entries = atomic_read(&tbl->gc_entries); + __entry->created = n != NULL; + __entry->gc_exempt = exempt_from_gc; +- pin6 = (struct in6_addr *)__entry->primary_key6; + p32 = (__be32 *)__entry->primary_key4; + + if (tbl->family == AF_INET) +@@ -57,6 +55,8 @@ TRACE_EVENT(neigh_create, + + #if IS_ENABLED(CONFIG_IPV6) + if (tbl->family == AF_INET6) { ++ struct in6_addr *pin6; ++ + pin6 = (struct in6_addr *)__entry->primary_key6; + *pin6 = *(struct in6_addr *)pkey; + } diff --git a/queue-6.1/net-avoid-uaf-on-deleted-altname.patch b/queue-6.1/net-avoid-uaf-on-deleted-altname.patch new file mode 100644 index 00000000000..56eaff99b96 --- /dev/null +++ b/queue-6.1/net-avoid-uaf-on-deleted-altname.patch @@ -0,0 +1,65 @@ +From 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Tue, 17 Oct 2023 18:38:15 -0700 +Subject: net: avoid UAF on deleted altname + +From: Jakub Kicinski + +commit 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b upstream. + +Altnames are accessed under RCU (dev_get_by_name_rcu()) +but freed by kfree() with no synchronization point. 
+ +Each node has one or two allocations (node and a variable-size +name, sometimes the name is netdev->name). Adding rcu_heads +here is a bit tedious. Besides most code which unlists the names +already has rcu barriers - so take the simpler approach of adding +synchronize_rcu(). Note that the one on the unregistration path +(which matters more) is removed by the next fix. + +Fixes: ff92741270bf ("net: introduce name_node struct to be used in hashlist") +Reviewed-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -345,7 +345,6 @@ int netdev_name_node_alt_create(struct n + static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node) + { + list_del(&name_node->list); +- netdev_name_node_del(name_node); + kfree(name_node->name); + netdev_name_node_free(name_node); + } +@@ -364,6 +363,8 @@ int netdev_name_node_alt_destroy(struct + if (name_node == dev->name_node || name_node->dev != dev) + return -EINVAL; + ++ netdev_name_node_del(name_node); ++ synchronize_rcu(); + __netdev_name_node_alt_destroy(name_node); + + return 0; +@@ -10835,6 +10836,7 @@ void unregister_netdevice_many(struct li + synchronize_net(); + + list_for_each_entry(dev, head, unreg_list) { ++ struct netdev_name_node *name_node; + struct sk_buff *skb = NULL; + + /* Shutdown queueing discipline. 
*/ +@@ -10860,6 +10862,9 @@ void unregister_netdevice_many(struct li + dev_uc_flush(dev); + dev_mc_flush(dev); + ++ netdev_for_each_altname(dev, name_node) ++ netdev_name_node_del(name_node); ++ synchronize_rcu(); + netdev_name_node_alt_flush(dev); + netdev_name_node_free(dev->name_node); + diff --git a/queue-6.1/net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch b/queue-6.1/net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch new file mode 100644 index 00000000000..3f40defab2f --- /dev/null +++ b/queue-6.1/net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch @@ -0,0 +1,87 @@ +From 7663d522099ecc464512164e660bc771b2ff7b64 Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Tue, 17 Oct 2023 18:38:14 -0700 +Subject: net: check for altname conflicts when changing netdev's netns + +From: Jakub Kicinski + +commit 7663d522099ecc464512164e660bc771b2ff7b64 upstream. + +It's currently possible to create an altname conflicting +with an altname or real name of another device by creating +it in another netns and moving it over: + + [ ~]$ ip link add dev eth0 type dummy + + [ ~]$ ip netns add test + [ ~]$ ip -netns test link add dev ethX netns test type dummy + [ ~]$ ip -netns test link property add dev ethX altname eth0 + [ ~]$ ip -netns test link set dev ethX netns 1 + + [ ~]$ ip link + ... + 3: eth0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff + ... + 5: ethX: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/ether 26:b7:28:78:38:0f brd ff:ff:ff:ff:ff:ff + altname eth0 + +Create a macro for walking the altnames, this hopefully makes +it clearer that the list we walk contains only altnames. +Which is otherwise not entirely intuitive. 
+ +Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames") +Reviewed-by: Jiri Pirko +Signed-off-by: Jakub Kicinski +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 9 ++++++++- + net/core/dev.h | 3 +++ + 2 files changed, 11 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1054,7 +1054,8 @@ static int __dev_alloc_name(struct net * + + for_each_netdev(net, d) { + struct netdev_name_node *name_node; +- list_for_each_entry(name_node, &d->name_node->list, list) { ++ ++ netdev_for_each_altname(d, name_node) { + if (!sscanf(name_node->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) +@@ -10949,6 +10950,7 @@ EXPORT_SYMBOL(unregister_netdev); + int __dev_change_net_namespace(struct net_device *dev, struct net *net, + const char *pat, int new_ifindex) + { ++ struct netdev_name_node *name_node; + struct net *net_old = dev_net(dev); + char new_name[IFNAMSIZ] = {}; + int err, new_nsid; +@@ -10981,6 +10983,11 @@ int __dev_change_net_namespace(struct ne + if (err < 0) + goto out; + } ++ /* Check that none of the altnames conflicts. */ ++ err = -EEXIST; ++ netdev_for_each_altname(dev, name_node) ++ if (netdev_name_in_use(net, name_node->name)) ++ goto out; + + /* Check that new_ifindex isn't used yet. 
*/ + err = -EBUSY; +--- a/net/core/dev.h ++++ b/net/core/dev.h +@@ -61,6 +61,9 @@ struct netdev_name_node { + int netdev_get_name(struct net *net, char *name, int ifindex); + int dev_change_name(struct net_device *dev, const char *newname); + ++#define netdev_for_each_altname(dev, namenode) \ ++ list_for_each_entry((namenode), &(dev)->name_node->list, list) ++ + int netdev_name_node_alt_create(struct net_device *dev, const char *name); + int netdev_name_node_alt_destroy(struct net_device *dev, const char *name); + diff --git a/queue-6.1/net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch b/queue-6.1/net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch new file mode 100644 index 00000000000..8d4f71d95cb --- /dev/null +++ b/queue-6.1/net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch @@ -0,0 +1,91 @@ +From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001 +From: Jinjie Ruan +Date: Wed, 11 Oct 2023 11:24:19 +0800 +Subject: net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register() + +From: Jinjie Ruan + +commit 61b40cefe51af005c72dbdcf975a3d166c6e6406 upstream. + +In bcm_sf2_mdio_register(), the class_find_device() will call get_device() +to increment reference count for priv->master_mii_bus->dev if +of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register() +fails, it will call get_device() twice without decrement reference count +for the device. And it is the same if bcm_sf2_mdio_register() succeeds but +fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the +reference count has not decremented to zero, the dev related resource will +not be freed. + +So remove the get_device() in bcm_sf2_mdio_register(), and call +put_device() if mdiobus_alloc() or mdiobus_register() fails and in +bcm_sf2_mdio_unregister() to solve the issue. + +And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and +just return 0 if it succeeds to make it cleaner. 
+ +Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") +Signed-off-by: Jinjie Ruan +Suggested-by: Simon Horman +Reviewed-by: Simon Horman +Reviewed-by: Florian Fainelli +Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++--------- + 1 file changed, 15 insertions(+), 9 deletions(-) + +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct + dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); + priv->master_mii_bus = of_mdio_find_bus(dn); + if (!priv->master_mii_bus) { +- of_node_put(dn); +- return -EPROBE_DEFER; ++ err = -EPROBE_DEFER; ++ goto err_of_node_put; + } + +- get_device(&priv->master_mii_bus->dev); + priv->master_mii_dn = dn; + + priv->slave_mii_bus = mdiobus_alloc(); + if (!priv->slave_mii_bus) { +- of_node_put(dn); +- return -ENOMEM; ++ err = -ENOMEM; ++ goto err_put_master_mii_bus_dev; + } + + priv->slave_mii_bus->priv = priv; +@@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct + } + + err = mdiobus_register(priv->slave_mii_bus); +- if (err && dn) { +- mdiobus_free(priv->slave_mii_bus); +- of_node_put(dn); +- } ++ if (err && dn) ++ goto err_free_slave_mii_bus; ++ ++ return 0; + ++err_free_slave_mii_bus: ++ mdiobus_free(priv->slave_mii_bus); ++err_put_master_mii_bus_dev: ++ put_device(&priv->master_mii_bus->dev); ++err_of_node_put: ++ of_node_put(dn); + return err; + } + +@@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(stru + { + mdiobus_unregister(priv->slave_mii_bus); + mdiobus_free(priv->slave_mii_bus); ++ put_device(&priv->master_mii_bus->dev); + of_node_put(priv->master_mii_dn); + } + diff --git a/queue-6.1/net-fix-ifname-in-netlink-ntf-during-netns-move.patch b/queue-6.1/net-fix-ifname-in-netlink-ntf-during-netns-move.patch new file mode 100644 index 00000000000..e85b00305ec --- 
/dev/null +++ b/queue-6.1/net-fix-ifname-in-netlink-ntf-during-netns-move.patch @@ -0,0 +1,124 @@ +From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Tue, 17 Oct 2023 18:38:13 -0700 +Subject: net: fix ifname in netlink ntf during netns move + +From: Jakub Kicinski + +commit 311cca40661f428b7aa114fb5af578cfdbe3e8b6 upstream. + +dev_get_valid_name() overwrites the netdev's name on success. +This makes it hard to use in prepare-commit-like fashion, +where we do validation first, and "commit" to the change +later. + +Factor out a helper which lets us save the new name to a buffer. +Use it to fix the problem of notification on netns move having +incorrect name: + + 5: eth0: mtu 1500 qdisc noop state DOWN group default + link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff + 6: eth1: mtu 1500 qdisc noop state DOWN group default + link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff + + [ ~]# ip link set dev eth0 netns 1 name eth1 + +ip monitor inside netns: + Deleted inet eth0 + Deleted inet6 eth0 + Deleted 5: eth1: mtu 1500 qdisc noop state DOWN group default + link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7 + +Name is reported as eth1 in old netns for ifindex 5, already renamed. 
+ +Fixes: d90310243fd7 ("net: device name allocation cleanups") +Signed-off-by: Jakub Kicinski +Reviewed-by: Jiri Pirko +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 44 +++++++++++++++++++++++++++++++------------- + 1 file changed, 31 insertions(+), 13 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1091,6 +1091,26 @@ static int __dev_alloc_name(struct net * + return -ENFILE; + } + ++static int dev_prep_valid_name(struct net *net, struct net_device *dev, ++ const char *want_name, char *out_name) ++{ ++ int ret; ++ ++ if (!dev_valid_name(want_name)) ++ return -EINVAL; ++ ++ if (strchr(want_name, '%')) { ++ ret = __dev_alloc_name(net, want_name, out_name); ++ return ret < 0 ? ret : 0; ++ } else if (netdev_name_in_use(net, want_name)) { ++ return -EEXIST; ++ } else if (out_name != want_name) { ++ strscpy(out_name, want_name, IFNAMSIZ); ++ } ++ ++ return 0; ++} ++ + static int dev_alloc_name_ns(struct net *net, + struct net_device *dev, + const char *name) +@@ -1128,19 +1148,13 @@ EXPORT_SYMBOL(dev_alloc_name); + static int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) + { +- BUG_ON(!net); +- +- if (!dev_valid_name(name)) +- return -EINVAL; +- +- if (strchr(name, '%')) +- return dev_alloc_name_ns(net, dev, name); +- else if (netdev_name_in_use(net, name)) +- return -EEXIST; +- else if (dev->name != name) +- strscpy(dev->name, name, IFNAMSIZ); ++ char buf[IFNAMSIZ]; ++ int ret; + +- return 0; ++ ret = dev_prep_valid_name(net, dev, name, buf); ++ if (ret >= 0) ++ strscpy(dev->name, buf, IFNAMSIZ); ++ return ret; + } + + /** +@@ -10936,6 +10950,7 @@ int __dev_change_net_namespace(struct ne + const char *pat, int new_ifindex) + { + struct net *net_old = dev_net(dev); ++ char new_name[IFNAMSIZ] = {}; + int err, new_nsid; + + ASSERT_RTNL(); +@@ -10962,7 +10977,7 @@ int __dev_change_net_namespace(struct ne + /* We get here if we can't use the current device name */ + if (!pat) + goto 
out; +- err = dev_get_valid_name(net, dev, pat); ++ err = dev_prep_valid_name(net, dev, pat, new_name); + if (err < 0) + goto out; + } +@@ -11030,6 +11045,9 @@ int __dev_change_net_namespace(struct ne + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); + ++ if (new_name[0]) /* Rename the netdev to prepared name */ ++ strscpy(dev->name, new_name, IFNAMSIZ); ++ + /* Fixup kobjects */ + err = device_rename(&dev->dev, dev->name); + WARN_ON(err); diff --git a/queue-6.1/net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch b/queue-6.1/net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch new file mode 100644 index 00000000000..3d328d37f20 --- /dev/null +++ b/queue-6.1/net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch @@ -0,0 +1,32 @@ +From 513f61e2193350c7a345da98559b80f61aec4fa6 Mon Sep 17 00:00:00 2001 +From: Ma Ke +Date: Mon, 9 Oct 2023 09:13:37 +0800 +Subject: net: ipv4: fix return value check in esp_remove_trailer + +From: Ma Ke + +commit 513f61e2193350c7a345da98559b80f61aec4fa6 upstream. + +In esp_remove_trailer(), to avoid an unexpected result returned by +pskb_trim, we should check the return value of pskb_trim(). 
+ +Signed-off-by: Ma Ke +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/esp4.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/esp4.c ++++ b/net/ipv4/esp4.c +@@ -732,7 +732,9 @@ static inline int esp_remove_trailer(str + skb->csum = csum_block_sub(skb->csum, csumdiff, + skb->len - trimlen); + } +- pskb_trim(skb, skb->len - trimlen); ++ ret = pskb_trim(skb, skb->len - trimlen); ++ if (unlikely(ret)) ++ return ret; + + ret = nexthdr[1]; + diff --git a/queue-6.1/net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch b/queue-6.1/net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch new file mode 100644 index 00000000000..928f0fa7a5d --- /dev/null +++ b/queue-6.1/net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch @@ -0,0 +1,32 @@ +From dad4e491e30b20f4dc615c9da65d2142d703b5c2 Mon Sep 17 00:00:00 2001 +From: Ma Ke +Date: Sat, 7 Oct 2023 08:59:53 +0800 +Subject: net: ipv6: fix return value check in esp_remove_trailer + +From: Ma Ke + +commit dad4e491e30b20f4dc615c9da65d2142d703b5c2 upstream. + +In esp_remove_trailer(), to avoid an unexpected result returned by +pskb_trim, we should check the return value of pskb_trim(). 
+ +Signed-off-by: Ma Ke +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/esp6.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv6/esp6.c ++++ b/net/ipv6/esp6.c +@@ -770,7 +770,9 @@ static inline int esp_remove_trailer(str + skb->csum = csum_block_sub(skb->csum, csumdiff, + skb->len - trimlen); + } +- pskb_trim(skb, skb->len - trimlen); ++ ret = pskb_trim(skb, skb->len - trimlen); ++ if (unlikely(ret)) ++ return ret; + + ret = nexthdr[1]; + diff --git a/queue-6.1/net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch b/queue-6.1/net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch new file mode 100644 index 00000000000..c585c7a72a0 --- /dev/null +++ b/queue-6.1/net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch @@ -0,0 +1,37 @@ +From 6200e00e112ce2d17b066a20dd2476d9aecbefa6 Mon Sep 17 00:00:00 2001 +From: Florian Fainelli +Date: Tue, 17 Oct 2023 13:51:19 -0700 +Subject: net: phy: bcm7xxx: Add missing 16nm EPHY statistics + +From: Florian Fainelli + +commit 6200e00e112ce2d17b066a20dd2476d9aecbefa6 upstream. + +The .probe() function would allocate the necessary space and ensure that +the library call sizes the number of statistics but the callbacks +necessary to fetch the name and values were not wired up. 
+ +Reported-by: Justin Chen +Fixes: f68d08c437f9 ("net: phy: bcm7xxx: Add EPHY entry for 72165") +Reviewed-by: Andrew Lunn +Signed-off-by: Florian Fainelli +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20231017205119.416392-1-florian.fainelli@broadcom.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/bcm7xxx.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/phy/bcm7xxx.c ++++ b/drivers/net/phy/bcm7xxx.c +@@ -907,6 +907,9 @@ static void bcm7xxx_28nm_remove(struct p + .name = _name, \ + /* PHY_BASIC_FEATURES */ \ + .flags = PHY_IS_INTERNAL, \ ++ .get_sset_count = bcm_phy_get_sset_count, \ ++ .get_strings = bcm_phy_get_strings, \ ++ .get_stats = bcm7xxx_28nm_get_phy_stats, \ + .probe = bcm7xxx_28nm_probe, \ + .remove = bcm7xxx_28nm_remove, \ + .config_init = bcm7xxx_16nm_ephy_config_init, \ diff --git a/queue-6.1/net-pktgen-fix-interface-flags-printing.patch b/queue-6.1/net-pktgen-fix-interface-flags-printing.patch new file mode 100644 index 00000000000..1f3374785ea --- /dev/null +++ b/queue-6.1/net-pktgen-fix-interface-flags-printing.patch @@ -0,0 +1,60 @@ +From 1d30162f35c7a73fc2f8cdcdcdbd690bedb99d1a Mon Sep 17 00:00:00 2001 +From: Gavrilov Ilia +Date: Mon, 16 Oct 2023 14:08:59 +0000 +Subject: net: pktgen: Fix interface flags printing + +From: Gavrilov Ilia + +commit 1d30162f35c7a73fc2f8cdcdcdbd690bedb99d1a upstream. + +Device flags are displayed incorrectly: +1) The comparison (i == F_FLOW_SEQ) is always false, because F_FLOW_SEQ +is equal to (1 << FLOW_SEQ_SHIFT) == 2048, and the maximum value +of the 'i' variable is (NR_PKT_FLAG - 1) == 17. It should be compared +with FLOW_SEQ_SHIFT. + +2) Similarly to the F_IPSEC flag. + +3) Also add spaces to the print end of the string literal "spi:%u" +to prevent the output from merging with the flag that follows. + +Found by InfoTeCS on behalf of Linux Verification Center +(linuxtesting.org) with SVACE. 
+ +Fixes: 99c6d3d20d62 ("pktgen: Remove brute-force printing of flags") +Signed-off-by: Gavrilov Ilia +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/pktgen.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/net/core/pktgen.c ++++ b/net/core/pktgen.c +@@ -669,19 +669,19 @@ static int pktgen_if_show(struct seq_fil + seq_puts(seq, " Flags: "); + + for (i = 0; i < NR_PKT_FLAGS; i++) { +- if (i == F_FLOW_SEQ) ++ if (i == FLOW_SEQ_SHIFT) + if (!pkt_dev->cflows) + continue; + +- if (pkt_dev->flags & (1 << i)) ++ if (pkt_dev->flags & (1 << i)) { + seq_printf(seq, "%s ", pkt_flag_names[i]); +- else if (i == F_FLOW_SEQ) +- seq_puts(seq, "FLOW_RND "); +- + #ifdef CONFIG_XFRM +- if (i == F_IPSEC && pkt_dev->spi) +- seq_printf(seq, "spi:%u", pkt_dev->spi); ++ if (i == IPSEC_SHIFT && pkt_dev->spi) ++ seq_printf(seq, "spi:%u ", pkt_dev->spi); + #endif ++ } else if (i == FLOW_SEQ_SHIFT) { ++ seq_puts(seq, "FLOW_RND "); ++ } + } + + seq_puts(seq, "\n"); diff --git a/queue-6.1/net-rfkill-gpio-prevent-value-glitch-during-probe.patch b/queue-6.1/net-rfkill-gpio-prevent-value-glitch-during-probe.patch new file mode 100644 index 00000000000..dd66d07f3d3 --- /dev/null +++ b/queue-6.1/net-rfkill-gpio-prevent-value-glitch-during-probe.patch @@ -0,0 +1,56 @@ +From b2f750c3a80b285cd60c9346f8c96bd0a2a66cde Mon Sep 17 00:00:00 2001 +From: Josua Mayer +Date: Wed, 4 Oct 2023 18:39:28 +0200 +Subject: net: rfkill: gpio: prevent value glitch during probe + +From: Josua Mayer + +commit b2f750c3a80b285cd60c9346f8c96bd0a2a66cde upstream. + +When either reset- or shutdown-gpio are initially deasserted, +e.g. after a reboot - or when the hardware does not include pull-down, +there will be a short toggle of both IOs to logical 0 and back to 1. + +It seems that the rfkill default is unblocked, so the driver should not +glitch to output low during probe. +It can lead e.g. 
to unexpected lte modem reconnect: + +[1] root@localhost:~# dmesg | grep "usb 2-1" +[ 2.136124] usb 2-1: new SuperSpeed USB device number 2 using xhci-hcd +[ 21.215278] usb 2-1: USB disconnect, device number 2 +[ 28.833977] usb 2-1: new SuperSpeed USB device number 3 using xhci-hcd + +The glitch has been discovered on an arm64 board, now that device-tree +support for the rfkill-gpio driver has finally appeared :). + +Change the flags for devm_gpiod_get_optional from GPIOD_OUT_LOW to +GPIOD_ASIS to avoid any glitches. +The rfkill driver will set the intended value during rfkill_sync_work. + +Fixes: 7176ba23f8b5 ("net: rfkill: add generic gpio rfkill driver") +Signed-off-by: Josua Mayer +Link: https://lore.kernel.org/r/20231004163928.14609-1-josua@solid-run.com +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/rfkill/rfkill-gpio.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/rfkill/rfkill-gpio.c ++++ b/net/rfkill/rfkill-gpio.c +@@ -98,13 +98,13 @@ static int rfkill_gpio_probe(struct plat + + rfkill->clk = devm_clk_get(&pdev->dev, NULL); + +- gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW); ++ gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); + if (IS_ERR(gpio)) + return PTR_ERR(gpio); + + rfkill->reset_gpio = gpio; + +- gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW); ++ gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS); + if (IS_ERR(gpio)) + return PTR_ERR(gpio); + diff --git a/queue-6.1/net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch b/queue-6.1/net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch new file mode 100644 index 00000000000..989a671d03e --- /dev/null +++ b/queue-6.1/net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch @@ -0,0 +1,90 @@ +From a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 Mon Sep 17 00:00:00 2001 +From: Pedro Tammela +Date: Tue, 17 Oct 2023 11:36:02 -0300 
+Subject: net/sched: sch_hfsc: upgrade 'rt' to 'sc' when it becomes a inner curve + +From: Pedro Tammela + +commit a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 upstream. + +Christian Theune says: + I upgraded from 6.1.38 to 6.1.55 this morning and it broke my traffic shaping script, + leaving me with a non-functional uplink on a remote router. + +A 'rt' curve cannot be used as a inner curve (parent class), but we were +allowing such configurations since the qdisc was introduced. Such +configurations would trigger a UAF as Budimir explains: + The parent will have vttree_insert() called on it in init_vf(), + but will not have vttree_remove() called on it in update_vf() + because it does not have the HFSC_FSC flag set. + +The qdisc always assumes that inner classes have the HFSC_FSC flag set. +This is by design as it doesn't make sense 'qdisc wise' for an 'rt' +curve to be an inner curve. + +Budimir's original patch disallows users to add classes with a 'rt' +parent, but this is too strict as it breaks users that have been using +'rt' as a inner class. Another approach, taken by this patch, is to +upgrade the inner 'rt' into a 'sc', warning the user in the process. +It avoids the UAF reported by Budimir while also being more permissive +to bad scripts/users/code using 'rt' as a inner class. + +Users checking the `tc class ls [...]` or `tc class get [...]` dumps would +observe the curve change and are potentially breaking with this change. 
+ +v1->v2: https://lore.kernel.org/all/20231013151057.2611860-1-pctammela@mojatatu.com/ +- Correct 'Fixes' tag and merge with revert (Jakub) + +Cc: Christian Theune +Cc: Budimir Markovic +Fixes: b3d26c5702c7 ("net/sched: sch_hfsc: Ensure inner classes have fsc curve") +Signed-off-by: Pedro Tammela +Acked-by: Jamal Hadi Salim +Link: https://lore.kernel.org/r/20231017143602.3191556-1-pctammela@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_hfsc.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_hfsc.c ++++ b/net/sched/sch_hfsc.c +@@ -903,6 +903,14 @@ hfsc_change_usc(struct hfsc_class *cl, s + cl->cl_flags |= HFSC_USC; + } + ++static void ++hfsc_upgrade_rt(struct hfsc_class *cl) ++{ ++ cl->cl_fsc = cl->cl_rsc; ++ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); ++ cl->cl_flags |= HFSC_FSC; ++} ++ + static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = { + [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) }, + [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) }, +@@ -1012,10 +1020,6 @@ hfsc_change_class(struct Qdisc *sch, u32 + if (parent == NULL) + return -ENOENT; + } +- if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { +- NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC"); +- return -EINVAL; +- } + + if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) + return -EINVAL; +@@ -1066,6 +1070,12 @@ hfsc_change_class(struct Qdisc *sch, u32 + cl->cf_tree = RB_ROOT; + + sch_tree_lock(sch); ++ /* Check if the inner class is a misconfigured 'rt' */ ++ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { ++ NL_SET_ERR_MSG(extack, ++ "Forced curve change on parent 'rt' to 'sc'"); ++ hfsc_upgrade_rt(parent); ++ } + qdisc_class_hash_insert(&q->clhash, &cl->cl_common); + list_add_tail(&cl->siblings, &parent->children); + if (parent->level == 0) diff --git 
a/queue-6.1/net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch b/queue-6.1/net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch new file mode 100644 index 00000000000..cd9a2fdbd43 --- /dev/null +++ b/queue-6.1/net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch @@ -0,0 +1,32 @@ +From c53647a5df9e66dd9fedf240198e1fe50d88c286 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 16 Oct 2023 20:28:10 +0300 +Subject: net: usb: smsc95xx: Fix an error code in smsc95xx_reset() + +From: Dan Carpenter + +commit c53647a5df9e66dd9fedf240198e1fe50d88c286 upstream. + +Return a negative error code instead of success. + +Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") +Signed-off-by: Dan Carpenter +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/147927f0-9ada-45cc-81ff-75a19dd30b76@moroto.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/smsc95xx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/usb/smsc95xx.c ++++ b/drivers/net/usb/smsc95xx.c +@@ -897,7 +897,7 @@ static int smsc95xx_reset(struct usbnet + + if (timeout >= 100) { + netdev_warn(dev->net, "timeout waiting for completion of Lite Reset\n"); +- return ret; ++ return -ETIMEDOUT; + } + + ret = smsc95xx_set_mac_address(dev); diff --git a/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch b/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch new file mode 100644 index 00000000000..50870d35b38 --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch @@ -0,0 +1,36 @@ +From ebd032fa881882fef2acb9da1bbde48d8233241d Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Wed, 4 Oct 2023 13:12:58 +0200 +Subject: netfilter: nf_tables: do not remove elements if set backend implements .abort + +From: Pablo Neira Ayuso + +commit 
ebd032fa881882fef2acb9da1bbde48d8233241d upstream. + +pipapo set backend maintains two copies of the datastructure, removing +the elements from the copy that is going to be discarded slows down +the abort path significantly, from several minutes to few seconds after +this patch. + +Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9931,7 +9931,10 @@ static int __nf_tables_abort(struct net + break; + } + te = (struct nft_trans_elem *)trans->data; +- nft_setelem_remove(net, te->set, &te->elem); ++ if (!te->set->ops->abort || ++ nft_setelem_is_catchall(te->set, &te->elem)) ++ nft_setelem_remove(net, te->set, &te->elem); ++ + if (!nft_setelem_is_catchall(te->set, &te->elem)) + atomic_dec(&te->set->nelems); + diff --git a/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch b/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch new file mode 100644 index 00000000000..b5d12a1cc0b --- /dev/null +++ b/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch @@ -0,0 +1,35 @@ +From f86fb94011aeb3b26337fc22204ca726aeb8bc24 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Wed, 18 Oct 2023 13:18:39 +0200 +Subject: netfilter: nf_tables: revert do not remove elements if set backend implements .abort + +From: Pablo Neira Ayuso + +commit f86fb94011aeb3b26337fc22204ca726aeb8bc24 upstream. + +nf_tables_abort_release() path calls nft_set_elem_destroy() for +NFT_MSG_NEWSETELEM which releases the element, however, a reference to +the element still remains in the working copy. 
+ +Fixes: ebd032fa8818 ("netfilter: nf_tables: do not remove elements if set backend implements .abort") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9931,10 +9931,7 @@ static int __nf_tables_abort(struct net + break; + } + te = (struct nft_trans_elem *)trans->data; +- if (!te->set->ops->abort || +- nft_setelem_is_catchall(te->set, &te->elem)) +- nft_setelem_remove(net, te->set, &te->elem); +- ++ nft_setelem_remove(net, te->set, &te->elem); + if (!nft_setelem_is_catchall(te->set, &te->elem)) + atomic_dec(&te->set->nelems); + diff --git a/queue-6.1/netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch b/queue-6.1/netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch new file mode 100644 index 00000000000..75727203a8f --- /dev/null +++ b/queue-6.1/netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch @@ -0,0 +1,34 @@ +From d111692a59c1470ae530cbb39bcf0346c950ecc7 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Tue, 17 Oct 2023 12:28:27 +0200 +Subject: netfilter: nft_set_rbtree: .deactivate fails if element has expired + +From: Pablo Neira Ayuso + +commit d111692a59c1470ae530cbb39bcf0346c950ecc7 upstream. + +This allows to remove an expired element which is not possible in other +existing set backends, this is more noticeable if gc-interval is high so +expired elements remain in the tree. On-demand gc also does not help in +this case, because this is delete element path. Return NULL if element +has expired. 
+ +Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_set_rbtree.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -568,6 +568,8 @@ static void *nft_rbtree_deactivate(const + nft_rbtree_interval_end(this)) { + parent = parent->rb_right; + continue; ++ } else if (nft_set_elem_expired(&rbe->ext)) { ++ break; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; diff --git a/queue-6.1/netlink-correct-offload_xstats-size.patch b/queue-6.1/netlink-correct-offload_xstats-size.patch new file mode 100644 index 00000000000..846449eaab1 --- /dev/null +++ b/queue-6.1/netlink-correct-offload_xstats-size.patch @@ -0,0 +1,88 @@ +From 503930f8e113edc86f92b767efb4ea57bdffffb2 Mon Sep 17 00:00:00 2001 +From: Christoph Paasch +Date: Thu, 12 Oct 2023 21:14:48 -0700 +Subject: netlink: Correct offload_xstats size + +From: Christoph Paasch + +commit 503930f8e113edc86f92b767efb4ea57bdffffb2 upstream. + +rtnl_offload_xstats_get_size_hw_s_info_one() conditionalizes the +size-computation for IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED based on whether +or not the device has offload_xstats enabled. + +However, rtnl_offload_xstats_fill_hw_s_info_one() is adding the u8 for +that field unconditionally. 
+ +syzkaller triggered a WARNING in rtnl_stats_get due to this: +------------[ cut here ]------------ +WARNING: CPU: 0 PID: 754 at net/core/rtnetlink.c:5982 rtnl_stats_get+0x2f4/0x300 +Modules linked in: +CPU: 0 PID: 754 Comm: syz-executor148 Not tainted 6.6.0-rc2-g331b78eb12af #45 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 +RIP: 0010:rtnl_stats_get+0x2f4/0x300 net/core/rtnetlink.c:5982 +Code: ff ff 89 ee e8 7d 72 50 ff 83 fd a6 74 17 e8 33 6e 50 ff 4c 89 ef be 02 00 00 00 e8 86 00 fa ff e9 7b fe ff ff e8 1c 6e 50 ff <0f> 0b eb e5 e8 73 79 7b 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90 +RSP: 0018:ffffc900006837c0 EFLAGS: 00010293 +RAX: ffffffff81cf7f24 RBX: ffff8881015d9000 RCX: ffff888101815a00 +RDX: 0000000000000000 RSI: 00000000ffffffa6 RDI: 00000000ffffffa6 +RBP: 00000000ffffffa6 R08: ffffffff81cf7f03 R09: 0000000000000001 +R10: ffff888101ba47b9 R11: ffff888101815a00 R12: ffff8881017dae00 +R13: ffff8881017dad00 R14: ffffc90000683ab8 R15: ffffffff83c1f740 +FS: 00007fbc22dbc740(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020000046 CR3: 000000010264e003 CR4: 0000000000170ef0 +Call Trace: + + rtnetlink_rcv_msg+0x677/0x710 net/core/rtnetlink.c:6480 + netlink_rcv_skb+0xea/0x1c0 net/netlink/af_netlink.c:2545 + netlink_unicast+0x430/0x500 net/netlink/af_netlink.c:1342 + netlink_sendmsg+0x4fc/0x620 net/netlink/af_netlink.c:1910 + sock_sendmsg+0xa8/0xd0 net/socket.c:730 + ____sys_sendmsg+0x22a/0x320 net/socket.c:2541 + ___sys_sendmsg+0x143/0x190 net/socket.c:2595 + __x64_sys_sendmsg+0xd8/0x150 net/socket.c:2624 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x47/0xa0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x6e/0xd8 +RIP: 0033:0x7fbc22e8d6a9 +Code: 5c c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 4f 37 0d 
00 f7 d8 64 89 01 48 +RSP: 002b:00007ffc4320e778 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00000000004007d0 RCX: 00007fbc22e8d6a9 +RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003 +RBP: 0000000000000001 R08: 0000000000000000 R09: 00000000004007d0 +R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffc4320e898 +R13: 00007ffc4320e8a8 R14: 00000000004004a0 R15: 00007fbc22fa5a80 + +---[ end trace 0000000000000000 ]--- + +Which didn't happen prior to commit bf9f1baa279f ("net: add dedicated +kmem_cache for typical/small skb->head") as the skb always was large +enough. + +Fixes: 0e7788fd7622 ("net: rtnetlink: Add UAPI for obtaining L3 offload xstats") +Signed-off-by: Christoph Paasch +Reviewed-by: Petr Machata +Link: https://lore.kernel.org/r/20231013041448.8229-1-cpaasch@apple.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -5394,13 +5394,11 @@ static unsigned int + rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev, + enum netdev_offload_xstats_type type) + { +- bool enabled = netdev_offload_xstats_enabled(dev, type); +- + return nla_total_size(0) + + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */ + nla_total_size(sizeof(u8)) + + /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */ +- (enabled ? 
nla_total_size(sizeof(u8)) : 0) + ++ nla_total_size(sizeof(u8)) + + 0; + } + diff --git a/queue-6.1/octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch b/queue-6.1/octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch new file mode 100644 index 00000000000..b0e0a994ec1 --- /dev/null +++ b/queue-6.1/octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch @@ -0,0 +1,53 @@ +From a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 Mon Sep 17 00:00:00 2001 +From: Shinas Rasheed +Date: Tue, 17 Oct 2023 03:50:30 -0700 +Subject: octeon_ep: update BQL sent bytes before ringing doorbell + +From: Shinas Rasheed + +commit a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 upstream. + +Sometimes Tx is completed immediately after doorbell is updated, which +causes Tx completion routing to update completion bytes before the +same packet bytes are updated in sent bytes in transmit function, hence +hitting BUG_ON() in dql_completed(). To avoid this, update BQL +sent bytes before ringing doorbell. + +Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") +Signed-off-by: Shinas Rasheed +Link: https://lore.kernel.org/r/20231017105030.2310966-1-srasheed@marvell.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c ++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +@@ -707,20 +707,19 @@ static netdev_tx_t octep_start_xmit(stru + hw_desc->dptr = tx_buffer->sglist_dma; + } + +- /* Flush the hw descriptor before writing to doorbell */ +- wmb(); +- +- /* Ring Doorbell to notify the NIC there is a new packet */ +- writel(1, iq->doorbell_reg); ++ netdev_tx_sent_queue(iq->netdev_q, skb->len); ++ skb_tx_timestamp(skb); + atomic_inc(&iq->instr_pending); + wi++; + if (wi == iq->max_count) + wi = 0; + iq->host_write_index = wi; ++ /* Flush the hw descriptor 
before writing to doorbell */ ++ wmb(); + +- netdev_tx_sent_queue(iq->netdev_q, skb->len); ++ /* Ring Doorbell to notify the NIC there is a new packet */ ++ writel(1, iq->doorbell_reg); + iq->stats.instr_posted++; +- skb_tx_timestamp(skb); + return NETDEV_TX_OK; + + dma_map_sg_err: diff --git a/queue-6.1/selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch b/queue-6.1/selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch new file mode 100644 index 00000000000..14ec33676c7 --- /dev/null +++ b/queue-6.1/selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch @@ -0,0 +1,41 @@ +From 2e2d9c7d4d37d74873583d7b0c94eac8b6869486 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Fri, 13 Oct 2023 22:02:24 +0200 +Subject: selftests: netfilter: Run nft_audit.sh in its own netns + +From: Phil Sutter + +commit 2e2d9c7d4d37d74873583d7b0c94eac8b6869486 upstream. + +Don't mess with the host's firewall ruleset. Since audit logging is not +per-netns, add an initial delay of a second so other selftests' netns +cleanups have a chance to finish. 
+ +Fixes: e8dbde59ca3f ("selftests: netfilter: Test nf_tables audit logging") +Signed-off-by: Phil Sutter +Signed-off-by: Florian Westphal +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/netfilter/nft_audit.sh | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh +index e94a80859bbd..99ed5bd6e840 100755 +--- a/tools/testing/selftests/netfilter/nft_audit.sh ++++ b/tools/testing/selftests/netfilter/nft_audit.sh +@@ -11,6 +11,12 @@ nft --version >/dev/null 2>&1 || { + exit $SKIP_RC + } + ++# Run everything in a separate network namespace ++[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } ++ ++# give other scripts a chance to finish - audit_logread sees all activity ++sleep 1 ++ + logfile=$(mktemp) + rulefile=$(mktemp) + echo "logging into $logfile" +-- +2.42.0 + diff --git a/queue-6.1/selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch b/queue-6.1/selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch new file mode 100644 index 00000000000..deeb5e16081 --- /dev/null +++ b/queue-6.1/selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch @@ -0,0 +1,36 @@ +From af846afad5ca1c1a24d320adf9e48255e97db84e Mon Sep 17 00:00:00 2001 +From: Aaron Conole +Date: Wed, 11 Oct 2023 15:49:37 -0400 +Subject: selftests: openvswitch: Catch cases where the tests are killed + +From: Aaron Conole + +commit af846afad5ca1c1a24d320adf9e48255e97db84e upstream. + +In case of fatal signal, or early abort at least cleanup the current +test case. + +Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") +Signed-off-by: Aaron Conole +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh +index 220c3356901e..2a0112be7ead 100755 +--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh ++++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh +@@ -3,6 +3,8 @@ + # + # OVS kernel module self tests + ++trap ovs_exit_sig EXIT TERM INT ERR ++ + # Kselftest framework requirement - SKIP code is 4. + ksft_skip=4 + +-- +2.42.0 + diff --git a/queue-6.1/series b/queue-6.1/series index d768f4c7c6c..5097116deca 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -44,3 +44,33 @@ asoc-codecs-wcd938x-drop-bogus-bind-error-handling.patch asoc-codecs-wcd938x-fix-unbind-tear-down-order.patch asoc-codecs-wcd938x-fix-resource-leaks-on-bind-errors.patch qed-fix-ll2-rx-buffer-allocation.patch +xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch +xfrm-fix-a-data-race-in-xfrm_gen_index.patch +xfrm-interface-use-dev_stats_inc.patch +wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch +net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch +net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch +net-rfkill-gpio-prevent-value-glitch-during-probe.patch +tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch +tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch +tcp-fix-listen-warning-with-v4-mapped-v6-address.patch +tun-prevent-negative-ifindex.patch +ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch +net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch +octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch +i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch +net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch +bonding-return-pointer-to-data-after-pull-on-skb.patch 
+net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch +neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch +selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch +selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch +netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch +netlink-correct-offload_xstats-size.patch +netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch +netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch +net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch +net-pktgen-fix-interface-flags-printing.patch +net-avoid-uaf-on-deleted-altname.patch +net-fix-ifname-in-netlink-ntf-during-netns-move.patch +net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch diff --git a/queue-6.1/tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch b/queue-6.1/tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch new file mode 100644 index 00000000000..0649307a7ea --- /dev/null +++ b/queue-6.1/tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch @@ -0,0 +1,96 @@ +From 1c2709cfff1dedbb9591e989e2f001484208d914 Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Sun, 15 Oct 2023 13:47:00 -0400 +Subject: tcp: fix excessive TLP and RACK timeouts from HZ rounding + +From: Neal Cardwell + +commit 1c2709cfff1dedbb9591e989e2f001484208d914 upstream. + +We discovered from packet traces of slow loss recovery on kernels with +the default HZ=250 setting (and min_rtt < 1ms) that after reordering, +when receiving a SACKed sequence range, the RACK reordering timer was +firing after about 16ms rather than the desired value of roughly +min_rtt/4 + 2ms. The problem is largely due to the RACK reorder timer +calculation adding in TCP_TIMEOUT_MIN, which is 2 jiffies. On kernels +with HZ=250, this is 2*4ms = 8ms. The TLP timer calculation has the +exact same issue. 
+ +This commit fixes the TLP transmit timer and RACK reordering timer +floor calculation to more closely match the intended 2ms floor even on +kernels with HZ=250. It does this by adding in a new +TCP_TIMEOUT_MIN_US floor of 2000 us and then converting to jiffies, +instead of the current approach of converting to jiffies and then +adding the TCP_TIMEOUT_MIN value of 2 jiffies. + +Our testing has verified that on kernels with HZ=1000, as expected, +this does not produce significant changes in behavior, but on kernels +with the default HZ=250 the latency improvement can be large. For +example, our tests show that for HZ=250 kernels at low RTTs this fix +roughly halves the latency for the RACK reorder timer: instead of +mostly firing at 16ms it mostly fires at 8ms. + +Suggested-by: Eric Dumazet +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Fixes: bb4d991a28cc ("tcp: adjust tail loss probe timeout") +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20231015174700.2206872-1-ncardwell.sw@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 3 +++ + net/ipv4/tcp_output.c | 9 +++++---- + net/ipv4/tcp_recovery.c | 2 +- + 3 files changed, 9 insertions(+), 5 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -141,6 +141,9 @@ void tcp_time_wait(struct sock *sk, int + #define TCP_RTO_MAX ((unsigned)(120*HZ)) + #define TCP_RTO_MIN ((unsigned)(HZ/5)) + #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ ++ ++#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */ ++ + #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ + #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now + * used as a fallback RTO for the +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2735,7 +2735,7 @@ bool tcp_schedule_loss_probe(struct sock + { + struct inet_connection_sock *icsk = inet_csk(sk); + 
struct tcp_sock *tp = tcp_sk(sk); +- u32 timeout, rto_delta_us; ++ u32 timeout, timeout_us, rto_delta_us; + int early_retrans; + + /* Don't do any loss probe on a Fast Open connection before 3WHS +@@ -2759,11 +2759,12 @@ bool tcp_schedule_loss_probe(struct sock + * sample is available then probe after TCP_TIMEOUT_INIT. + */ + if (tp->srtt_us) { +- timeout = usecs_to_jiffies(tp->srtt_us >> 2); ++ timeout_us = tp->srtt_us >> 2; + if (tp->packets_out == 1) +- timeout += TCP_RTO_MIN; ++ timeout_us += tcp_rto_min_us(sk); + else +- timeout += TCP_TIMEOUT_MIN; ++ timeout_us += TCP_TIMEOUT_MIN_US; ++ timeout = usecs_to_jiffies(timeout_us); + } else { + timeout = TCP_TIMEOUT_INIT; + } +--- a/net/ipv4/tcp_recovery.c ++++ b/net/ipv4/tcp_recovery.c +@@ -104,7 +104,7 @@ bool tcp_rack_mark_lost(struct sock *sk) + tp->rack.advanced = 0; + tcp_rack_detect_loss(sk, &timeout); + if (timeout) { +- timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; ++ timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, + timeout, inet_csk(sk)->icsk_rto); + } diff --git a/queue-6.1/tcp-fix-listen-warning-with-v4-mapped-v6-address.patch b/queue-6.1/tcp-fix-listen-warning-with-v4-mapped-v6-address.patch new file mode 100644 index 00000000000..34dbd6d40c5 --- /dev/null +++ b/queue-6.1/tcp-fix-listen-warning-with-v4-mapped-v6-address.patch @@ -0,0 +1,149 @@ +From 8702cf12e6ba91616a72d684e90357977972991b Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Mon, 9 Oct 2023 18:38:14 -0700 +Subject: tcp: Fix listen() warning with v4-mapped-v6 address. + +From: Kuniyuki Iwashima + +commit 8702cf12e6ba91616a72d684e90357977972991b upstream. + +syzbot reported a warning [0] introduced by commit c48ef9c4aed3 ("tcp: Fix +bind() regression for v4-mapped-v6 non-wildcard address."). + +After the cited commit, a v4 socket's address matches the corresponding +v4-mapped-v6 tb2 in inet_bind2_bucket_match_addr(), not vice versa. 
+ +During X.X.X.X -> ::ffff:X.X.X.X order bind()s, the second bind() uses +bhash and conflicts properly without checking bhash2 so that we need not +check if a v4-mapped-v6 sk matches the corresponding v4 address tb2 in +inet_bind2_bucket_match_addr(). However, the repro shows that we need +to check that in a no-conflict case. + +The repro bind()s two sockets to the 2-tuples using SO_REUSEPORT and calls +listen() for the first socket: + + from socket import * + + s1 = socket() + s1.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) + s1.bind(('127.0.0.1', 0)) + + s2 = socket(AF_INET6) + s2.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) + s2.bind(('::ffff:127.0.0.1', s1.getsockname()[1])) + + s1.listen() + +The second socket should belong to the first socket's tb2, but the second +bind() creates another tb2 bucket because inet_bind2_bucket_find() returns +NULL in inet_csk_get_port() as the v4-mapped-v6 sk does not match the +corresponding v4 address tb2. + + bhash2[] -> tb2(::ffff:X.X.X.X) -> tb2(X.X.X.X) + +Then, listen() for the first socket calls inet_csk_get_port(), where the +v4 address matches the v4-mapped-v6 tb2 and WARN_ON() is triggered. + +To avoid that, we need to check if v4-mapped-v6 sk address matches with +the corresponding v4 address tb2 in inet_bind2_bucket_match(). + +The same checks are needed in inet_bind2_bucket_addr_match() too, so we +can move all checks there and call it from inet_bind2_bucket_match(). + +Note that now tb->family is just an address family of tb->(v6_)?rcv_saddr +and not of sockets in the bucket. This could be refactored later by +defining tb->rcv_saddr as tb->v6_rcv_saddr.s6_addr32[3] and prepending +::ffff: when creating v4 tb2. 
+ +[0]: +WARNING: CPU: 0 PID: 5049 at net/ipv4/inet_connection_sock.c:587 inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 +Modules linked in: +CPU: 0 PID: 5049 Comm: syz-executor288 Not tainted 6.6.0-rc2-syzkaller-00018-g2cf0f7156238 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 +RIP: 0010:inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 +Code: 7c 24 08 e8 4c b6 8a 01 31 d2 be 88 01 00 00 48 c7 c7 e0 94 ae 8b e8 59 2e a3 f8 2e 2e 2e 31 c0 e9 04 fe ff ff e8 ca 88 d0 f8 <0f> 0b e9 0f f9 ff ff e8 be 88 d0 f8 49 8d 7e 48 e8 65 ca 5a 00 31 +RSP: 0018:ffffc90003abfbf0 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: ffff888026429100 RCX: 0000000000000000 +RDX: ffff88807edcbb80 RSI: ffffffff88b73d66 RDI: ffff888026c49f38 +RBP: ffff888026c49f30 R08: 0000000000000005 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff9260f200 +R13: ffff888026c49880 R14: 0000000000000000 R15: ffff888026429100 +FS: 00005555557d5380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000000000045ad50 CR3: 0000000025754000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + inet_csk_listen_start+0x155/0x360 net/ipv4/inet_connection_sock.c:1256 + __inet_listen_sk+0x1b8/0x5c0 net/ipv4/af_inet.c:217 + inet_listen+0x93/0xd0 net/ipv4/af_inet.c:239 + __sys_listen+0x194/0x270 net/socket.c:1866 + __do_sys_listen net/socket.c:1875 [inline] + __se_sys_listen net/socket.c:1873 [inline] + __x64_sys_listen+0x53/0x80 net/socket.c:1873 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7f3a5bce3af9 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 
4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007ffc1a1c79e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000032 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a5bce3af9 +RDX: 00007f3a5bce3af9 RSI: 0000000000000000 RDI: 0000000000000003 +RBP: 00007f3a5bd565f0 R08: 0000000000000006 R09: 0000000000000006 +R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000001 +R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 + + +Fixes: c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.") +Reported-by: syzbot+71e724675ba3958edb31@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=71e724675ba3958edb31 +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20231010013814.70571-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_hashtables.c | 24 +++++++++--------------- + 1 file changed, 9 insertions(+), 15 deletions(-) + +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -148,8 +148,14 @@ static bool inet_bind2_bucket_addr_match + const struct sock *sk) + { + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family != tb2->family) +- return false; ++ if (sk->sk_family != tb2->family) { ++ if (sk->sk_family == AF_INET) ++ return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) && ++ tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; ++ ++ return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) && ++ sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr; ++ } + + if (sk->sk_family == AF_INET6) + return ipv6_addr_equal(&tb2->v6_rcv_saddr, +@@ -799,19 +805,7 @@ static bool inet_bind2_bucket_match(cons + tb->l3mdev != l3mdev) + return false; + +-#if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family != tb->family) { +- if (sk->sk_family == AF_INET) +- return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) && +- tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; +- +- return false; +- } +- 
+- if (sk->sk_family == AF_INET6) +- return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); +-#endif +- return tb->rcv_saddr == sk->sk_rcv_saddr; ++ return inet_bind2_bucket_addr_match(tb, sk); + } + + bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, diff --git a/queue-6.1/tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch b/queue-6.1/tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch new file mode 100644 index 00000000000..7a9120f8e9c --- /dev/null +++ b/queue-6.1/tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch @@ -0,0 +1,76 @@ +From f921a4a5bffa8a0005b190fb9421a7fc1fd716b6 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 17 Oct 2023 12:45:26 +0000 +Subject: tcp: tsq: relax tcp_small_queue_check() when rtx queue contains a single skb + +From: Eric Dumazet + +commit f921a4a5bffa8a0005b190fb9421a7fc1fd716b6 upstream. + +In commit 75eefc6c59fd ("tcp: tsq: add a shortcut in tcp_small_queue_check()") +we allowed to send an skb regardless of TSQ limits being hit if rtx queue +was empty or had a single skb, in order to better fill the pipe +when/if TX completions were slow. + +Then later, commit 75c119afe14f ("tcp: implement rb-tree based +retransmit queue") accidentally removed the special case for +one skb in rtx queue. + +Stefan Wahren reported a regression in single TCP flow throughput +using a 100Mbit fec link, starting from commit 65466904b015 ("tcp: adjust +TSO packet sizes based on min_rtt"). This last commit only made the +regression more visible, because it locked the TCP flow on a particular +behavior where TSQ prevented two skbs being pushed downstream, +adding silences on the wire between each TSO packet. + +Many thanks to Stefan for his invaluable help ! 
+ +Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") +Link: https://lore.kernel.org/netdev/7f31ddc8-9971-495e-a1f6-819df542e0af@gmx.net/ +Reported-by: Stefan Wahren +Tested-by: Stefan Wahren +Signed-off-by: Eric Dumazet +Acked-by: Neal Cardwell +Link: https://lore.kernel.org/r/20231017124526.4060202-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2489,6 +2489,18 @@ static bool tcp_pacing_check(struct sock + return true; + } + ++static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk) ++{ ++ const struct rb_node *node = sk->tcp_rtx_queue.rb_node; ++ ++ /* No skb in the rtx queue. */ ++ if (!node) ++ return true; ++ ++ /* Only one skb in rtx queue. */ ++ return !node->rb_left && !node->rb_right; ++} ++ + /* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * (These limits are doubled for retransmits) +@@ -2526,12 +2538,12 @@ static bool tcp_small_queue_check(struct + limit += extra_bytes; + } + if (refcount_read(&sk->sk_wmem_alloc) > limit) { +- /* Always send skb if rtx queue is empty. ++ /* Always send skb if rtx queue is empty or has one skb. + * No need to wait for TX completion to call us back, + * after softirq/tasklet schedule. + * This helps when TX completions are delayed too much. 
+ */ +- if (tcp_rtx_queue_empty(sk)) ++ if (tcp_rtx_queue_empty_or_single_skb(sk)) + return false; + + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); diff --git a/queue-6.1/tun-prevent-negative-ifindex.patch b/queue-6.1/tun-prevent-negative-ifindex.patch new file mode 100644 index 00000000000..1e23fe3b0bd --- /dev/null +++ b/queue-6.1/tun-prevent-negative-ifindex.patch @@ -0,0 +1,96 @@ +From cbfbfe3aee718dc4c3c837f5d2463170ee59d78c Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 16 Oct 2023 18:08:51 +0000 +Subject: tun: prevent negative ifindex + +From: Eric Dumazet + +commit cbfbfe3aee718dc4c3c837f5d2463170ee59d78c upstream. + +After commit 956db0a13b47 ("net: warn about attempts to register +negative ifindex") syzbot is able to trigger the following splat. + +Negative ifindex are not supported. + +WARNING: CPU: 1 PID: 6003 at net/core/dev.c:9596 dev_index_reserve+0x104/0x210 +Modules linked in: +CPU: 1 PID: 6003 Comm: syz-executor926 Not tainted 6.6.0-rc4-syzkaller-g19af4a4ed414 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 +pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +pc : dev_index_reserve+0x104/0x210 +lr : dev_index_reserve+0x100/0x210 +sp : ffff800096a878e0 +x29: ffff800096a87930 x28: ffff0000d04380d0 x27: ffff0000d04380f8 +x26: ffff0000d04380f0 x25: 1ffff00012d50f20 x24: 1ffff00012d50f1c +x23: dfff800000000000 x22: ffff8000929c21c0 x21: 00000000ffffffea +x20: ffff0000d04380e0 x19: ffff800096a87900 x18: ffff800096a874c0 +x17: ffff800084df5008 x16: ffff80008051f9c4 x15: 0000000000000001 +x14: 1fffe0001a087198 x13: 0000000000000000 x12: 0000000000000000 +x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 +x8 : ffff0000d41c9bc0 x7 : 0000000000000000 x6 : 0000000000000000 +x5 : ffff800091763d88 x4 : 0000000000000000 x3 : ffff800084e04748 +x2 : 0000000000000001 x1 : 00000000fead71c7 x0 : 0000000000000000 +Call trace: +dev_index_reserve+0x104/0x210 
+register_netdevice+0x598/0x1074 net/core/dev.c:10084 +tun_set_iff+0x630/0xb0c drivers/net/tun.c:2850 +__tun_chr_ioctl+0x788/0x2af8 drivers/net/tun.c:3118 +tun_chr_ioctl+0x38/0x4c drivers/net/tun.c:3403 +vfs_ioctl fs/ioctl.c:51 [inline] +__do_sys_ioctl fs/ioctl.c:871 [inline] +__se_sys_ioctl fs/ioctl.c:857 [inline] +__arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857 +__invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] +invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 +el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 +do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 +el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678 +el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 +el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595 +irq event stamp: 11348 +hardirqs last enabled at (11347): [] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:151 [inline] +hardirqs last enabled at (11347): [] _raw_spin_unlock_irqrestore+0x38/0x98 kernel/locking/spinlock.c:194 +hardirqs last disabled at (11348): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:436 +softirqs last enabled at (11138): [] spin_unlock_bh include/linux/spinlock.h:396 [inline] +softirqs last enabled at (11138): [] release_sock+0x15c/0x1b0 net/core/sock.c:3531 +softirqs last disabled at (11136): [] spin_lock_bh include/linux/spinlock.h:356 [inline] +softirqs last disabled at (11136): [] release_sock+0x3c/0x1b0 net/core/sock.c:3518 + +Fixes: fb7589a16216 ("tun: Add ability to create tun device with given index") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Reviewed-by: Willem de Bruijn +Acked-by: Jason Wang +Link: https://lore.kernel.org/r/20231016180851.3560092-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -3056,10 +3056,11 @@ static long __tun_chr_ioctl(struct file + 
struct net *net = sock_net(&tfile->sk); + struct tun_struct *tun; + void __user* argp = (void __user*)arg; +- unsigned int ifindex, carrier; ++ unsigned int carrier; + struct ifreq ifr; + kuid_t owner; + kgid_t group; ++ int ifindex; + int sndbuf; + int vnet_hdr_sz; + int le; +@@ -3115,7 +3116,9 @@ static long __tun_chr_ioctl(struct file + ret = -EFAULT; + if (copy_from_user(&ifindex, argp, sizeof(ifindex))) + goto unlock; +- ++ ret = -EINVAL; ++ if (ifindex < 0) ++ goto unlock; + ret = 0; + tfile->ifindex = ifindex; + goto unlock; diff --git a/queue-6.1/wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch b/queue-6.1/wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch new file mode 100644 index 00000000000..cd85246bf11 --- /dev/null +++ b/queue-6.1/wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch @@ -0,0 +1,34 @@ +From 91d20ab9d9ca035527af503d00e1e30d6c375f2a Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Mon, 9 Oct 2023 10:18:01 +0200 +Subject: wifi: cfg80211: use system_unbound_wq for wiphy work + +From: Johannes Berg + +commit 91d20ab9d9ca035527af503d00e1e30d6c375f2a upstream. + +Since wiphy work items can run pretty much arbitrary +code in the stack/driver, it can take longer to run +all of this, so we shouldn't be using system_wq via +schedule_work(). Also, we lock the wiphy (which is +the reason this exists), so use system_unbound_wq. 
+ +Reported-and-tested-by: Kalle Valo +Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/wireless/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/wireless/core.c ++++ b/net/wireless/core.c +@@ -1618,7 +1618,7 @@ void wiphy_work_queue(struct wiphy *wiph + list_add_tail(&work->entry, &rdev->wiphy_work_list); + spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); + +- schedule_work(&rdev->wiphy_work); ++ queue_work(system_unbound_wq, &rdev->wiphy_work); + } + EXPORT_SYMBOL_GPL(wiphy_work_queue); + diff --git a/queue-6.1/xfrm-fix-a-data-race-in-xfrm_gen_index.patch b/queue-6.1/xfrm-fix-a-data-race-in-xfrm_gen_index.patch new file mode 100644 index 00000000000..7c987f1c3ab --- /dev/null +++ b/queue-6.1/xfrm-fix-a-data-race-in-xfrm_gen_index.patch @@ -0,0 +1,101 @@ +From 3e4bc23926b83c3c67e5f61ae8571602754131a6 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 8 Sep 2023 18:13:59 +0000 +Subject: xfrm: fix a data-race in xfrm_gen_index() + +From: Eric Dumazet + +commit 3e4bc23926b83c3c67e5f61ae8571602754131a6 upstream. + +xfrm_gen_index() mutual exclusion uses net->xfrm.xfrm_policy_lock. + +This means we must use a per-netns idx_generator variable, +instead of a static one. +An alternative would be to use an atomic variable. 
+ +syzbot reported: + +BUG: KCSAN: data-race in xfrm_sk_policy_insert / xfrm_sk_policy_insert + +write to 0xffffffff87005938 of 4 bytes by task 29466 on cpu 0: +xfrm_gen_index net/xfrm/xfrm_policy.c:1385 [inline] +xfrm_sk_policy_insert+0x262/0x640 net/xfrm/xfrm_policy.c:2347 +xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 +do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 +ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 +rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 +sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 +__sys_setsockopt+0x1c9/0x230 net/socket.c:2263 +__do_sys_setsockopt net/socket.c:2274 [inline] +__se_sys_setsockopt net/socket.c:2271 [inline] +__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffffffff87005938 of 4 bytes by task 29460 on cpu 1: +xfrm_sk_policy_insert+0x13e/0x640 +xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 +do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 +ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 +rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 +sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 +__sys_setsockopt+0x1c9/0x230 net/socket.c:2263 +__do_sys_setsockopt net/socket.c:2274 [inline] +__se_sys_setsockopt net/socket.c:2271 [inline] +__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x00006ad8 -> 0x00006b18 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 29460 Comm: syz-executor.1 Not tainted 6.5.0-rc5-syzkaller-00243-g9106536c1aa3 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 + +Fixes: 1121994c803f ("netns xfrm: policy insertion in netns") +Reported-by: syzbot 
+Signed-off-by: Eric Dumazet +Cc: Steffen Klassert +Cc: Herbert Xu +Acked-by: Herbert Xu +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netns/xfrm.h | 1 + + net/xfrm/xfrm_policy.c | 6 ++---- + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/include/net/netns/xfrm.h ++++ b/include/net/netns/xfrm.h +@@ -50,6 +50,7 @@ struct netns_xfrm { + struct list_head policy_all; + struct hlist_head *policy_byidx; + unsigned int policy_idx_hmask; ++ unsigned int idx_generator; + struct hlist_head policy_inexact[XFRM_POLICY_MAX]; + struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; + unsigned int policy_count[XFRM_POLICY_MAX * 2]; +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -1371,8 +1371,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild); + * of an absolute inpredictability of ordering of rules. This will not pass. */ + static u32 xfrm_gen_index(struct net *net, int dir, u32 index) + { +- static u32 idx_generator; +- + for (;;) { + struct hlist_head *list; + struct xfrm_policy *p; +@@ -1380,8 +1378,8 @@ static u32 xfrm_gen_index(struct net *ne + int found; + + if (!index) { +- idx = (idx_generator | dir); +- idx_generator += 8; ++ idx = (net->xfrm.idx_generator | dir); ++ net->xfrm.idx_generator += 8; + } else { + idx = index; + index = 0; diff --git a/queue-6.1/xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch b/queue-6.1/xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch new file mode 100644 index 00000000000..f7d76f73e0b --- /dev/null +++ b/queue-6.1/xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch @@ -0,0 +1,80 @@ +From de5724ca38fd5e442bae9c1fab31942b6544012d Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 11 Oct 2023 10:24:29 +0000 +Subject: xfrm: fix a data-race in xfrm_lookup_with_ifid() + +From: Eric Dumazet + +commit de5724ca38fd5e442bae9c1fab31942b6544012d upstream. 
+ +syzbot complains about a race in xfrm_lookup_with_ifid() [1] + +When preparing commit 0a9e5794b21e ("xfrm: annotate data-race +around use_time") I thought xfrm_lookup_with_ifid() was modifying +a still private structure. + +[1] +BUG: KCSAN: data-race in xfrm_lookup_with_ifid / xfrm_lookup_with_ifid + +write to 0xffff88813ea41108 of 8 bytes by task 8150 on cpu 1: +xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 +xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] +xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 +ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 +send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 +wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 +wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 +wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] +wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 +process_one_work kernel/workqueue.c:2630 [inline] +process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 +worker_thread+0x525/0x730 kernel/workqueue.c:2784 +kthread+0x1d7/0x210 kernel/kthread.c:388 +ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 +ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 + +write to 0xffff88813ea41108 of 8 bytes by task 15867 on cpu 0: +xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 +xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] +xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 +ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 +send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 +wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 +wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 +wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] +wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 +process_one_work kernel/workqueue.c:2630 [inline] 
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 +worker_thread+0x525/0x730 kernel/workqueue.c:2784 +kthread+0x1d7/0x210 kernel/kthread.c:388 +ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 +ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 + +value changed: 0x00000000651cd9d1 -> 0x00000000651cd9d2 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 15867 Comm: kworker/u4:58 Not tainted 6.6.0-rc4-syzkaller-00016-g5e62ed3b1c8a #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 +Workqueue: wg-kex-wg2 wg_packet_handshake_send_worker + +Fixes: 0a9e5794b21e ("xfrm: annotate data-race around use_time") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Cc: Steffen Klassert +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/xfrm/xfrm_policy.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -3138,7 +3138,7 @@ no_transform: + } + + for (i = 0; i < num_pols; i++) +- pols[i]->curlft.use_time = ktime_get_real_seconds(); ++ WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds()); + + if (num_xfrms < 0) { + /* Prohibit the flow */ diff --git a/queue-6.1/xfrm-interface-use-dev_stats_inc.patch b/queue-6.1/xfrm-interface-use-dev_stats_inc.patch new file mode 100644 index 00000000000..bf567d78064 --- /dev/null +++ b/queue-6.1/xfrm-interface-use-dev_stats_inc.patch @@ -0,0 +1,182 @@ +From f7c4e3e5d4f6609b4725a97451948ca2e425379a Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 5 Sep 2023 13:23:03 +0000 +Subject: xfrm: interface: use DEV_STATS_INC() + +From: Eric Dumazet + +commit f7c4e3e5d4f6609b4725a97451948ca2e425379a upstream. + +syzbot/KCSAN reported data-races in xfrm whenever dev->stats fields +are updated. + +It appears all of these updates can happen from multiple cpus. + +Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. 
+ +BUG: KCSAN: data-race in xfrmi_xmit / xfrmi_xmit + +read-write to 0xffff88813726b160 of 8 bytes by task 23986 on cpu 1: +xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 +__netdev_start_xmit include/linux/netdevice.h:4889 [inline] +netdev_start_xmit include/linux/netdevice.h:4903 [inline] +xmit_one net/core/dev.c:3544 [inline] +dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 +__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 +dev_queue_xmit include/linux/netdevice.h:3082 [inline] +neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 +neigh_output include/net/neighbour.h:542 [inline] +ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 +ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 +NF_HOOK_COND include/linux/netfilter.h:293 [inline] +ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 +dst_output include/net/dst.h:458 [inline] +ip_local_out net/ipv4/ip_output.c:127 [inline] +ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 +udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 +udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 +inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 +sock_sendmsg_nosec net/socket.c:730 [inline] +sock_sendmsg net/socket.c:753 [inline] +____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 +___sys_sendmsg net/socket.c:2594 [inline] +__sys_sendmmsg+0x269/0x500 net/socket.c:2680 +__do_sys_sendmmsg net/socket.c:2709 [inline] +__se_sys_sendmmsg net/socket.c:2706 [inline] +__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read-write to 0xffff88813726b160 of 8 bytes by task 23987 on cpu 0: +xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 +__netdev_start_xmit include/linux/netdevice.h:4889 [inline] +netdev_start_xmit include/linux/netdevice.h:4903 [inline] +xmit_one net/core/dev.c:3544 [inline] +dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 +__dev_queue_xmit+0xeee/0x1de0 
net/core/dev.c:4340 +dev_queue_xmit include/linux/netdevice.h:3082 [inline] +neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 +neigh_output include/net/neighbour.h:542 [inline] +ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 +ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 +NF_HOOK_COND include/linux/netfilter.h:293 [inline] +ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 +dst_output include/net/dst.h:458 [inline] +ip_local_out net/ipv4/ip_output.c:127 [inline] +ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 +udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 +udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 +inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 +sock_sendmsg_nosec net/socket.c:730 [inline] +sock_sendmsg net/socket.c:753 [inline] +____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 +___sys_sendmsg net/socket.c:2594 [inline] +__sys_sendmmsg+0x269/0x500 net/socket.c:2680 +__do_sys_sendmmsg net/socket.c:2709 [inline] +__se_sys_sendmmsg net/socket.c:2706 [inline] +__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +value changed: 0x00000000000010d7 -> 0x00000000000010d8 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 23987 Comm: syz-executor.5 Not tainted 6.5.0-syzkaller-10885-g0468be89b3fa #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 + +Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Cc: Steffen Klassert +Signed-off-by: Steffen Klassert +Signed-off-by: Greg Kroah-Hartman +--- + net/xfrm/xfrm_interface_core.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +--- a/net/xfrm/xfrm_interface_core.c ++++ b/net/xfrm/xfrm_interface_core.c +@@ -379,8 +379,8 @@ static int xfrmi_rcv_cb(struct sk_buff * + skb->dev = dev; + + if (err) { +- dev->stats.rx_errors++; +- 
dev->stats.rx_dropped++; ++ DEV_STATS_INC(dev, rx_errors); ++ DEV_STATS_INC(dev, rx_dropped); + + return 0; + } +@@ -425,7 +425,6 @@ static int + xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) + { + struct xfrm_if *xi = netdev_priv(dev); +- struct net_device_stats *stats = &xi->dev->stats; + struct dst_entry *dst = skb_dst(skb); + unsigned int length = skb->len; + struct net_device *tdev; +@@ -464,7 +463,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct + tdev = dst->dev; + + if (tdev == dev) { +- stats->collisions++; ++ DEV_STATS_INC(dev, collisions); + net_warn_ratelimited("%s: Local routing loop detected!\n", + dev->name); + goto tx_err_dst_release; +@@ -503,13 +502,13 @@ xmit: + if (net_xmit_eval(err) == 0) { + dev_sw_netstats_tx_add(dev, 1, length); + } else { +- stats->tx_errors++; +- stats->tx_aborted_errors++; ++ DEV_STATS_INC(dev, tx_errors); ++ DEV_STATS_INC(dev, tx_aborted_errors); + } + + return 0; + tx_err_link_failure: +- stats->tx_carrier_errors++; ++ DEV_STATS_INC(dev, tx_carrier_errors); + dst_link_failure(skb); + tx_err_dst_release: + dst_release(dst); +@@ -519,7 +518,6 @@ tx_err_dst_release: + static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct xfrm_if *xi = netdev_priv(dev); +- struct net_device_stats *stats = &xi->dev->stats; + struct dst_entry *dst = skb_dst(skb); + struct flowi fl; + int ret; +@@ -536,7 +534,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_ + dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); + if (dst->error) { + dst_release(dst); +- stats->tx_carrier_errors++; ++ DEV_STATS_INC(dev, tx_carrier_errors); + goto tx_err; + } + skb_dst_set(skb, dst); +@@ -552,7 +550,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_ + fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); + if (IS_ERR(rt)) { +- stats->tx_carrier_errors++; ++ DEV_STATS_INC(dev, tx_carrier_errors); + goto tx_err; + } + skb_dst_set(skb, &rt->dst); +@@ -571,8 +569,8 @@ 
static netdev_tx_t xfrmi_xmit(struct sk_ + return NETDEV_TX_OK; + + tx_err: +- stats->tx_errors++; +- stats->tx_dropped++; ++ DEV_STATS_INC(dev, tx_errors); ++ DEV_STATS_INC(dev, tx_dropped); + kfree_skb(skb); + return NETDEV_TX_OK; + }