git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Oct 2023 20:53:35 +0000 (22:53 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Oct 2023 20:53:35 +0000 (22:53 +0200)
added patches:
bonding-return-pointer-to-data-after-pull-on-skb.patch
i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch
ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch
neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch
net-avoid-uaf-on-deleted-altname.patch
net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch
net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch
net-fix-ifname-in-netlink-ntf-during-netns-move.patch
net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch
net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch
net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch
net-pktgen-fix-interface-flags-printing.patch
net-rfkill-gpio-prevent-value-glitch-during-probe.patch
net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch
net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch
netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch
netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch
netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch
netlink-correct-offload_xstats-size.patch
octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch
selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch
selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch
tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch
tcp-fix-listen-warning-with-v4-mapped-v6-address.patch
tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch
tun-prevent-negative-ifindex.patch
wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch
xfrm-fix-a-data-race-in-xfrm_gen_index.patch
xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch
xfrm-interface-use-dev_stats_inc.patch

31 files changed:
queue-6.1/bonding-return-pointer-to-data-after-pull-on-skb.patch [new file with mode: 0644]
queue-6.1/i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch [new file with mode: 0644]
queue-6.1/ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch [new file with mode: 0644]
queue-6.1/neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch [new file with mode: 0644]
queue-6.1/net-avoid-uaf-on-deleted-altname.patch [new file with mode: 0644]
queue-6.1/net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch [new file with mode: 0644]
queue-6.1/net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch [new file with mode: 0644]
queue-6.1/net-fix-ifname-in-netlink-ntf-during-netns-move.patch [new file with mode: 0644]
queue-6.1/net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch [new file with mode: 0644]
queue-6.1/net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch [new file with mode: 0644]
queue-6.1/net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch [new file with mode: 0644]
queue-6.1/net-pktgen-fix-interface-flags-printing.patch [new file with mode: 0644]
queue-6.1/net-rfkill-gpio-prevent-value-glitch-during-probe.patch [new file with mode: 0644]
queue-6.1/net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch [new file with mode: 0644]
queue-6.1/net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch [new file with mode: 0644]
queue-6.1/netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch [new file with mode: 0644]
queue-6.1/netlink-correct-offload_xstats-size.patch [new file with mode: 0644]
queue-6.1/octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch [new file with mode: 0644]
queue-6.1/selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch [new file with mode: 0644]
queue-6.1/selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch [new file with mode: 0644]
queue-6.1/tcp-fix-listen-warning-with-v4-mapped-v6-address.patch [new file with mode: 0644]
queue-6.1/tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch [new file with mode: 0644]
queue-6.1/tun-prevent-negative-ifindex.patch [new file with mode: 0644]
queue-6.1/wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch [new file with mode: 0644]
queue-6.1/xfrm-fix-a-data-race-in-xfrm_gen_index.patch [new file with mode: 0644]
queue-6.1/xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch [new file with mode: 0644]
queue-6.1/xfrm-interface-use-dev_stats_inc.patch [new file with mode: 0644]

diff --git a/queue-6.1/bonding-return-pointer-to-data-after-pull-on-skb.patch b/queue-6.1/bonding-return-pointer-to-data-after-pull-on-skb.patch
new file mode 100644 (file)
index 0000000..45835c3
--- /dev/null
@@ -0,0 +1,37 @@
+From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001
+From: Jiri Wiesner <jwiesner@suse.de>
+Date: Tue, 10 Oct 2023 18:39:33 +0200
+Subject: bonding: Return pointer to data after pull on skb
+
+From: Jiri Wiesner <jwiesner@suse.de>
+
+commit d93f3f992780af4a21e6c1ab86946b7c5602f1b9 upstream.
+
+Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet
+headers"), header offsets used to compute a hash in bond_xmit_hash() are
+relative to skb->data and not skb->head. If the tail of the header buffer
+of an skb really needs to be advanced and the operation is successful, the
+pointer to the data must be returned (and not a pointer to the head of the
+buffer).
+
+Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers")
+Signed-off-by: Jiri Wiesner <jwiesner@suse.de>
+Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -3990,7 +3990,7 @@ static inline const void *bond_pull_data
+       if (likely(n <= hlen))
+               return data;
+       else if (skb && likely(pskb_may_pull(skb, n)))
+-              return skb->head;
++              return skb->data;
+       return NULL;
+ }
diff --git a/queue-6.1/i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch b/queue-6.1/i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch
new file mode 100644 (file)
index 0000000..18094ad
--- /dev/null
@@ -0,0 +1,57 @@
+From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001
+From: Michal Schmidt <mschmidt@redhat.com>
+Date: Wed, 11 Oct 2023 16:33:32 -0700
+Subject: i40e: prevent crash on probe if hw registers have invalid values
+
+From: Michal Schmidt <mschmidt@redhat.com>
+
+commit fc6f716a5069180c40a8c9b63631e97da34f64a3 upstream.
+
+The hardware provides the indexes of the first and the last available
+queue and VF. From the indexes, the driver calculates the numbers of
+queues and VFs. In theory, a faulty device might say the last index is
+smaller than the first index. In that case, the driver's calculation
+would underflow, it would attempt to write to non-existent registers
+outside of the ioremapped range and crash.
+
+I ran into this not by having a faulty device, but by an operator error.
+I accidentally ran a QE test meant for i40e devices on an ice device.
+The test used 'echo i40e > /sys/...ice PCI device.../driver_override',
+bound the driver to the device and crashed in one of the wr32 calls in
+i40e_clear_hw.
+
+Add checks to prevent underflows in the calculations of num_queues and
+num_vfs. With this fix, the wrong device probing reports errors and
+returns a failure without crashing.
+
+Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts")
+Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_common.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
+@@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw)
+                    I40E_PFLAN_QALLOC_FIRSTQ_SHIFT;
+       j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >>
+           I40E_PFLAN_QALLOC_LASTQ_SHIFT;
+-      if (val & I40E_PFLAN_QALLOC_VALID_MASK)
++      if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue)
+               num_queues = (j - base_queue) + 1;
+       else
+               num_queues = 0;
+@@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw)
+           I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT;
+       j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >>
+           I40E_PF_VT_PFALLOC_LASTVF_SHIFT;
+-      if (val & I40E_PF_VT_PFALLOC_VALID_MASK)
++      if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i)
+               num_vfs = (j - i) + 1;
+       else
+               num_vfs = 0;
diff --git a/queue-6.1/ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch b/queue-6.1/ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch
new file mode 100644 (file)
index 0000000..dbc539e
--- /dev/null
@@ -0,0 +1,110 @@
+From 195374d893681da43a39796e53b30ac4f20400c4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 17 Oct 2023 19:23:04 +0000
+Subject: ipv4: fib: annotate races around nh->nh_saddr_genid and nh->nh_saddr
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 195374d893681da43a39796e53b30ac4f20400c4 upstream.
+
+syzbot reported a data-race while accessing nh->nh_saddr_genid [1]
+
+Add annotations, but leave the code lazy as intended.
+
+[1]
+BUG: KCSAN: data-race in fib_select_path / fib_select_path
+
+write to 0xffff8881387166f0 of 4 bytes by task 6778 on cpu 1:
+fib_info_update_nhc_saddr net/ipv4/fib_semantics.c:1334 [inline]
+fib_result_prefsrc net/ipv4/fib_semantics.c:1354 [inline]
+fib_select_path+0x292/0x330 net/ipv4/fib_semantics.c:2269
+ip_route_output_key_hash_rcu+0x659/0x12c0 net/ipv4/route.c:2810
+ip_route_output_key_hash net/ipv4/route.c:2644 [inline]
+__ip_route_output_key include/net/route.h:134 [inline]
+ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2872
+send4+0x1f5/0x520 drivers/net/wireguard/socket.c:61
+wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+read to 0xffff8881387166f0 of 4 bytes by task 6759 on cpu 0:
+fib_result_prefsrc net/ipv4/fib_semantics.c:1350 [inline]
+fib_select_path+0x1cb/0x330 net/ipv4/fib_semantics.c:2269
+ip_route_output_key_hash_rcu+0x659/0x12c0 net/ipv4/route.c:2810
+ip_route_output_key_hash net/ipv4/route.c:2644 [inline]
+__ip_route_output_key include/net/route.h:134 [inline]
+ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2872
+send4+0x1f5/0x520 drivers/net/wireguard/socket.c:61
+wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+value changed: 0x959d3217 -> 0x959d3218
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 6759 Comm: kworker/u4:15 Not tainted 6.6.0-rc4-syzkaller-00029-gcbf3a2cb156a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
+Workqueue: wg-kex-wg1 wg_packet_handshake_send_worker
+
+Fixes: 436c3b66ec98 ("ipv4: Invalidate nexthop cache nh_saddr more correctly.")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20231017192304.82626-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |   14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1325,15 +1325,18 @@ __be32 fib_info_update_nhc_saddr(struct
+                                unsigned char scope)
+ {
+       struct fib_nh *nh;
++      __be32 saddr;
+       if (nhc->nhc_family != AF_INET)
+               return inet_select_addr(nhc->nhc_dev, 0, scope);
+       nh = container_of(nhc, struct fib_nh, nh_common);
+-      nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
+-      nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
++      saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
+-      return nh->nh_saddr;
++      WRITE_ONCE(nh->nh_saddr, saddr);
++      WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid));
++
++      return saddr;
+ }
+ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
+@@ -1347,8 +1350,9 @@ __be32 fib_result_prefsrc(struct net *ne
+               struct fib_nh *nh;
+               nh = container_of(nhc, struct fib_nh, nh_common);
+-              if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
+-                      return nh->nh_saddr;
++              if (READ_ONCE(nh->nh_saddr_genid) ==
++                  atomic_read(&net->ipv4.dev_addr_genid))
++                      return READ_ONCE(nh->nh_saddr);
+       }
+       return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
diff --git a/queue-6.1/neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch b/queue-6.1/neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch
new file mode 100644 (file)
index 0000000..da07a78
--- /dev/null
@@ -0,0 +1,99 @@
+From 2915240eddba96b37de4c7e9a3d0ac6f9548454b Mon Sep 17 00:00:00 2001
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+Date: Mon, 16 Oct 2023 14:49:04 +0200
+Subject: neighbor: tracing: Move pin6 inside CONFIG_IPV6=y section
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+
+commit 2915240eddba96b37de4c7e9a3d0ac6f9548454b upstream.
+
+When CONFIG_IPV6=n, and building with W=1:
+
+    In file included from include/trace/define_trace.h:102,
+                    from include/trace/events/neigh.h:255,
+                    from net/core/net-traces.c:51:
+    include/trace/events/neigh.h: In function ‘trace_event_raw_event_neigh_create’:
+    include/trace/events/neigh.h:42:34: error: variable ‘pin6’ set but not used [-Werror=unused-but-set-variable]
+       42 |                 struct in6_addr *pin6;
+         |                                  ^~~~
+    include/trace/trace_events.h:402:11: note: in definition of macro ‘DECLARE_EVENT_CLASS’
+      402 |         { assign; }                                                     \
+         |           ^~~~~~
+    include/trace/trace_events.h:44:30: note: in expansion of macro ‘PARAMS’
+       44 |                              PARAMS(assign),                   \
+         |                              ^~~~~~
+    include/trace/events/neigh.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
+       23 | TRACE_EVENT(neigh_create,
+         | ^~~~~~~~~~~
+    include/trace/events/neigh.h:41:9: note: in expansion of macro ‘TP_fast_assign’
+       41 |         TP_fast_assign(
+         |         ^~~~~~~~~~~~~~
+    In file included from include/trace/define_trace.h:103,
+                    from include/trace/events/neigh.h:255,
+                    from net/core/net-traces.c:51:
+    include/trace/events/neigh.h: In function ‘perf_trace_neigh_create’:
+    include/trace/events/neigh.h:42:34: error: variable ‘pin6’ set but not used [-Werror=unused-but-set-variable]
+       42 |                 struct in6_addr *pin6;
+         |                                  ^~~~
+    include/trace/perf.h:51:11: note: in definition of macro ‘DECLARE_EVENT_CLASS’
+       51 |         { assign; }                                                     \
+         |           ^~~~~~
+    include/trace/trace_events.h:44:30: note: in expansion of macro ‘PARAMS’
+       44 |                              PARAMS(assign),                   \
+         |                              ^~~~~~
+    include/trace/events/neigh.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
+       23 | TRACE_EVENT(neigh_create,
+         | ^~~~~~~~~~~
+    include/trace/events/neigh.h:41:9: note: in expansion of macro ‘TP_fast_assign’
+       41 |         TP_fast_assign(
+         |         ^~~~~~~~~~~~~~
+
+Indeed, the variable pin6 is declared and initialized unconditionally,
+while it is only used and needlessly re-initialized when support for
+IPv6 is enabled.
+
+Fix this by dropping the unused variable initialization, and moving the
+variable declaration inside the existing section protected by a check
+for CONFIG_IPV6.
+
+Fixes: fc651001d2c5ca4f ("neighbor: Add tracepoint to __neigh_create")
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Simon Horman <horms@kernel.org> # build-tested
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/neigh.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/trace/events/neigh.h
++++ b/include/trace/events/neigh.h
+@@ -39,7 +39,6 @@ TRACE_EVENT(neigh_create,
+       ),
+       TP_fast_assign(
+-              struct in6_addr *pin6;
+               __be32 *p32;
+               __entry->family = tbl->family;
+@@ -47,7 +46,6 @@ TRACE_EVENT(neigh_create,
+               __entry->entries = atomic_read(&tbl->gc_entries);
+               __entry->created = n != NULL;
+               __entry->gc_exempt = exempt_from_gc;
+-              pin6 = (struct in6_addr *)__entry->primary_key6;
+               p32 = (__be32 *)__entry->primary_key4;
+               if (tbl->family == AF_INET)
+@@ -57,6 +55,8 @@ TRACE_EVENT(neigh_create,
+ #if IS_ENABLED(CONFIG_IPV6)
+               if (tbl->family == AF_INET6) {
++                      struct in6_addr *pin6;
++
+                       pin6 = (struct in6_addr *)__entry->primary_key6;
+                       *pin6 = *(struct in6_addr *)pkey;
+               }
diff --git a/queue-6.1/net-avoid-uaf-on-deleted-altname.patch b/queue-6.1/net-avoid-uaf-on-deleted-altname.patch
new file mode 100644 (file)
index 0000000..56eaff9
--- /dev/null
@@ -0,0 +1,65 @@
+From 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:15 -0700
+Subject: net: avoid UAF on deleted altname
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b upstream.
+
+Altnames are accessed under RCU (dev_get_by_name_rcu())
+but freed by kfree() with no synchronization point.
+
+Each node has one or two allocations (node and a variable-size
+name, sometimes the name is netdev->name). Adding rcu_heads
+here is a bit tedious. Besides most code which unlists the names
+already has rcu barriers - so take the simpler approach of adding
+synchronize_rcu(). Note that the one on the unregistration path
+(which matters more) is removed by the next fix.
+
+Fixes: ff92741270bf ("net: introduce name_node struct to be used in hashlist")
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -345,7 +345,6 @@ int netdev_name_node_alt_create(struct n
+ static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+ {
+       list_del(&name_node->list);
+-      netdev_name_node_del(name_node);
+       kfree(name_node->name);
+       netdev_name_node_free(name_node);
+ }
+@@ -364,6 +363,8 @@ int netdev_name_node_alt_destroy(struct
+       if (name_node == dev->name_node || name_node->dev != dev)
+               return -EINVAL;
++      netdev_name_node_del(name_node);
++      synchronize_rcu();
+       __netdev_name_node_alt_destroy(name_node);
+       return 0;
+@@ -10835,6 +10836,7 @@ void unregister_netdevice_many(struct li
+       synchronize_net();
+       list_for_each_entry(dev, head, unreg_list) {
++              struct netdev_name_node *name_node;
+               struct sk_buff *skb = NULL;
+               /* Shutdown queueing discipline. */
+@@ -10860,6 +10862,9 @@ void unregister_netdevice_many(struct li
+               dev_uc_flush(dev);
+               dev_mc_flush(dev);
++              netdev_for_each_altname(dev, name_node)
++                      netdev_name_node_del(name_node);
++              synchronize_rcu();
+               netdev_name_node_alt_flush(dev);
+               netdev_name_node_free(dev->name_node);
diff --git a/queue-6.1/net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch b/queue-6.1/net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch
new file mode 100644 (file)
index 0000000..3f40def
--- /dev/null
@@ -0,0 +1,87 @@
+From 7663d522099ecc464512164e660bc771b2ff7b64 Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:14 -0700
+Subject: net: check for altname conflicts when changing netdev's netns
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 7663d522099ecc464512164e660bc771b2ff7b64 upstream.
+
+It's currently possible to create an altname conflicting
+with an altname or real name of another device by creating
+it in another netns and moving it over:
+
+ [ ~]$ ip link add dev eth0 type dummy
+
+ [ ~]$ ip netns add test
+ [ ~]$ ip -netns test link add dev ethX netns test type dummy
+ [ ~]$ ip -netns test link property add dev ethX altname eth0
+ [ ~]$ ip -netns test link set dev ethX netns 1
+
+ [ ~]$ ip link
+ ...
+ 3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+     link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
+ ...
+ 5: ethX: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+     link/ether 26:b7:28:78:38:0f brd ff:ff:ff:ff:ff:ff
+     altname eth0
+
+Create a macro for walking the altnames, this hopefully makes
+it clearer that the list we walk contains only altnames.
+Which is otherwise not entirely intuitive.
+
+Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    9 ++++++++-
+ net/core/dev.h |    3 +++
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1054,7 +1054,8 @@ static int __dev_alloc_name(struct net *
+               for_each_netdev(net, d) {
+                       struct netdev_name_node *name_node;
+-                      list_for_each_entry(name_node, &d->name_node->list, list) {
++
++                      netdev_for_each_altname(d, name_node) {
+                               if (!sscanf(name_node->name, name, &i))
+                                       continue;
+                               if (i < 0 || i >= max_netdevices)
+@@ -10949,6 +10950,7 @@ EXPORT_SYMBOL(unregister_netdev);
+ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+                              const char *pat, int new_ifindex)
+ {
++      struct netdev_name_node *name_node;
+       struct net *net_old = dev_net(dev);
+       char new_name[IFNAMSIZ] = {};
+       int err, new_nsid;
+@@ -10981,6 +10983,11 @@ int __dev_change_net_namespace(struct ne
+               if (err < 0)
+                       goto out;
+       }
++      /* Check that none of the altnames conflicts. */
++      err = -EEXIST;
++      netdev_for_each_altname(dev, name_node)
++              if (netdev_name_in_use(net, name_node->name))
++                      goto out;
+       /* Check that new_ifindex isn't used yet. */
+       err = -EBUSY;
+--- a/net/core/dev.h
++++ b/net/core/dev.h
+@@ -61,6 +61,9 @@ struct netdev_name_node {
+ int netdev_get_name(struct net *net, char *name, int ifindex);
+ int dev_change_name(struct net_device *dev, const char *newname);
++#define netdev_for_each_altname(dev, namenode)                                \
++      list_for_each_entry((namenode), &(dev)->name_node->list, list)
++
+ int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
diff --git a/queue-6.1/net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch b/queue-6.1/net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch
new file mode 100644 (file)
index 0000000..8d4f71d
--- /dev/null
@@ -0,0 +1,91 @@
+From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+Date: Wed, 11 Oct 2023 11:24:19 +0800
+Subject: net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+commit 61b40cefe51af005c72dbdcf975a3d166c6e6406 upstream.
+
+In bcm_sf2_mdio_register(), the class_find_device() will call get_device()
+to increment reference count for priv->master_mii_bus->dev if
+of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register()
+fails, it will call get_device() twice without decrement reference count
+for the device. And it is the same if bcm_sf2_mdio_register() succeeds but
+fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the
+reference count has not decremented to zero, the dev related resource will
+not be freed.
+
+So remove the get_device() in bcm_sf2_mdio_register(), and call
+put_device() if mdiobus_alloc() or mdiobus_register() fails and in
+bcm_sf2_mdio_unregister() to solve the issue.
+
+And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and
+just return 0 if it succeeds to make it cleaner.
+
+Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Suggested-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c |   24 +++++++++++++++---------
+ 1 file changed, 15 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct
+       dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio");
+       priv->master_mii_bus = of_mdio_find_bus(dn);
+       if (!priv->master_mii_bus) {
+-              of_node_put(dn);
+-              return -EPROBE_DEFER;
++              err = -EPROBE_DEFER;
++              goto err_of_node_put;
+       }
+-      get_device(&priv->master_mii_bus->dev);
+       priv->master_mii_dn = dn;
+       priv->slave_mii_bus = mdiobus_alloc();
+       if (!priv->slave_mii_bus) {
+-              of_node_put(dn);
+-              return -ENOMEM;
++              err = -ENOMEM;
++              goto err_put_master_mii_bus_dev;
+       }
+       priv->slave_mii_bus->priv = priv;
+@@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct
+       }
+       err = mdiobus_register(priv->slave_mii_bus);
+-      if (err && dn) {
+-              mdiobus_free(priv->slave_mii_bus);
+-              of_node_put(dn);
+-      }
++      if (err && dn)
++              goto err_free_slave_mii_bus;
++
++      return 0;
++err_free_slave_mii_bus:
++      mdiobus_free(priv->slave_mii_bus);
++err_put_master_mii_bus_dev:
++      put_device(&priv->master_mii_bus->dev);
++err_of_node_put:
++      of_node_put(dn);
+       return err;
+ }
+@@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(stru
+ {
+       mdiobus_unregister(priv->slave_mii_bus);
+       mdiobus_free(priv->slave_mii_bus);
++      put_device(&priv->master_mii_bus->dev);
+       of_node_put(priv->master_mii_dn);
+ }
diff --git a/queue-6.1/net-fix-ifname-in-netlink-ntf-during-netns-move.patch b/queue-6.1/net-fix-ifname-in-netlink-ntf-during-netns-move.patch
new file mode 100644 (file)
index 0000000..e85b003
--- /dev/null
@@ -0,0 +1,124 @@
+From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:13 -0700
+Subject: net: fix ifname in netlink ntf during netns move
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 311cca40661f428b7aa114fb5af578cfdbe3e8b6 upstream.
+
+dev_get_valid_name() overwrites the netdev's name on success.
+This makes it hard to use in prepare-commit-like fashion,
+where we do validation first, and "commit" to the change
+later.
+
+Factor out a helper which lets us save the new name to a buffer.
+Use it to fix the problem of notification on netns move having
+incorrect name:
+
+ 5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
+     link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
+ 6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
+     link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
+
+ [ ~]# ip link set dev eth0 netns 1 name eth1
+
+ip monitor inside netns:
+ Deleted inet eth0
+ Deleted inet6 eth0
+ Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
+     link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
+
+Name is reported as eth1 in old netns for ifindex 5, already renamed.
+
+Fixes: d90310243fd7 ("net: device name allocation cleanups")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |   44 +++++++++++++++++++++++++++++++-------------
+ 1 file changed, 31 insertions(+), 13 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1091,6 +1091,26 @@ static int __dev_alloc_name(struct net *
+       return -ENFILE;
+ }
++static int dev_prep_valid_name(struct net *net, struct net_device *dev,
++                             const char *want_name, char *out_name)
++{
++      int ret;
++
++      if (!dev_valid_name(want_name))
++              return -EINVAL;
++
++      if (strchr(want_name, '%')) {
++              ret = __dev_alloc_name(net, want_name, out_name);
++              return ret < 0 ? ret : 0;
++      } else if (netdev_name_in_use(net, want_name)) {
++              return -EEXIST;
++      } else if (out_name != want_name) {
++              strscpy(out_name, want_name, IFNAMSIZ);
++      }
++
++      return 0;
++}
++
+ static int dev_alloc_name_ns(struct net *net,
+                            struct net_device *dev,
+                            const char *name)
+@@ -1128,19 +1148,13 @@ EXPORT_SYMBOL(dev_alloc_name);
+ static int dev_get_valid_name(struct net *net, struct net_device *dev,
+                             const char *name)
+ {
+-      BUG_ON(!net);
+-
+-      if (!dev_valid_name(name))
+-              return -EINVAL;
+-
+-      if (strchr(name, '%'))
+-              return dev_alloc_name_ns(net, dev, name);
+-      else if (netdev_name_in_use(net, name))
+-              return -EEXIST;
+-      else if (dev->name != name)
+-              strscpy(dev->name, name, IFNAMSIZ);
++      char buf[IFNAMSIZ];
++      int ret;
+-      return 0;
++      ret = dev_prep_valid_name(net, dev, name, buf);
++      if (ret >= 0)
++              strscpy(dev->name, buf, IFNAMSIZ);
++      return ret;
+ }
+ /**
+@@ -10936,6 +10950,7 @@ int __dev_change_net_namespace(struct ne
+                              const char *pat, int new_ifindex)
+ {
+       struct net *net_old = dev_net(dev);
++      char new_name[IFNAMSIZ] = {};
+       int err, new_nsid;
+       ASSERT_RTNL();
+@@ -10962,7 +10977,7 @@ int __dev_change_net_namespace(struct ne
+               /* We get here if we can't use the current device name */
+               if (!pat)
+                       goto out;
+-              err = dev_get_valid_name(net, dev, pat);
++              err = dev_prep_valid_name(net, dev, pat, new_name);
+               if (err < 0)
+                       goto out;
+       }
+@@ -11030,6 +11045,9 @@ int __dev_change_net_namespace(struct ne
+       kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+       netdev_adjacent_add_links(dev);
++      if (new_name[0]) /* Rename the netdev to prepared name */
++              strscpy(dev->name, new_name, IFNAMSIZ);
++
+       /* Fixup kobjects */
+       err = device_rename(&dev->dev, dev->name);
+       WARN_ON(err);
diff --git a/queue-6.1/net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch b/queue-6.1/net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch
new file mode 100644 (file)
index 0000000..3d328d3
--- /dev/null
@@ -0,0 +1,32 @@
+From 513f61e2193350c7a345da98559b80f61aec4fa6 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make_ruc2021@163.com>
+Date: Mon, 9 Oct 2023 09:13:37 +0800
+Subject: net: ipv4: fix return value check in esp_remove_trailer
+
+From: Ma Ke <make_ruc2021@163.com>
+
+commit 513f61e2193350c7a345da98559b80f61aec4fa6 upstream.
+
+In esp_remove_trailer(), to avoid an unexpected result returned by
+pskb_trim, we should check the return value of pskb_trim().
+
+Signed-off-by: Ma Ke <make_ruc2021@163.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/esp4.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -732,7 +732,9 @@ static inline int esp_remove_trailer(str
+               skb->csum = csum_block_sub(skb->csum, csumdiff,
+                                          skb->len - trimlen);
+       }
+-      pskb_trim(skb, skb->len - trimlen);
++      ret = pskb_trim(skb, skb->len - trimlen);
++      if (unlikely(ret))
++              return ret;
+       ret = nexthdr[1];
diff --git a/queue-6.1/net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch b/queue-6.1/net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch
new file mode 100644 (file)
index 0000000..928f0fa
--- /dev/null
@@ -0,0 +1,32 @@
+From dad4e491e30b20f4dc615c9da65d2142d703b5c2 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make_ruc2021@163.com>
+Date: Sat, 7 Oct 2023 08:59:53 +0800
+Subject: net: ipv6: fix return value check in esp_remove_trailer
+
+From: Ma Ke <make_ruc2021@163.com>
+
+commit dad4e491e30b20f4dc615c9da65d2142d703b5c2 upstream.
+
+In esp_remove_trailer(), to avoid an unexpected result returned by
+pskb_trim, we should check the return value of pskb_trim().
+
+Signed-off-by: Ma Ke <make_ruc2021@163.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/esp6.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/esp6.c
++++ b/net/ipv6/esp6.c
+@@ -770,7 +770,9 @@ static inline int esp_remove_trailer(str
+               skb->csum = csum_block_sub(skb->csum, csumdiff,
+                                          skb->len - trimlen);
+       }
+-      pskb_trim(skb, skb->len - trimlen);
++      ret = pskb_trim(skb, skb->len - trimlen);
++      if (unlikely(ret))
++              return ret;
+       ret = nexthdr[1];
diff --git a/queue-6.1/net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch b/queue-6.1/net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch
new file mode 100644 (file)
index 0000000..c585c7a
--- /dev/null
@@ -0,0 +1,37 @@
+From 6200e00e112ce2d17b066a20dd2476d9aecbefa6 Mon Sep 17 00:00:00 2001
+From: Florian Fainelli <florian.fainelli@broadcom.com>
+Date: Tue, 17 Oct 2023 13:51:19 -0700
+Subject: net: phy: bcm7xxx: Add missing 16nm EPHY statistics
+
+From: Florian Fainelli <florian.fainelli@broadcom.com>
+
+commit 6200e00e112ce2d17b066a20dd2476d9aecbefa6 upstream.
+
+The .probe() function would allocate the necessary space and ensure that
+the library call sizes the number of statistics but the callbacks
+necessary to fetch the name and values were not wired up.
+
+Reported-by: Justin Chen <justin.chen@broadcom.com>
+Fixes: f68d08c437f9 ("net: phy: bcm7xxx: Add EPHY entry for 72165")
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20231017205119.416392-1-florian.fainelli@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/bcm7xxx.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/phy/bcm7xxx.c
++++ b/drivers/net/phy/bcm7xxx.c
+@@ -907,6 +907,9 @@ static void bcm7xxx_28nm_remove(struct p
+       .name           = _name,                                        \
+       /* PHY_BASIC_FEATURES */                                        \
+       .flags          = PHY_IS_INTERNAL,                              \
++      .get_sset_count = bcm_phy_get_sset_count,                       \
++      .get_strings    = bcm_phy_get_strings,                          \
++      .get_stats      = bcm7xxx_28nm_get_phy_stats,                   \
+       .probe          = bcm7xxx_28nm_probe,                           \
+       .remove         = bcm7xxx_28nm_remove,                          \
+       .config_init    = bcm7xxx_16nm_ephy_config_init,                \
diff --git a/queue-6.1/net-pktgen-fix-interface-flags-printing.patch b/queue-6.1/net-pktgen-fix-interface-flags-printing.patch
new file mode 100644 (file)
index 0000000..1f33747
--- /dev/null
@@ -0,0 +1,60 @@
+From 1d30162f35c7a73fc2f8cdcdcdbd690bedb99d1a Mon Sep 17 00:00:00 2001
+From: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+Date: Mon, 16 Oct 2023 14:08:59 +0000
+Subject: net: pktgen: Fix interface flags printing
+
+From: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+
+commit 1d30162f35c7a73fc2f8cdcdcdbd690bedb99d1a upstream.
+
+Device flags are displayed incorrectly:
+1) The comparison (i == F_FLOW_SEQ) is always false, because F_FLOW_SEQ
+is equal to (1 << FLOW_SEQ_SHIFT) == 2048, and the maximum value
+of the 'i' variable is (NR_PKT_FLAG - 1) == 17. It should be compared
+with FLOW_SEQ_SHIFT.
+
+2) Similarly to the F_IPSEC flag.
+
+3) Also add spaces to the end of the string literal "spi:%u"
+to prevent the output from merging with the flag that follows.
+
+Found by InfoTeCS on behalf of Linux Verification Center
+(linuxtesting.org) with SVACE.
+
+Fixes: 99c6d3d20d62 ("pktgen: Remove brute-force printing of flags")
+Signed-off-by: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/pktgen.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/core/pktgen.c
++++ b/net/core/pktgen.c
+@@ -669,19 +669,19 @@ static int pktgen_if_show(struct seq_fil
+       seq_puts(seq, "     Flags: ");
+       for (i = 0; i < NR_PKT_FLAGS; i++) {
+-              if (i == F_FLOW_SEQ)
++              if (i == FLOW_SEQ_SHIFT)
+                       if (!pkt_dev->cflows)
+                               continue;
+-              if (pkt_dev->flags & (1 << i))
++              if (pkt_dev->flags & (1 << i)) {
+                       seq_printf(seq, "%s  ", pkt_flag_names[i]);
+-              else if (i == F_FLOW_SEQ)
+-                      seq_puts(seq, "FLOW_RND  ");
+-
+ #ifdef CONFIG_XFRM
+-              if (i == F_IPSEC && pkt_dev->spi)
+-                      seq_printf(seq, "spi:%u", pkt_dev->spi);
++                      if (i == IPSEC_SHIFT && pkt_dev->spi)
++                              seq_printf(seq, "spi:%u  ", pkt_dev->spi);
+ #endif
++              } else if (i == FLOW_SEQ_SHIFT) {
++                      seq_puts(seq, "FLOW_RND  ");
++              }
+       }
+       seq_puts(seq, "\n");
diff --git a/queue-6.1/net-rfkill-gpio-prevent-value-glitch-during-probe.patch b/queue-6.1/net-rfkill-gpio-prevent-value-glitch-during-probe.patch
new file mode 100644 (file)
index 0000000..dd66d07
--- /dev/null
@@ -0,0 +1,56 @@
+From b2f750c3a80b285cd60c9346f8c96bd0a2a66cde Mon Sep 17 00:00:00 2001
+From: Josua Mayer <josua@solid-run.com>
+Date: Wed, 4 Oct 2023 18:39:28 +0200
+Subject: net: rfkill: gpio: prevent value glitch during probe
+
+From: Josua Mayer <josua@solid-run.com>
+
+commit b2f750c3a80b285cd60c9346f8c96bd0a2a66cde upstream.
+
+When either reset- or shutdown-gpio are initially deasserted,
+e.g. after a reboot - or when the hardware does not include pull-down,
+there will be a short toggle of both IOs to logical 0 and back to 1.
+
+It seems that the rfkill default is unblocked, so the driver should not
+glitch to output low during probe.
+It can lead e.g. to unexpected lte modem reconnect:
+
+[1] root@localhost:~# dmesg | grep "usb 2-1"
+[    2.136124] usb 2-1: new SuperSpeed USB device number 2 using xhci-hcd
+[   21.215278] usb 2-1: USB disconnect, device number 2
+[   28.833977] usb 2-1: new SuperSpeed USB device number 3 using xhci-hcd
+
+The glitch has been discovered on an arm64 board, now that device-tree
+support for the rfkill-gpio driver has finally appeared :).
+
+Change the flags for devm_gpiod_get_optional from GPIOD_OUT_LOW to
+GPIOD_ASIS to avoid any glitches.
+The rfkill driver will set the intended value during rfkill_sync_work.
+
+Fixes: 7176ba23f8b5 ("net: rfkill: add generic gpio rfkill driver")
+Signed-off-by: Josua Mayer <josua@solid-run.com>
+Link: https://lore.kernel.org/r/20231004163928.14609-1-josua@solid-run.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rfkill/rfkill-gpio.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/rfkill/rfkill-gpio.c
++++ b/net/rfkill/rfkill-gpio.c
+@@ -98,13 +98,13 @@ static int rfkill_gpio_probe(struct plat
+       rfkill->clk = devm_clk_get(&pdev->dev, NULL);
+-      gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
++      gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS);
+       if (IS_ERR(gpio))
+               return PTR_ERR(gpio);
+       rfkill->reset_gpio = gpio;
+-      gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW);
++      gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS);
+       if (IS_ERR(gpio))
+               return PTR_ERR(gpio);
diff --git a/queue-6.1/net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch b/queue-6.1/net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch
new file mode 100644 (file)
index 0000000..989a671
--- /dev/null
@@ -0,0 +1,90 @@
+From a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 Mon Sep 17 00:00:00 2001
+From: Pedro Tammela <pctammela@mojatatu.com>
+Date: Tue, 17 Oct 2023 11:36:02 -0300
+Subject: net/sched: sch_hfsc: upgrade 'rt' to 'sc' when it becomes a inner curve
+
+From: Pedro Tammela <pctammela@mojatatu.com>
+
+commit a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 upstream.
+
+Christian Theune says:
+   I upgraded from 6.1.38 to 6.1.55 this morning and it broke my traffic shaping script,
+   leaving me with a non-functional uplink on a remote router.
+
+An 'rt' curve cannot be used as an inner curve (parent class), but we were
+allowing such configurations since the qdisc was introduced. Such
+configurations would trigger a UAF as Budimir explains:
+   The parent will have vttree_insert() called on it in init_vf(),
+   but will not have vttree_remove() called on it in update_vf()
+   because it does not have the HFSC_FSC flag set.
+
+The qdisc always assumes that inner classes have the HFSC_FSC flag set.
+This is by design as it doesn't make sense 'qdisc wise' for an 'rt'
+curve to be an inner curve.
+
+Budimir's original patch prevents users from adding classes with an 'rt'
+parent, but this is too strict as it breaks users that have been using
+'rt' as an inner class. Another approach, taken by this patch, is to
+upgrade the inner 'rt' into an 'sc', warning the user in the process.
+It avoids the UAF reported by Budimir while also being more permissive
+to bad scripts/users/code using 'rt' as an inner class.
+
+Users checking the `tc class ls [...]` or `tc class get [...]` dumps would
+observe the curve change and could potentially break with this change.
+
+v1->v2: https://lore.kernel.org/all/20231013151057.2611860-1-pctammela@mojatatu.com/
+- Correct 'Fixes' tag and merge with revert (Jakub)
+
+Cc: Christian Theune <ct@flyingcircus.io>
+Cc: Budimir Markovic <markovicbudimir@gmail.com>
+Fixes: b3d26c5702c7 ("net/sched: sch_hfsc: Ensure inner classes have fsc curve")
+Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20231017143602.3191556-1-pctammela@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_hfsc.c |   18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -903,6 +903,14 @@ hfsc_change_usc(struct hfsc_class *cl, s
+       cl->cl_flags |= HFSC_USC;
+ }
++static void
++hfsc_upgrade_rt(struct hfsc_class *cl)
++{
++      cl->cl_fsc = cl->cl_rsc;
++      rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
++      cl->cl_flags |= HFSC_FSC;
++}
++
+ static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
+       [TCA_HFSC_RSC]  = { .len = sizeof(struct tc_service_curve) },
+       [TCA_HFSC_FSC]  = { .len = sizeof(struct tc_service_curve) },
+@@ -1012,10 +1020,6 @@ hfsc_change_class(struct Qdisc *sch, u32
+               if (parent == NULL)
+                       return -ENOENT;
+       }
+-      if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+-              NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+-              return -EINVAL;
+-      }
+       if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
+               return -EINVAL;
+@@ -1066,6 +1070,12 @@ hfsc_change_class(struct Qdisc *sch, u32
+       cl->cf_tree = RB_ROOT;
+       sch_tree_lock(sch);
++      /* Check if the inner class is a misconfigured 'rt' */
++      if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
++              NL_SET_ERR_MSG(extack,
++                             "Forced curve change on parent 'rt' to 'sc'");
++              hfsc_upgrade_rt(parent);
++      }
+       qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
+       list_add_tail(&cl->siblings, &parent->children);
+       if (parent->level == 0)
diff --git a/queue-6.1/net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch b/queue-6.1/net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch
new file mode 100644 (file)
index 0000000..cd9a2fd
--- /dev/null
@@ -0,0 +1,32 @@
+From c53647a5df9e66dd9fedf240198e1fe50d88c286 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Mon, 16 Oct 2023 20:28:10 +0300
+Subject: net: usb: smsc95xx: Fix an error code in smsc95xx_reset()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit c53647a5df9e66dd9fedf240198e1fe50d88c286 upstream.
+
+Return a negative error code instead of success.
+
+Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/147927f0-9ada-45cc-81ff-75a19dd30b76@moroto.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/smsc95xx.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -897,7 +897,7 @@ static int smsc95xx_reset(struct usbnet
+       if (timeout >= 100) {
+               netdev_warn(dev->net, "timeout waiting for completion of Lite Reset\n");
+-              return ret;
++              return -ETIMEDOUT;
+       }
+       ret = smsc95xx_set_mac_address(dev);
diff --git a/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch b/queue-6.1/netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch
new file mode 100644 (file)
index 0000000..50870d3
--- /dev/null
@@ -0,0 +1,36 @@
+From ebd032fa881882fef2acb9da1bbde48d8233241d Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 4 Oct 2023 13:12:58 +0200
+Subject: netfilter: nf_tables: do not remove elements if set backend implements .abort
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit ebd032fa881882fef2acb9da1bbde48d8233241d upstream.
+
+The pipapo set backend maintains two copies of the data structure. Removing
+the elements from the copy that is going to be discarded slows down
+the abort path significantly; with this patch the abort path goes from
+several minutes to a few seconds.
+
+Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9931,7 +9931,10 @@ static int __nf_tables_abort(struct net
+                               break;
+                       }
+                       te = (struct nft_trans_elem *)trans->data;
+-                      nft_setelem_remove(net, te->set, &te->elem);
++                      if (!te->set->ops->abort ||
++                          nft_setelem_is_catchall(te->set, &te->elem))
++                              nft_setelem_remove(net, te->set, &te->elem);
++
+                       if (!nft_setelem_is_catchall(te->set, &te->elem))
+                               atomic_dec(&te->set->nelems);
diff --git a/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch b/queue-6.1/netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch
new file mode 100644 (file)
index 0000000..b5d12a1
--- /dev/null
@@ -0,0 +1,35 @@
+From f86fb94011aeb3b26337fc22204ca726aeb8bc24 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 18 Oct 2023 13:18:39 +0200
+Subject: netfilter: nf_tables: revert do not remove elements if set backend implements .abort
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f86fb94011aeb3b26337fc22204ca726aeb8bc24 upstream.
+
+nf_tables_abort_release() path calls nft_set_elem_destroy() for
+NFT_MSG_NEWSETELEM which releases the element, however, a reference to
+the element still remains in the working copy.
+
+Fixes: ebd032fa8818 ("netfilter: nf_tables: do not remove elements if set backend implements .abort")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c |    5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -9931,10 +9931,7 @@ static int __nf_tables_abort(struct net
+                               break;
+                       }
+                       te = (struct nft_trans_elem *)trans->data;
+-                      if (!te->set->ops->abort ||
+-                          nft_setelem_is_catchall(te->set, &te->elem))
+-                              nft_setelem_remove(net, te->set, &te->elem);
+-
++                      nft_setelem_remove(net, te->set, &te->elem);
+                       if (!nft_setelem_is_catchall(te->set, &te->elem))
+                               atomic_dec(&te->set->nelems);
diff --git a/queue-6.1/netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch b/queue-6.1/netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch
new file mode 100644 (file)
index 0000000..7572720
--- /dev/null
@@ -0,0 +1,34 @@
+From d111692a59c1470ae530cbb39bcf0346c950ecc7 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 17 Oct 2023 12:28:27 +0200
+Subject: netfilter: nft_set_rbtree: .deactivate fails if element has expired
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit d111692a59c1470ae530cbb39bcf0346c950ecc7 upstream.
+
+Currently .deactivate allows removing an expired element, which is not
+possible in other existing set backends. This is more noticeable if
+gc-interval is high, so expired elements remain in the tree. On-demand gc
+also does not help in this case, because this is the delete element path.
+Return NULL if the element has expired.
+
+Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -568,6 +568,8 @@ static void *nft_rbtree_deactivate(const
+                                  nft_rbtree_interval_end(this)) {
+                               parent = parent->rb_right;
+                               continue;
++                      } else if (nft_set_elem_expired(&rbe->ext)) {
++                              break;
+                       } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+                               parent = parent->rb_left;
+                               continue;
diff --git a/queue-6.1/netlink-correct-offload_xstats-size.patch b/queue-6.1/netlink-correct-offload_xstats-size.patch
new file mode 100644 (file)
index 0000000..846449e
--- /dev/null
@@ -0,0 +1,88 @@
+From 503930f8e113edc86f92b767efb4ea57bdffffb2 Mon Sep 17 00:00:00 2001
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Thu, 12 Oct 2023 21:14:48 -0700
+Subject: netlink: Correct offload_xstats size
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+commit 503930f8e113edc86f92b767efb4ea57bdffffb2 upstream.
+
+rtnl_offload_xstats_get_size_hw_s_info_one() conditionalizes the
+size-computation for IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED based on whether
+or not the device has offload_xstats enabled.
+
+However, rtnl_offload_xstats_fill_hw_s_info_one() is adding the u8 for
+that field unconditionally.
+
+syzkaller triggered a WARNING in rtnl_stats_get due to this:
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 754 at net/core/rtnetlink.c:5982 rtnl_stats_get+0x2f4/0x300
+Modules linked in:
+CPU: 0 PID: 754 Comm: syz-executor148 Not tainted 6.6.0-rc2-g331b78eb12af #45
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+RIP: 0010:rtnl_stats_get+0x2f4/0x300 net/core/rtnetlink.c:5982
+Code: ff ff 89 ee e8 7d 72 50 ff 83 fd a6 74 17 e8 33 6e 50 ff 4c 89 ef be 02 00 00 00 e8 86 00 fa ff e9 7b fe ff ff e8 1c 6e 50 ff <0f> 0b eb e5 e8 73 79 7b 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90
+RSP: 0018:ffffc900006837c0 EFLAGS: 00010293
+RAX: ffffffff81cf7f24 RBX: ffff8881015d9000 RCX: ffff888101815a00
+RDX: 0000000000000000 RSI: 00000000ffffffa6 RDI: 00000000ffffffa6
+RBP: 00000000ffffffa6 R08: ffffffff81cf7f03 R09: 0000000000000001
+R10: ffff888101ba47b9 R11: ffff888101815a00 R12: ffff8881017dae00
+R13: ffff8881017dad00 R14: ffffc90000683ab8 R15: ffffffff83c1f740
+FS:  00007fbc22dbc740(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020000046 CR3: 000000010264e003 CR4: 0000000000170ef0
+Call Trace:
+ <TASK>
+ rtnetlink_rcv_msg+0x677/0x710 net/core/rtnetlink.c:6480
+ netlink_rcv_skb+0xea/0x1c0 net/netlink/af_netlink.c:2545
+ netlink_unicast+0x430/0x500 net/netlink/af_netlink.c:1342
+ netlink_sendmsg+0x4fc/0x620 net/netlink/af_netlink.c:1910
+ sock_sendmsg+0xa8/0xd0 net/socket.c:730
+ ____sys_sendmsg+0x22a/0x320 net/socket.c:2541
+ ___sys_sendmsg+0x143/0x190 net/socket.c:2595
+ __x64_sys_sendmsg+0xd8/0x150 net/socket.c:2624
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x47/0xa0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+RIP: 0033:0x7fbc22e8d6a9
+Code: 5c c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 4f 37 0d 00 f7 d8 64 89 01 48
+RSP: 002b:00007ffc4320e778 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00000000004007d0 RCX: 00007fbc22e8d6a9
+RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003
+RBP: 0000000000000001 R08: 0000000000000000 R09: 00000000004007d0
+R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffc4320e898
+R13: 00007ffc4320e8a8 R14: 00000000004004a0 R15: 00007fbc22fa5a80
+ </TASK>
+---[ end trace 0000000000000000 ]---
+
+Which didn't happen prior to commit bf9f1baa279f ("net: add dedicated
+kmem_cache for typical/small skb->head") as the skb always was large
+enough.
+
+Fixes: 0e7788fd7622 ("net: rtnetlink: Add UAPI for obtaining L3 offload xstats")
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Link: https://lore.kernel.org/r/20231013041448.8229-1-cpaasch@apple.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -5394,13 +5394,11 @@ static unsigned int
+ rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
+                                          enum netdev_offload_xstats_type type)
+ {
+-      bool enabled = netdev_offload_xstats_enabled(dev, type);
+-
+       return nla_total_size(0) +
+               /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
+               nla_total_size(sizeof(u8)) +
+               /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
+-              (enabled ? nla_total_size(sizeof(u8)) : 0) +
++              nla_total_size(sizeof(u8)) +
+               0;
+ }
diff --git a/queue-6.1/octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch b/queue-6.1/octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch
new file mode 100644 (file)
index 0000000..b0e0a99
--- /dev/null
@@ -0,0 +1,53 @@
+From a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 Mon Sep 17 00:00:00 2001
+From: Shinas Rasheed <srasheed@marvell.com>
+Date: Tue, 17 Oct 2023 03:50:30 -0700
+Subject: octeon_ep: update BQL sent bytes before ringing doorbell
+
+From: Shinas Rasheed <srasheed@marvell.com>
+
+commit a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 upstream.
+
+Sometimes Tx is completed immediately after doorbell is updated, which
+causes the Tx completion routine to update completion bytes before the
+same packet bytes are updated in sent bytes in transmit function, hence
+hitting BUG_ON() in dql_completed(). To avoid this, update BQL
+sent bytes before ringing doorbell.
+
+Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support")
+Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
+Link: https://lore.kernel.org/r/20231017105030.2310966-1-srasheed@marvell.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/octeon_ep/octep_main.c |   13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+@@ -707,20 +707,19 @@ static netdev_tx_t octep_start_xmit(stru
+               hw_desc->dptr = tx_buffer->sglist_dma;
+       }
+-      /* Flush the hw descriptor before writing to doorbell */
+-      wmb();
+-
+-      /* Ring Doorbell to notify the NIC there is a new packet */
+-      writel(1, iq->doorbell_reg);
++      netdev_tx_sent_queue(iq->netdev_q, skb->len);
++      skb_tx_timestamp(skb);
+       atomic_inc(&iq->instr_pending);
+       wi++;
+       if (wi == iq->max_count)
+               wi = 0;
+       iq->host_write_index = wi;
++      /* Flush the hw descriptor before writing to doorbell */
++      wmb();
+-      netdev_tx_sent_queue(iq->netdev_q, skb->len);
++      /* Ring Doorbell to notify the NIC there is a new packet */
++      writel(1, iq->doorbell_reg);
+       iq->stats.instr_posted++;
+-      skb_tx_timestamp(skb);
+       return NETDEV_TX_OK;
+ dma_map_sg_err:
diff --git a/queue-6.1/selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch b/queue-6.1/selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch
new file mode 100644 (file)
index 0000000..14ec336
--- /dev/null
@@ -0,0 +1,41 @@
+From 2e2d9c7d4d37d74873583d7b0c94eac8b6869486 Mon Sep 17 00:00:00 2001
+From: Phil Sutter <phil@nwl.cc>
+Date: Fri, 13 Oct 2023 22:02:24 +0200
+Subject: selftests: netfilter: Run nft_audit.sh in its own netns
+
+From: Phil Sutter <phil@nwl.cc>
+
+commit 2e2d9c7d4d37d74873583d7b0c94eac8b6869486 upstream.
+
+Don't mess with the host's firewall ruleset. Since audit logging is not
+per-netns, add an initial delay of a second so other selftests' netns
+cleanups have a chance to finish.
+
+Fixes: e8dbde59ca3f ("selftests: netfilter: Test nf_tables audit logging")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/netfilter/nft_audit.sh | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
+index e94a80859bbd..99ed5bd6e840 100755
+--- a/tools/testing/selftests/netfilter/nft_audit.sh
++++ b/tools/testing/selftests/netfilter/nft_audit.sh
+@@ -11,6 +11,12 @@ nft --version >/dev/null 2>&1 || {
+       exit $SKIP_RC
+ }
++# Run everything in a separate network namespace
++[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
++
++# give other scripts a chance to finish - audit_logread sees all activity
++sleep 1
++
+ logfile=$(mktemp)
+ rulefile=$(mktemp)
+ echo "logging into $logfile"
+-- 
+2.42.0
+
diff --git a/queue-6.1/selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch b/queue-6.1/selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch
new file mode 100644 (file)
index 0000000..deeb5e1
--- /dev/null
@@ -0,0 +1,36 @@
+From af846afad5ca1c1a24d320adf9e48255e97db84e Mon Sep 17 00:00:00 2001
+From: Aaron Conole <aconole@redhat.com>
+Date: Wed, 11 Oct 2023 15:49:37 -0400
+Subject: selftests: openvswitch: Catch cases where the tests are killed
+
+From: Aaron Conole <aconole@redhat.com>
+
+commit af846afad5ca1c1a24d320adf9e48255e97db84e upstream.
+
+In case of a fatal signal or early abort, at least clean up the current
+test case.
+
+Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite")
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
+index 220c3356901e..2a0112be7ead 100755
+--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
++++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
+@@ -3,6 +3,8 @@
+ #
+ # OVS kernel module self tests
++trap ovs_exit_sig EXIT TERM INT ERR
++
+ # Kselftest framework requirement - SKIP code is 4.
+ ksft_skip=4
+-- 
+2.42.0
+
index d768f4c7c6c76d217c5bfcc2a0bfd02bb51e08e1..5097116decaea98ca4d473c0696f11f08ae9ac72 100644 (file)
@@ -44,3 +44,33 @@ asoc-codecs-wcd938x-drop-bogus-bind-error-handling.patch
 asoc-codecs-wcd938x-fix-unbind-tear-down-order.patch
 asoc-codecs-wcd938x-fix-resource-leaks-on-bind-errors.patch
 qed-fix-ll2-rx-buffer-allocation.patch
+xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch
+xfrm-fix-a-data-race-in-xfrm_gen_index.patch
+xfrm-interface-use-dev_stats_inc.patch
+wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch
+net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch
+net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch
+net-rfkill-gpio-prevent-value-glitch-during-probe.patch
+tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch
+tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch
+tcp-fix-listen-warning-with-v4-mapped-v6-address.patch
+tun-prevent-negative-ifindex.patch
+ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch
+net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch
+octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch
+i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch
+net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch
+bonding-return-pointer-to-data-after-pull-on-skb.patch
+net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch
+neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch
+selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch
+selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch
+netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch
+netlink-correct-offload_xstats-size.patch
+netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch
+netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch
+net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch
+net-pktgen-fix-interface-flags-printing.patch
+net-avoid-uaf-on-deleted-altname.patch
+net-fix-ifname-in-netlink-ntf-during-netns-move.patch
+net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch
diff --git a/queue-6.1/tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch b/queue-6.1/tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch
new file mode 100644 (file)
index 0000000..0649307
--- /dev/null
@@ -0,0 +1,96 @@
+From 1c2709cfff1dedbb9591e989e2f001484208d914 Mon Sep 17 00:00:00 2001
+From: Neal Cardwell <ncardwell@google.com>
+Date: Sun, 15 Oct 2023 13:47:00 -0400
+Subject: tcp: fix excessive TLP and RACK timeouts from HZ rounding
+
+From: Neal Cardwell <ncardwell@google.com>
+
+commit 1c2709cfff1dedbb9591e989e2f001484208d914 upstream.
+
+We discovered from packet traces of slow loss recovery on kernels with
+the default HZ=250 setting (and min_rtt < 1ms) that after reordering,
+when receiving a SACKed sequence range, the RACK reordering timer was
+firing after about 16ms rather than the desired value of roughly
+min_rtt/4 + 2ms. The problem is largely due to the RACK reorder timer
+calculation adding in TCP_TIMEOUT_MIN, which is 2 jiffies. On kernels
+with HZ=250, this is 2*4ms = 8ms. The TLP timer calculation has the
+exact same issue.
+
+This commit fixes the TLP transmit timer and RACK reordering timer
+floor calculation to more closely match the intended 2ms floor even on
+kernels with HZ=250. It does this by adding in a new
+TCP_TIMEOUT_MIN_US floor of 2000 us and then converting to jiffies,
+instead of the current approach of converting to jiffies and then
+adding the TCP_TIMEOUT_MIN value of 2 jiffies.
+
+Our testing has verified that on kernels with HZ=1000, as expected,
+this does not produce significant changes in behavior, but on kernels
+with the default HZ=250 the latency improvement can be large. For
+example, our tests show that for HZ=250 kernels at low RTTs this fix
+roughly halves the latency for the RACK reorder timer: instead of
+mostly firing at 16ms it mostly fires at 8ms.
+
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Fixes: bb4d991a28cc ("tcp: adjust tail loss probe timeout")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20231015174700.2206872-1-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h       |    3 +++
+ net/ipv4/tcp_output.c   |    9 +++++----
+ net/ipv4/tcp_recovery.c |    2 +-
+ 3 files changed, 9 insertions(+), 5 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -141,6 +141,9 @@ void tcp_time_wait(struct sock *sk, int
+ #define TCP_RTO_MAX   ((unsigned)(120*HZ))
+ #define TCP_RTO_MIN   ((unsigned)(HZ/5))
+ #define TCP_TIMEOUT_MIN       (2U) /* Min timeout for TCP timers in jiffies */
++
++#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
++
+ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))   /* RFC6298 2.1 initial RTO value        */
+ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))       /* RFC 1122 initial RTO value, now
+                                                * used as a fallback RTO for the
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2735,7 +2735,7 @@ bool tcp_schedule_loss_probe(struct sock
+ {
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+-      u32 timeout, rto_delta_us;
++      u32 timeout, timeout_us, rto_delta_us;
+       int early_retrans;
+       /* Don't do any loss probe on a Fast Open connection before 3WHS
+@@ -2759,11 +2759,12 @@ bool tcp_schedule_loss_probe(struct sock
+        * sample is available then probe after TCP_TIMEOUT_INIT.
+        */
+       if (tp->srtt_us) {
+-              timeout = usecs_to_jiffies(tp->srtt_us >> 2);
++              timeout_us = tp->srtt_us >> 2;
+               if (tp->packets_out == 1)
+-                      timeout += TCP_RTO_MIN;
++                      timeout_us += tcp_rto_min_us(sk);
+               else
+-                      timeout += TCP_TIMEOUT_MIN;
++                      timeout_us += TCP_TIMEOUT_MIN_US;
++              timeout = usecs_to_jiffies(timeout_us);
+       } else {
+               timeout = TCP_TIMEOUT_INIT;
+       }
+--- a/net/ipv4/tcp_recovery.c
++++ b/net/ipv4/tcp_recovery.c
+@@ -104,7 +104,7 @@ bool tcp_rack_mark_lost(struct sock *sk)
+       tp->rack.advanced = 0;
+       tcp_rack_detect_loss(sk, &timeout);
+       if (timeout) {
+-              timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
++              timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US);
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
+                                         timeout, inet_csk(sk)->icsk_rto);
+       }
diff --git a/queue-6.1/tcp-fix-listen-warning-with-v4-mapped-v6-address.patch b/queue-6.1/tcp-fix-listen-warning-with-v4-mapped-v6-address.patch
new file mode 100644 (file)
index 0000000..34dbd6d
--- /dev/null
@@ -0,0 +1,149 @@
+From 8702cf12e6ba91616a72d684e90357977972991b Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Mon, 9 Oct 2023 18:38:14 -0700
+Subject: tcp: Fix listen() warning with v4-mapped-v6 address.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 8702cf12e6ba91616a72d684e90357977972991b upstream.
+
+syzbot reported a warning [0] introduced by commit c48ef9c4aed3 ("tcp: Fix
+bind() regression for v4-mapped-v6 non-wildcard address.").
+
+After the cited commit, a v4 socket's address matches the corresponding
+v4-mapped-v6 tb2 in inet_bind2_bucket_match_addr(), not vice versa.
+
+During X.X.X.X -> ::ffff:X.X.X.X order bind()s, the second bind() uses
+bhash and conflicts properly without checking bhash2 so that we need not
+check if a v4-mapped-v6 sk matches the corresponding v4 address tb2 in
+inet_bind2_bucket_match_addr().  However, the repro shows that we need
+to check that in a no-conflict case.
+
+The repro bind()s two sockets to the 2-tuples using SO_REUSEPORT and calls
+listen() for the first socket:
+
+  from socket import *
+
+  s1 = socket()
+  s1.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1)
+  s1.bind(('127.0.0.1', 0))
+
+  s2 = socket(AF_INET6)
+  s2.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1)
+  s2.bind(('::ffff:127.0.0.1', s1.getsockname()[1]))
+
+  s1.listen()
+
+The second socket should belong to the first socket's tb2, but the second
+bind() creates another tb2 bucket because inet_bind2_bucket_find() returns
+NULL in inet_csk_get_port() as the v4-mapped-v6 sk does not match the
+corresponding v4 address tb2.
+
+  bhash2[] -> tb2(::ffff:X.X.X.X) -> tb2(X.X.X.X)
+
+Then, listen() for the first socket calls inet_csk_get_port(), where the
+v4 address matches the v4-mapped-v6 tb2 and WARN_ON() is triggered.
+
+To avoid that, we need to check if v4-mapped-v6 sk address matches with
+the corresponding v4 address tb2 in inet_bind2_bucket_match().
+
+The same checks are needed in inet_bind2_bucket_addr_match() too, so we
+can move all checks there and call it from inet_bind2_bucket_match().
+
+Note that now tb->family is just an address family of tb->(v6_)?rcv_saddr
+and not of sockets in the bucket.  This could be refactored later by
+defining tb->rcv_saddr as tb->v6_rcv_saddr.s6_addr32[3] and prepending
+::ffff: when creating v4 tb2.
+
+[0]:
+WARNING: CPU: 0 PID: 5049 at net/ipv4/inet_connection_sock.c:587 inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587
+Modules linked in:
+CPU: 0 PID: 5049 Comm: syz-executor288 Not tainted 6.6.0-rc2-syzkaller-00018-g2cf0f7156238 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023
+RIP: 0010:inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587
+Code: 7c 24 08 e8 4c b6 8a 01 31 d2 be 88 01 00 00 48 c7 c7 e0 94 ae 8b e8 59 2e a3 f8 2e 2e 2e 31 c0 e9 04 fe ff ff e8 ca 88 d0 f8 <0f> 0b e9 0f f9 ff ff e8 be 88 d0 f8 49 8d 7e 48 e8 65 ca 5a 00 31
+RSP: 0018:ffffc90003abfbf0 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff888026429100 RCX: 0000000000000000
+RDX: ffff88807edcbb80 RSI: ffffffff88b73d66 RDI: ffff888026c49f38
+RBP: ffff888026c49f30 R08: 0000000000000005 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff9260f200
+R13: ffff888026c49880 R14: 0000000000000000 R15: ffff888026429100
+FS:  00005555557d5380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000045ad50 CR3: 0000000025754000 CR4: 00000000003506f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ inet_csk_listen_start+0x155/0x360 net/ipv4/inet_connection_sock.c:1256
+ __inet_listen_sk+0x1b8/0x5c0 net/ipv4/af_inet.c:217
+ inet_listen+0x93/0xd0 net/ipv4/af_inet.c:239
+ __sys_listen+0x194/0x270 net/socket.c:1866
+ __do_sys_listen net/socket.c:1875 [inline]
+ __se_sys_listen net/socket.c:1873 [inline]
+ __x64_sys_listen+0x53/0x80 net/socket.c:1873
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f3a5bce3af9
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007ffc1a1c79e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000032
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a5bce3af9
+RDX: 00007f3a5bce3af9 RSI: 0000000000000000 RDI: 0000000000000003
+RBP: 00007f3a5bd565f0 R08: 0000000000000006 R09: 0000000000000006
+R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000001
+R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001
+ </TASK>
+
+Fixes: c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.")
+Reported-by: syzbot+71e724675ba3958edb31@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=71e724675ba3958edb31
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20231010013814.70571-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_hashtables.c |   24 +++++++++---------------
+ 1 file changed, 9 insertions(+), 15 deletions(-)
+
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -148,8 +148,14 @@ static bool inet_bind2_bucket_addr_match
+                                        const struct sock *sk)
+ {
+ #if IS_ENABLED(CONFIG_IPV6)
+-      if (sk->sk_family != tb2->family)
+-              return false;
++      if (sk->sk_family != tb2->family) {
++              if (sk->sk_family == AF_INET)
++                      return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) &&
++                              tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
++
++              return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) &&
++                      sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr;
++      }
+       if (sk->sk_family == AF_INET6)
+               return ipv6_addr_equal(&tb2->v6_rcv_saddr,
+@@ -799,19 +805,7 @@ static bool inet_bind2_bucket_match(cons
+           tb->l3mdev != l3mdev)
+               return false;
+-#if IS_ENABLED(CONFIG_IPV6)
+-      if (sk->sk_family != tb->family) {
+-              if (sk->sk_family == AF_INET)
+-                      return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
+-                              tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+-
+-              return false;
+-      }
+-
+-      if (sk->sk_family == AF_INET6)
+-              return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+-#endif
+-      return tb->rcv_saddr == sk->sk_rcv_saddr;
++      return inet_bind2_bucket_addr_match(tb, sk);
+ }
+ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
diff --git a/queue-6.1/tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch b/queue-6.1/tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch
new file mode 100644 (file)
index 0000000..7a9120f
--- /dev/null
@@ -0,0 +1,76 @@
+From f921a4a5bffa8a0005b190fb9421a7fc1fd716b6 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 17 Oct 2023 12:45:26 +0000
+Subject: tcp: tsq: relax tcp_small_queue_check() when rtx queue contains a single skb
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit f921a4a5bffa8a0005b190fb9421a7fc1fd716b6 upstream.
+
+In commit 75eefc6c59fd ("tcp: tsq: add a shortcut in tcp_small_queue_check()")
+we allowed to send an skb regardless of TSQ limits being hit if rtx queue
+was empty or had a single skb, in order to better fill the pipe
+when/if TX completions were slow.
+
+Then later, commit 75c119afe14f ("tcp: implement rb-tree based
+retransmit queue") accidentally removed the special case for
+one skb in rtx queue.
+
+Stefan Wahren reported a regression in single TCP flow throughput
+using a 100Mbit fec link, starting from commit 65466904b015 ("tcp: adjust
+TSO packet sizes based on min_rtt"). This last commit only made the
+regression more visible, because it locked the TCP flow on a particular
+behavior where TSQ prevented two skbs being pushed downstream,
+adding silences on the wire between each TSO packet.
+
+Many thanks to Stefan for his invaluable help !
+
+Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue")
+Link: https://lore.kernel.org/netdev/7f31ddc8-9971-495e-a1f6-819df542e0af@gmx.net/
+Reported-by: Stefan Wahren <wahrenst@gmx.net>
+Tested-by: Stefan Wahren <wahrenst@gmx.net>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Link: https://lore.kernel.org/r/20231017124526.4060202-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |   16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2489,6 +2489,18 @@ static bool tcp_pacing_check(struct sock
+       return true;
+ }
++static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk)
++{
++      const struct rb_node *node = sk->tcp_rtx_queue.rb_node;
++
++      /* No skb in the rtx queue. */
++      if (!node)
++              return true;
++
++      /* Only one skb in rtx queue. */
++      return !node->rb_left && !node->rb_right;
++}
++
+ /* TCP Small Queues :
+  * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+  * (These limits are doubled for retransmits)
+@@ -2526,12 +2538,12 @@ static bool tcp_small_queue_check(struct
+               limit += extra_bytes;
+       }
+       if (refcount_read(&sk->sk_wmem_alloc) > limit) {
+-              /* Always send skb if rtx queue is empty.
++              /* Always send skb if rtx queue is empty or has one skb.
+                * No need to wait for TX completion to call us back,
+                * after softirq/tasklet schedule.
+                * This helps when TX completions are delayed too much.
+                */
+-              if (tcp_rtx_queue_empty(sk))
++              if (tcp_rtx_queue_empty_or_single_skb(sk))
+                       return false;
+               set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
diff --git a/queue-6.1/tun-prevent-negative-ifindex.patch b/queue-6.1/tun-prevent-negative-ifindex.patch
new file mode 100644 (file)
index 0000000..1e23fe3
--- /dev/null
@@ -0,0 +1,96 @@
+From cbfbfe3aee718dc4c3c837f5d2463170ee59d78c Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 16 Oct 2023 18:08:51 +0000
+Subject: tun: prevent negative ifindex
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit cbfbfe3aee718dc4c3c837f5d2463170ee59d78c upstream.
+
+After commit 956db0a13b47 ("net: warn about attempts to register
+negative ifindex") syzbot is able to trigger the following splat.
+
+Negative ifindex are not supported.
+
+WARNING: CPU: 1 PID: 6003 at net/core/dev.c:9596 dev_index_reserve+0x104/0x210
+Modules linked in:
+CPU: 1 PID: 6003 Comm: syz-executor926 Not tainted 6.6.0-rc4-syzkaller-g19af4a4ed414 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
+pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : dev_index_reserve+0x104/0x210
+lr : dev_index_reserve+0x100/0x210
+sp : ffff800096a878e0
+x29: ffff800096a87930 x28: ffff0000d04380d0 x27: ffff0000d04380f8
+x26: ffff0000d04380f0 x25: 1ffff00012d50f20 x24: 1ffff00012d50f1c
+x23: dfff800000000000 x22: ffff8000929c21c0 x21: 00000000ffffffea
+x20: ffff0000d04380e0 x19: ffff800096a87900 x18: ffff800096a874c0
+x17: ffff800084df5008 x16: ffff80008051f9c4 x15: 0000000000000001
+x14: 1fffe0001a087198 x13: 0000000000000000 x12: 0000000000000000
+x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
+x8 : ffff0000d41c9bc0 x7 : 0000000000000000 x6 : 0000000000000000
+x5 : ffff800091763d88 x4 : 0000000000000000 x3 : ffff800084e04748
+x2 : 0000000000000001 x1 : 00000000fead71c7 x0 : 0000000000000000
+Call trace:
+dev_index_reserve+0x104/0x210
+register_netdevice+0x598/0x1074 net/core/dev.c:10084
+tun_set_iff+0x630/0xb0c drivers/net/tun.c:2850
+__tun_chr_ioctl+0x788/0x2af8 drivers/net/tun.c:3118
+tun_chr_ioctl+0x38/0x4c drivers/net/tun.c:3403
+vfs_ioctl fs/ioctl.c:51 [inline]
+__do_sys_ioctl fs/ioctl.c:871 [inline]
+__se_sys_ioctl fs/ioctl.c:857 [inline]
+__arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857
+__invoke_syscall arch/arm64/kernel/syscall.c:37 [inline]
+invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51
+el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136
+do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155
+el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678
+el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696
+el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595
+irq event stamp: 11348
+hardirqs last enabled at (11347): [<ffff80008a716574>] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:151 [inline]
+hardirqs last enabled at (11347): [<ffff80008a716574>] _raw_spin_unlock_irqrestore+0x38/0x98 kernel/locking/spinlock.c:194
+hardirqs last disabled at (11348): [<ffff80008a627820>] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:436
+softirqs last enabled at (11138): [<ffff8000887ca53c>] spin_unlock_bh include/linux/spinlock.h:396 [inline]
+softirqs last enabled at (11138): [<ffff8000887ca53c>] release_sock+0x15c/0x1b0 net/core/sock.c:3531
+softirqs last disabled at (11136): [<ffff8000887ca41c>] spin_lock_bh include/linux/spinlock.h:356 [inline]
+softirqs last disabled at (11136): [<ffff8000887ca41c>] release_sock+0x3c/0x1b0 net/core/sock.c:3518
+
+Fixes: fb7589a16216 ("tun: Add ability to create tun device with given index")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Link: https://lore.kernel.org/r/20231016180851.3560092-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -3056,10 +3056,11 @@ static long __tun_chr_ioctl(struct file
+       struct net *net = sock_net(&tfile->sk);
+       struct tun_struct *tun;
+       void __user* argp = (void __user*)arg;
+-      unsigned int ifindex, carrier;
++      unsigned int carrier;
+       struct ifreq ifr;
+       kuid_t owner;
+       kgid_t group;
++      int ifindex;
+       int sndbuf;
+       int vnet_hdr_sz;
+       int le;
+@@ -3115,7 +3116,9 @@ static long __tun_chr_ioctl(struct file
+               ret = -EFAULT;
+               if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
+                       goto unlock;
+-
++              ret = -EINVAL;
++              if (ifindex < 0)
++                      goto unlock;
+               ret = 0;
+               tfile->ifindex = ifindex;
+               goto unlock;
diff --git a/queue-6.1/wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch b/queue-6.1/wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch
new file mode 100644 (file)
index 0000000..cd85246
--- /dev/null
@@ -0,0 +1,34 @@
+From 91d20ab9d9ca035527af503d00e1e30d6c375f2a Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Mon, 9 Oct 2023 10:18:01 +0200
+Subject: wifi: cfg80211: use system_unbound_wq for wiphy work
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 91d20ab9d9ca035527af503d00e1e30d6c375f2a upstream.
+
+Since wiphy work items can run pretty much arbitrary
+code in the stack/driver, it can take longer to run
+all of this, so we shouldn't be using system_wq via
+schedule_work(). Also, we lock the wiphy (which is
+the reason this exists), so use system_unbound_wq.
+
+Reported-and-tested-by: Kalle Valo <kvalo@kernel.org>
+Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -1618,7 +1618,7 @@ void wiphy_work_queue(struct wiphy *wiph
+               list_add_tail(&work->entry, &rdev->wiphy_work_list);
+       spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+-      schedule_work(&rdev->wiphy_work);
++      queue_work(system_unbound_wq, &rdev->wiphy_work);
+ }
+ EXPORT_SYMBOL_GPL(wiphy_work_queue);
diff --git a/queue-6.1/xfrm-fix-a-data-race-in-xfrm_gen_index.patch b/queue-6.1/xfrm-fix-a-data-race-in-xfrm_gen_index.patch
new file mode 100644 (file)
index 0000000..7c987f1
--- /dev/null
@@ -0,0 +1,101 @@
+From 3e4bc23926b83c3c67e5f61ae8571602754131a6 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Sep 2023 18:13:59 +0000
+Subject: xfrm: fix a data-race in xfrm_gen_index()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 3e4bc23926b83c3c67e5f61ae8571602754131a6 upstream.
+
+xfrm_gen_index() mutual exclusion uses net->xfrm.xfrm_policy_lock.
+
+This means we must use a per-netns idx_generator variable,
+instead of a static one.
+Alternative would be to use an atomic variable.
+
+syzbot reported:
+
+BUG: KCSAN: data-race in xfrm_sk_policy_insert / xfrm_sk_policy_insert
+
+write to 0xffffffff87005938 of 4 bytes by task 29466 on cpu 0:
+xfrm_gen_index net/xfrm/xfrm_policy.c:1385 [inline]
+xfrm_sk_policy_insert+0x262/0x640 net/xfrm/xfrm_policy.c:2347
+xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639
+do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943
+ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012
+rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054
+sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697
+__sys_setsockopt+0x1c9/0x230 net/socket.c:2263
+__do_sys_setsockopt net/socket.c:2274 [inline]
+__se_sys_setsockopt net/socket.c:2271 [inline]
+__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffffffff87005938 of 4 bytes by task 29460 on cpu 1:
+xfrm_sk_policy_insert+0x13e/0x640
+xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639
+do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943
+ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012
+rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054
+sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697
+__sys_setsockopt+0x1c9/0x230 net/socket.c:2263
+__do_sys_setsockopt net/socket.c:2274 [inline]
+__se_sys_setsockopt net/socket.c:2271 [inline]
+__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x00006ad8 -> 0x00006b18
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 29460 Comm: syz-executor.1 Not tainted 6.5.0-rc5-syzkaller-00243-g9106536c1aa3 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+
+Fixes: 1121994c803f ("netns xfrm: policy insertion in netns")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netns/xfrm.h |    1 +
+ net/xfrm/xfrm_policy.c   |    6 ++----
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/include/net/netns/xfrm.h
++++ b/include/net/netns/xfrm.h
+@@ -50,6 +50,7 @@ struct netns_xfrm {
+       struct list_head        policy_all;
+       struct hlist_head       *policy_byidx;
+       unsigned int            policy_idx_hmask;
++      unsigned int            idx_generator;
+       struct hlist_head       policy_inexact[XFRM_POLICY_MAX];
+       struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX];
+       unsigned int            policy_count[XFRM_POLICY_MAX * 2];
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -1371,8 +1371,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
+  * of an absolute inpredictability of ordering of rules. This will not pass. */
+ static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
+ {
+-      static u32 idx_generator;
+-
+       for (;;) {
+               struct hlist_head *list;
+               struct xfrm_policy *p;
+@@ -1380,8 +1378,8 @@ static u32 xfrm_gen_index(struct net *ne
+               int found;
+               if (!index) {
+-                      idx = (idx_generator | dir);
+-                      idx_generator += 8;
++                      idx = (net->xfrm.idx_generator | dir);
++                      net->xfrm.idx_generator += 8;
+               } else {
+                       idx = index;
+                       index = 0;
diff --git a/queue-6.1/xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch b/queue-6.1/xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch
new file mode 100644 (file)
index 0000000..f7d76f7
--- /dev/null
@@ -0,0 +1,80 @@
+From de5724ca38fd5e442bae9c1fab31942b6544012d Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 11 Oct 2023 10:24:29 +0000
+Subject: xfrm: fix a data-race in xfrm_lookup_with_ifid()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit de5724ca38fd5e442bae9c1fab31942b6544012d upstream.
+
+syzbot complains about a race in xfrm_lookup_with_ifid() [1]
+
+When preparing commit 0a9e5794b21e ("xfrm: annotate data-race
+around use_time"), I thought xfrm_lookup_with_ifid() was modifying
+a still-private structure, so I left its use_time write unannotated.
+
+[1]
+BUG: KCSAN: data-race in xfrm_lookup_with_ifid / xfrm_lookup_with_ifid
+
+write to 0xffff88813ea41108 of 8 bytes by task 8150 on cpu 1:
+xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218
+xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline]
+xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281
+ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246
+send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139
+wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+write to 0xffff88813ea41108 of 8 bytes by task 15867 on cpu 0:
+xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218
+xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline]
+xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281
+ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246
+send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139
+wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+value changed: 0x00000000651cd9d1 -> 0x00000000651cd9d2
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 15867 Comm: kworker/u4:58 Not tainted 6.6.0-rc4-syzkaller-00016-g5e62ed3b1c8a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
+Workqueue: wg-kex-wg2 wg_packet_handshake_send_worker
+
+Fixes: 0a9e5794b21e ("xfrm: annotate data-race around use_time")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xfrm/xfrm_policy.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -3138,7 +3138,7 @@ no_transform:
+       }
+       for (i = 0; i < num_pols; i++)
+-              pols[i]->curlft.use_time = ktime_get_real_seconds();
++              WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds());
+       if (num_xfrms < 0) {
+               /* Prohibit the flow */
diff --git a/queue-6.1/xfrm-interface-use-dev_stats_inc.patch b/queue-6.1/xfrm-interface-use-dev_stats_inc.patch
new file mode 100644 (file)
index 0000000..bf567d7
--- /dev/null
@@ -0,0 +1,182 @@
+From f7c4e3e5d4f6609b4725a97451948ca2e425379a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 5 Sep 2023 13:23:03 +0000
+Subject: xfrm: interface: use DEV_STATS_INC()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit f7c4e3e5d4f6609b4725a97451948ca2e425379a upstream.
+
+syzbot/KCSAN reported data-races in xfrm whenever dev->stats fields
+are updated.
+
+It appears all of these updates can happen from multiple CPUs.
+
+Adopt the SMP-safe DEV_STATS_INC() to update dev->stats fields.
+
+BUG: KCSAN: data-race in xfrmi_xmit / xfrmi_xmit
+
+read-write to 0xffff88813726b160 of 8 bytes by task 23986 on cpu 1:
+xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583
+__netdev_start_xmit include/linux/netdevice.h:4889 [inline]
+netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+xmit_one net/core/dev.c:3544 [inline]
+dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560
+__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340
+dev_queue_xmit include/linux/netdevice.h:3082 [inline]
+neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581
+neigh_output include/net/neighbour.h:542 [inline]
+ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230
+ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318
+NF_HOOK_COND include/linux/netfilter.h:293 [inline]
+ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432
+dst_output include/net/dst.h:458 [inline]
+ip_local_out net/ipv4/ip_output.c:127 [inline]
+ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487
+udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963
+udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246
+inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840
+sock_sendmsg_nosec net/socket.c:730 [inline]
+sock_sendmsg net/socket.c:753 [inline]
+____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540
+___sys_sendmsg net/socket.c:2594 [inline]
+__sys_sendmmsg+0x269/0x500 net/socket.c:2680
+__do_sys_sendmmsg net/socket.c:2709 [inline]
+__se_sys_sendmmsg net/socket.c:2706 [inline]
+__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read-write to 0xffff88813726b160 of 8 bytes by task 23987 on cpu 0:
+xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583
+__netdev_start_xmit include/linux/netdevice.h:4889 [inline]
+netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+xmit_one net/core/dev.c:3544 [inline]
+dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560
+__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340
+dev_queue_xmit include/linux/netdevice.h:3082 [inline]
+neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581
+neigh_output include/net/neighbour.h:542 [inline]
+ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230
+ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318
+NF_HOOK_COND include/linux/netfilter.h:293 [inline]
+ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432
+dst_output include/net/dst.h:458 [inline]
+ip_local_out net/ipv4/ip_output.c:127 [inline]
+ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487
+udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963
+udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246
+inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840
+sock_sendmsg_nosec net/socket.c:730 [inline]
+sock_sendmsg net/socket.c:753 [inline]
+____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540
+___sys_sendmsg net/socket.c:2594 [inline]
+__sys_sendmmsg+0x269/0x500 net/socket.c:2680
+__do_sys_sendmmsg net/socket.c:2709 [inline]
+__se_sys_sendmmsg net/socket.c:2706 [inline]
+__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x00000000000010d7 -> 0x00000000000010d8
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 23987 Comm: syz-executor.5 Not tainted 6.5.0-syzkaller-10885-g0468be89b3fa #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xfrm/xfrm_interface_core.c |   22 ++++++++++------------
+ 1 file changed, 10 insertions(+), 12 deletions(-)
+
+--- a/net/xfrm/xfrm_interface_core.c
++++ b/net/xfrm/xfrm_interface_core.c
+@@ -379,8 +379,8 @@ static int xfrmi_rcv_cb(struct sk_buff *
+       skb->dev = dev;
+       if (err) {
+-              dev->stats.rx_errors++;
+-              dev->stats.rx_dropped++;
++              DEV_STATS_INC(dev, rx_errors);
++              DEV_STATS_INC(dev, rx_dropped);
+               return 0;
+       }
+@@ -425,7 +425,6 @@ static int
+ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+ {
+       struct xfrm_if *xi = netdev_priv(dev);
+-      struct net_device_stats *stats = &xi->dev->stats;
+       struct dst_entry *dst = skb_dst(skb);
+       unsigned int length = skb->len;
+       struct net_device *tdev;
+@@ -464,7 +463,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct
+       tdev = dst->dev;
+       if (tdev == dev) {
+-              stats->collisions++;
++              DEV_STATS_INC(dev, collisions);
+               net_warn_ratelimited("%s: Local routing loop detected!\n",
+                                    dev->name);
+               goto tx_err_dst_release;
+@@ -503,13 +502,13 @@ xmit:
+       if (net_xmit_eval(err) == 0) {
+               dev_sw_netstats_tx_add(dev, 1, length);
+       } else {
+-              stats->tx_errors++;
+-              stats->tx_aborted_errors++;
++              DEV_STATS_INC(dev, tx_errors);
++              DEV_STATS_INC(dev, tx_aborted_errors);
+       }
+       return 0;
+ tx_err_link_failure:
+-      stats->tx_carrier_errors++;
++      DEV_STATS_INC(dev, tx_carrier_errors);
+       dst_link_failure(skb);
+ tx_err_dst_release:
+       dst_release(dst);
+@@ -519,7 +518,6 @@ tx_err_dst_release:
+ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+       struct xfrm_if *xi = netdev_priv(dev);
+-      struct net_device_stats *stats = &xi->dev->stats;
+       struct dst_entry *dst = skb_dst(skb);
+       struct flowi fl;
+       int ret;
+@@ -536,7 +534,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+                       dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
+                       if (dst->error) {
+                               dst_release(dst);
+-                              stats->tx_carrier_errors++;
++                              DEV_STATS_INC(dev, tx_carrier_errors);
+                               goto tx_err;
+                       }
+                       skb_dst_set(skb, dst);
+@@ -552,7 +550,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+                       fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+                       rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
+                       if (IS_ERR(rt)) {
+-                              stats->tx_carrier_errors++;
++                              DEV_STATS_INC(dev, tx_carrier_errors);
+                               goto tx_err;
+                       }
+                       skb_dst_set(skb, &rt->dst);
+@@ -571,8 +569,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+       return NETDEV_TX_OK;
+ tx_err:
+-      stats->tx_errors++;
+-      stats->tx_dropped++;
++      DEV_STATS_INC(dev, tx_errors);
++      DEV_STATS_INC(dev, tx_dropped);
+       kfree_skb(skb);
+       return NETDEV_TX_OK;
+ }