--- /dev/null
+From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001
+From: Jiri Wiesner <jwiesner@suse.de>
+Date: Tue, 10 Oct 2023 18:39:33 +0200
+Subject: bonding: Return pointer to data after pull on skb
+
+From: Jiri Wiesner <jwiesner@suse.de>
+
+commit d93f3f992780af4a21e6c1ab86946b7c5602f1b9 upstream.
+
+Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet
+headers"), header offsets used to compute a hash in bond_xmit_hash() are
+relative to skb->data and not skb->head. If the tail of the header buffer
+of an skb really needs to be advanced and the operation is successful, the
+pointer to the data must be returned (and not a pointer to the head of the
+buffer).
+
+Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers")
+Signed-off-by: Jiri Wiesner <jwiesner@suse.de>
+Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/bonding/bond_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -4022,7 +4022,7 @@ static inline const void *bond_pull_data
+ if (likely(n <= hlen))
+ return data;
+ else if (skb && likely(pskb_may_pull(skb, n)))
+- return skb->head;
++ return skb->data;
+
+ return NULL;
+ }
--- /dev/null
+From a258c804aa8742763dce694b5e992d7ccf4294f2 Mon Sep 17 00:00:00 2001
+From: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
+Date: Thu, 12 Oct 2023 08:31:44 -0400
+Subject: docs: fix info about representor identification
+
+From: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
+
+commit a258c804aa8742763dce694b5e992d7ccf4294f2 upstream.
+
+Update the "How are representors identified?" documentation
+subchapter. On newer kernels, drivers should use
+SET_NETDEV_DEVLINK_PORT instead of the ndo_get_devlink_port()
+callback.
+
+Fixes: 7712b3e966ea ("Merge branch 'net-fix-netdev-to-devlink_port-linkage-and-expose-to-user'")
+Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
+Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Reviewed-by: Edward Cree <ecree.xilinx@gmail.com>
+Link: https://lore.kernel.org/r/20231012123144.15768-1-mateusz.polchlopek@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/networking/representors.rst | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst
+index ee1f5cd54496..decb39c19b9e 100644
+--- a/Documentation/networking/representors.rst
++++ b/Documentation/networking/representors.rst
+@@ -162,9 +162,11 @@ How are representors identified?
+ The representor netdevice should *not* directly refer to a PCIe device (e.g.
+ through ``net_dev->dev.parent`` / ``SET_NETDEV_DEV()``), either of the
+ representee or of the switchdev function.
+-Instead, it should implement the ``ndo_get_devlink_port()`` netdevice op, which
+-the kernel uses to provide the ``phys_switch_id`` and ``phys_port_name`` sysfs
+-nodes. (Some legacy drivers implement ``ndo_get_port_parent_id()`` and
++Instead, the driver should use the ``SET_NETDEV_DEVLINK_PORT`` macro to
++assign a devlink port instance to the netdevice before registering the
++netdevice; the kernel uses the devlink port to provide the ``phys_switch_id``
++and ``phys_port_name`` sysfs nodes.
++(Some legacy drivers implement ``ndo_get_port_parent_id()`` and
+ ``ndo_get_phys_port_name()`` directly, but this is deprecated.) See
+ :ref:`Documentation/networking/devlink/devlink-port.rst <devlink_port>` for the
+ details of this API.
+--
+2.42.0
+
--- /dev/null
+From 700b2b439766e8aab8a7174991198497345bd411 Mon Sep 17 00:00:00 2001
+From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
+Date: Tue, 17 Oct 2023 08:49:45 +0900
+Subject: fprobe: Fix to ensure the number of active retprobes is not zero
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+commit 700b2b439766e8aab8a7174991198497345bd411 upstream.
+
+The computed number of active retprobes can be zero, which is not
+acceptable, so return -EINVAL if that is detected.
+
+Link: https://lore.kernel.org/all/169750018550.186853.11198884812017796410.stgit@devnote2/
+
+Reported-by: wuqiang.matt <wuqiang.matt@bytedance.com>
+Closes: https://lore.kernel.org/all/20231016222103.cb9f426edc60220eabd8aa6a@kernel.org/
+Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support")
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/fprobe.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/trace/fprobe.c
++++ b/kernel/trace/fprobe.c
+@@ -189,7 +189,7 @@ static int fprobe_init_rethook(struct fp
+ {
+ int i, size;
+
+- if (num < 0)
++ if (num <= 0)
+ return -EINVAL;
+
+ if (!fp->exit_handler) {
+@@ -202,8 +202,8 @@ static int fprobe_init_rethook(struct fp
+ size = fp->nr_maxactive;
+ else
+ size = num * num_possible_cpus() * 2;
+- if (size < 0)
+- return -E2BIG;
++ if (size <= 0)
++ return -EINVAL;
+
+ fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
+ if (!fp->rethook)
--- /dev/null
+From 95535e37e8959f50e7aee365a5bdc9e5ed720443 Mon Sep 17 00:00:00 2001
+From: Shailend Chand <shailend@google.com>
+Date: Sat, 14 Oct 2023 01:41:21 +0000
+Subject: gve: Do not fully free QPL pages on prefill errors
+
+From: Shailend Chand <shailend@google.com>
+
+commit 95535e37e8959f50e7aee365a5bdc9e5ed720443 upstream.
+
+The prefill function should have only removed the page count bias it
+added. Fully freeing the page will cause gve_free_queue_page_list to
+free a page the driver no longer owns.
+
+Fixes: 82fd151d38d9 ("gve: Reduce alloc and copy costs in the GQ rx path")
+Signed-off-by: Shailend Chand <shailend@google.com>
+Link: https://lore.kernel.org/r/20231014014121.2843922-1-shailend@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/google/gve/gve_rx.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
+index d1da7413dc4d..e84a066aa1a4 100644
+--- a/drivers/net/ethernet/google/gve/gve_rx.c
++++ b/drivers/net/ethernet/google/gve/gve_rx.c
+@@ -146,7 +146,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
+ err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
+ &rx->data.data_ring[i]);
+ if (err)
+- goto alloc_err;
++ goto alloc_err_rda;
+ }
+
+ if (!rx->data.raw_addressing) {
+@@ -171,12 +171,26 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
+ return slots;
+
+ alloc_err_qpl:
++ /* Fully free the copy pool pages. */
+ while (j--) {
+ page_ref_sub(rx->qpl_copy_pool[j].page,
+ rx->qpl_copy_pool[j].pagecnt_bias - 1);
+ put_page(rx->qpl_copy_pool[j].page);
+ }
+-alloc_err:
++
++ /* Do not fully free QPL pages - only remove the bias added in this
++ * function with gve_setup_rx_buffer.
++ */
++ while (i--)
++ page_ref_sub(rx->data.page_info[i].page,
++ rx->data.page_info[i].pagecnt_bias - 1);
++
++ gve_unassign_qpl(priv, rx->data.qpl->id);
++ rx->data.qpl = NULL;
++
++ return err;
++
++alloc_err_rda:
+ while (i--)
+ gve_rx_free_buffer(&priv->pdev->dev,
+ &rx->data.page_info[i],
+--
+2.42.0
+
--- /dev/null
+From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001
+From: Michal Schmidt <mschmidt@redhat.com>
+Date: Wed, 11 Oct 2023 16:33:32 -0700
+Subject: i40e: prevent crash on probe if hw registers have invalid values
+
+From: Michal Schmidt <mschmidt@redhat.com>
+
+commit fc6f716a5069180c40a8c9b63631e97da34f64a3 upstream.
+
+The hardware provides the indexes of the first and the last available
+queue and VF. From the indexes, the driver calculates the numbers of
+queues and VFs. In theory, a faulty device might say the last index is
+smaller than the first index. In that case, the driver's calculation
+would underflow, it would attempt to write to non-existent registers
+outside of the ioremapped range and crash.
+
+I ran into this not by having a faulty device, but by an operator error.
+I accidentally ran a QE test meant for i40e devices on an ice device.
+The test used 'echo i40e > /sys/...ice PCI device.../driver_override',
+bound the driver to the device and crashed in one of the wr32 calls in
+i40e_clear_hw.
+
+Add checks to prevent underflows in the calculations of num_queues and
+num_vfs. With this fix, the wrong device probing reports errors and
+returns a failure without crashing.
+
+Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts")
+Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
+@@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw)
+ I40E_PFLAN_QALLOC_FIRSTQ_SHIFT;
+ j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >>
+ I40E_PFLAN_QALLOC_LASTQ_SHIFT;
+- if (val & I40E_PFLAN_QALLOC_VALID_MASK)
++ if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue)
+ num_queues = (j - base_queue) + 1;
+ else
+ num_queues = 0;
+@@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw)
+ I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT;
+ j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >>
+ I40E_PF_VT_PFALLOC_LASTVF_SHIFT;
+- if (val & I40E_PF_VT_PFALLOC_VALID_MASK)
++ if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i)
+ num_vfs = (j - i) + 1;
+ else
+ num_vfs = 0;
--- /dev/null
+From 195374d893681da43a39796e53b30ac4f20400c4 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 17 Oct 2023 19:23:04 +0000
+Subject: ipv4: fib: annotate races around nh->nh_saddr_genid and nh->nh_saddr
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 195374d893681da43a39796e53b30ac4f20400c4 upstream.
+
+syzbot reported a data-race while accessing nh->nh_saddr_genid [1]
+
+Add annotations, but leave the code lazy as intended.
+
+[1]
+BUG: KCSAN: data-race in fib_select_path / fib_select_path
+
+write to 0xffff8881387166f0 of 4 bytes by task 6778 on cpu 1:
+fib_info_update_nhc_saddr net/ipv4/fib_semantics.c:1334 [inline]
+fib_result_prefsrc net/ipv4/fib_semantics.c:1354 [inline]
+fib_select_path+0x292/0x330 net/ipv4/fib_semantics.c:2269
+ip_route_output_key_hash_rcu+0x659/0x12c0 net/ipv4/route.c:2810
+ip_route_output_key_hash net/ipv4/route.c:2644 [inline]
+__ip_route_output_key include/net/route.h:134 [inline]
+ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2872
+send4+0x1f5/0x520 drivers/net/wireguard/socket.c:61
+wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+read to 0xffff8881387166f0 of 4 bytes by task 6759 on cpu 0:
+fib_result_prefsrc net/ipv4/fib_semantics.c:1350 [inline]
+fib_select_path+0x1cb/0x330 net/ipv4/fib_semantics.c:2269
+ip_route_output_key_hash_rcu+0x659/0x12c0 net/ipv4/route.c:2810
+ip_route_output_key_hash net/ipv4/route.c:2644 [inline]
+__ip_route_output_key include/net/route.h:134 [inline]
+ip_route_output_flow+0xa6/0x150 net/ipv4/route.c:2872
+send4+0x1f5/0x520 drivers/net/wireguard/socket.c:61
+wg_socket_send_skb_to_peer+0x94/0x130 drivers/net/wireguard/socket.c:175
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+value changed: 0x959d3217 -> 0x959d3218
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 6759 Comm: kworker/u4:15 Not tainted 6.6.0-rc4-syzkaller-00029-gcbf3a2cb156a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
+Workqueue: wg-kex-wg1 wg_packet_handshake_send_worker
+
+Fixes: 436c3b66ec98 ("ipv4: Invalidate nexthop cache nh_saddr more correctly.")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20231017192304.82626-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1325,15 +1325,18 @@ __be32 fib_info_update_nhc_saddr(struct
+ unsigned char scope)
+ {
+ struct fib_nh *nh;
++ __be32 saddr;
+
+ if (nhc->nhc_family != AF_INET)
+ return inet_select_addr(nhc->nhc_dev, 0, scope);
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+- nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
+- nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
++ saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
+
+- return nh->nh_saddr;
++ WRITE_ONCE(nh->nh_saddr, saddr);
++ WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid));
++
++ return saddr;
+ }
+
+ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
+@@ -1347,8 +1350,9 @@ __be32 fib_result_prefsrc(struct net *ne
+ struct fib_nh *nh;
+
+ nh = container_of(nhc, struct fib_nh, nh_common);
+- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
+- return nh->nh_saddr;
++ if (READ_ONCE(nh->nh_saddr_genid) ==
++ atomic_read(&net->ipv4.dev_addr_genid))
++ return READ_ONCE(nh->nh_saddr);
+ }
+
+ return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
--- /dev/null
+From 2915240eddba96b37de4c7e9a3d0ac6f9548454b Mon Sep 17 00:00:00 2001
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+Date: Mon, 16 Oct 2023 14:49:04 +0200
+Subject: neighbor: tracing: Move pin6 inside CONFIG_IPV6=y section
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+
+commit 2915240eddba96b37de4c7e9a3d0ac6f9548454b upstream.
+
+When CONFIG_IPV6=n, and building with W=1:
+
+ In file included from include/trace/define_trace.h:102,
+ from include/trace/events/neigh.h:255,
+ from net/core/net-traces.c:51:
+ include/trace/events/neigh.h: In function ‘trace_event_raw_event_neigh_create’:
+ include/trace/events/neigh.h:42:34: error: variable ‘pin6’ set but not used [-Werror=unused-but-set-variable]
+ 42 | struct in6_addr *pin6;
+ | ^~~~
+ include/trace/trace_events.h:402:11: note: in definition of macro ‘DECLARE_EVENT_CLASS’
+ 402 | { assign; } \
+ | ^~~~~~
+ include/trace/trace_events.h:44:30: note: in expansion of macro ‘PARAMS’
+ 44 | PARAMS(assign), \
+ | ^~~~~~
+ include/trace/events/neigh.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
+ 23 | TRACE_EVENT(neigh_create,
+ | ^~~~~~~~~~~
+ include/trace/events/neigh.h:41:9: note: in expansion of macro ‘TP_fast_assign’
+ 41 | TP_fast_assign(
+ | ^~~~~~~~~~~~~~
+ In file included from include/trace/define_trace.h:103,
+ from include/trace/events/neigh.h:255,
+ from net/core/net-traces.c:51:
+ include/trace/events/neigh.h: In function ‘perf_trace_neigh_create’:
+ include/trace/events/neigh.h:42:34: error: variable ‘pin6’ set but not used [-Werror=unused-but-set-variable]
+ 42 | struct in6_addr *pin6;
+ | ^~~~
+ include/trace/perf.h:51:11: note: in definition of macro ‘DECLARE_EVENT_CLASS’
+ 51 | { assign; } \
+ | ^~~~~~
+ include/trace/trace_events.h:44:30: note: in expansion of macro ‘PARAMS’
+ 44 | PARAMS(assign), \
+ | ^~~~~~
+ include/trace/events/neigh.h:23:1: note: in expansion of macro ‘TRACE_EVENT’
+ 23 | TRACE_EVENT(neigh_create,
+ | ^~~~~~~~~~~
+ include/trace/events/neigh.h:41:9: note: in expansion of macro ‘TP_fast_assign’
+ 41 | TP_fast_assign(
+ | ^~~~~~~~~~~~~~
+
+Indeed, the variable pin6 is declared and initialized unconditionally,
+while it is only used and needlessly re-initialized when support for
+IPv6 is enabled.
+
+Fix this by dropping the unused variable initialization, and moving the
+variable declaration inside the existing section protected by a check
+for CONFIG_IPV6.
+
+Fixes: fc651001d2c5ca4f ("neighbor: Add tracepoint to __neigh_create")
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Simon Horman <horms@kernel.org> # build-tested
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/trace/events/neigh.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/trace/events/neigh.h
++++ b/include/trace/events/neigh.h
+@@ -39,7 +39,6 @@ TRACE_EVENT(neigh_create,
+ ),
+
+ TP_fast_assign(
+- struct in6_addr *pin6;
+ __be32 *p32;
+
+ __entry->family = tbl->family;
+@@ -47,7 +46,6 @@ TRACE_EVENT(neigh_create,
+ __entry->entries = atomic_read(&tbl->gc_entries);
+ __entry->created = n != NULL;
+ __entry->gc_exempt = exempt_from_gc;
+- pin6 = (struct in6_addr *)__entry->primary_key6;
+ p32 = (__be32 *)__entry->primary_key4;
+
+ if (tbl->family == AF_INET)
+@@ -57,6 +55,8 @@ TRACE_EVENT(neigh_create,
+
+ #if IS_ENABLED(CONFIG_IPV6)
+ if (tbl->family == AF_INET6) {
++ struct in6_addr *pin6;
++
+ pin6 = (struct in6_addr *)__entry->primary_key6;
+ *pin6 = *(struct in6_addr *)pkey;
+ }
--- /dev/null
+From 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:15 -0700
+Subject: net: avoid UAF on deleted altname
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 1a83f4a7c156fa6bbd6b530e89fa3270bf3d9d1b upstream.
+
+Altnames are accessed under RCU (dev_get_by_name_rcu())
+but freed by kfree() with no synchronization point.
+
+Each node has one or two allocations (node and a variable-size
+name, sometimes the name is netdev->name). Adding rcu_heads
+here is a bit tedious. Besides most code which unlists the names
+already has rcu barriers - so take the simpler approach of adding
+synchronize_rcu(). Note that the one on the unregistration path
+(which matters more) is removed by the next fix.
+
+Fixes: ff92741270bf ("net: introduce name_node struct to be used in hashlist")
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -343,7 +343,6 @@ int netdev_name_node_alt_create(struct n
+ static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
+ {
+ list_del(&name_node->list);
+- netdev_name_node_del(name_node);
+ kfree(name_node->name);
+ netdev_name_node_free(name_node);
+ }
+@@ -362,6 +361,8 @@ int netdev_name_node_alt_destroy(struct
+ if (name_node == dev->name_node || name_node->dev != dev)
+ return -EINVAL;
+
++ netdev_name_node_del(name_node);
++ synchronize_rcu();
+ __netdev_name_node_alt_destroy(name_node);
+
+ return 0;
+@@ -10838,6 +10839,7 @@ void unregister_netdevice_many_notify(st
+ synchronize_net();
+
+ list_for_each_entry(dev, head, unreg_list) {
++ struct netdev_name_node *name_node;
+ struct sk_buff *skb = NULL;
+
+ /* Shutdown queueing discipline. */
+@@ -10865,6 +10867,9 @@ void unregister_netdevice_many_notify(st
+ dev_uc_flush(dev);
+ dev_mc_flush(dev);
+
++ netdev_for_each_altname(dev, name_node)
++ netdev_name_node_del(name_node);
++ synchronize_rcu();
+ netdev_name_node_alt_flush(dev);
+ netdev_name_node_free(dev->name_node);
+
--- /dev/null
+From 7663d522099ecc464512164e660bc771b2ff7b64 Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:14 -0700
+Subject: net: check for altname conflicts when changing netdev's netns
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 7663d522099ecc464512164e660bc771b2ff7b64 upstream.
+
+It's currently possible to create an altname conflicting
+with an altname or real name of another device by creating
+it in another netns and moving it over:
+
+ [ ~]$ ip link add dev eth0 type dummy
+
+ [ ~]$ ip netns add test
+ [ ~]$ ip -netns test link add dev ethX netns test type dummy
+ [ ~]$ ip -netns test link property add dev ethX altname eth0
+ [ ~]$ ip -netns test link set dev ethX netns 1
+
+ [ ~]$ ip link
+ ...
+ 3: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether 02:40:88:62:ec:b8 brd ff:ff:ff:ff:ff:ff
+ ...
+ 5: ethX: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
+ link/ether 26:b7:28:78:38:0f brd ff:ff:ff:ff:ff:ff
+ altname eth0
+
+Create a macro for walking the altnames; this hopefully makes
+it clearer that the list we walk contains only altnames,
+which is otherwise not entirely intuitive.
+
+Fixes: 36fbf1e52bd3 ("net: rtnetlink: add linkprop commands to add and delete alternative ifnames")
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 9 ++++++++-
+ net/core/dev.h | 3 +++
+ 2 files changed, 11 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1079,7 +1079,8 @@ static int __dev_alloc_name(struct net *
+
+ for_each_netdev(net, d) {
+ struct netdev_name_node *name_node;
+- list_for_each_entry(name_node, &d->name_node->list, list) {
++
++ netdev_for_each_altname(d, name_node) {
+ if (!sscanf(name_node->name, name, &i))
+ continue;
+ if (i < 0 || i >= max_netdevices)
+@@ -10968,6 +10969,7 @@ EXPORT_SYMBOL(unregister_netdev);
+ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
+ const char *pat, int new_ifindex)
+ {
++ struct netdev_name_node *name_node;
+ struct net *net_old = dev_net(dev);
+ char new_name[IFNAMSIZ] = {};
+ int err, new_nsid;
+@@ -11000,6 +11002,11 @@ int __dev_change_net_namespace(struct ne
+ if (err < 0)
+ goto out;
+ }
++ /* Check that none of the altnames conflicts. */
++ err = -EEXIST;
++ netdev_for_each_altname(dev, name_node)
++ if (netdev_name_in_use(net, name_node->name))
++ goto out;
+
+ /* Check that new_ifindex isn't used yet. */
+ err = -EBUSY;
+--- a/net/core/dev.h
++++ b/net/core/dev.h
+@@ -62,6 +62,9 @@ struct netdev_name_node {
+ int netdev_get_name(struct net *net, char *name, int ifindex);
+ int dev_change_name(struct net_device *dev, const char *newname);
+
++#define netdev_for_each_altname(dev, namenode) \
++ list_for_each_entry((namenode), &(dev)->name_node->list, list)
++
+ int netdev_name_node_alt_create(struct net_device *dev, const char *name);
+ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
+
--- /dev/null
+From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+Date: Wed, 11 Oct 2023 11:24:19 +0800
+Subject: net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+commit 61b40cefe51af005c72dbdcf975a3d166c6e6406 upstream.
+
+In bcm_sf2_mdio_register(), class_find_device() calls get_device() to
+increment the reference count of priv->master_mii_bus->dev if
+of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register()
+fails, get_device() has been called twice without the reference count
+ever being decremented. The same holds if bcm_sf2_mdio_register() succeeds
+but bcm_sf2_sw_probe() fails, or if bcm_sf2_sw_probe() succeeds. If the
+reference count never drops to zero, the device's resources will not be
+freed.
+
+So remove the get_device() in bcm_sf2_mdio_register(), and call
+put_device() if mdiobus_alloc() or mdiobus_register() fails and in
+bcm_sf2_mdio_unregister() to solve the issue.
+
+And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and
+just return 0 if it succeeds to make it cleaner.
+
+Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus")
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Suggested-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++---------
+ 1 file changed, 15 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct
+ dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio");
+ priv->master_mii_bus = of_mdio_find_bus(dn);
+ if (!priv->master_mii_bus) {
+- of_node_put(dn);
+- return -EPROBE_DEFER;
++ err = -EPROBE_DEFER;
++ goto err_of_node_put;
+ }
+
+- get_device(&priv->master_mii_bus->dev);
+ priv->master_mii_dn = dn;
+
+ priv->slave_mii_bus = mdiobus_alloc();
+ if (!priv->slave_mii_bus) {
+- of_node_put(dn);
+- return -ENOMEM;
++ err = -ENOMEM;
++ goto err_put_master_mii_bus_dev;
+ }
+
+ priv->slave_mii_bus->priv = priv;
+@@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct
+ }
+
+ err = mdiobus_register(priv->slave_mii_bus);
+- if (err && dn) {
+- mdiobus_free(priv->slave_mii_bus);
+- of_node_put(dn);
+- }
++ if (err && dn)
++ goto err_free_slave_mii_bus;
++
++ return 0;
+
++err_free_slave_mii_bus:
++ mdiobus_free(priv->slave_mii_bus);
++err_put_master_mii_bus_dev:
++ put_device(&priv->master_mii_bus->dev);
++err_of_node_put:
++ of_node_put(dn);
+ return err;
+ }
+
+@@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(stru
+ {
+ mdiobus_unregister(priv->slave_mii_bus);
+ mdiobus_free(priv->slave_mii_bus);
++ put_device(&priv->master_mii_bus->dev);
+ of_node_put(priv->master_mii_dn);
+ }
+
--- /dev/null
+From 311cca40661f428b7aa114fb5af578cfdbe3e8b6 Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 17 Oct 2023 18:38:13 -0700
+Subject: net: fix ifname in netlink ntf during netns move
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit 311cca40661f428b7aa114fb5af578cfdbe3e8b6 upstream.
+
+dev_get_valid_name() overwrites the netdev's name on success.
+This makes it hard to use in prepare-commit-like fashion,
+where we do validation first, and "commit" to the change
+later.
+
+Factor out a helper which lets us save the new name to a buffer.
+Use it to fix the problem of notification on netns move having
+incorrect name:
+
+ 5: eth0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
+ link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff
+ 6: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
+ link/ether 1e:4a:34:36:e3:cd brd ff:ff:ff:ff:ff:ff
+
+ [ ~]# ip link set dev eth0 netns 1 name eth1
+
+ip monitor inside netns:
+ Deleted inet eth0
+ Deleted inet6 eth0
+ Deleted 5: eth1: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN group default
+ link/ether be:4d:58:f9:d5:40 brd ff:ff:ff:ff:ff:ff new-netnsid 0 new-ifindex 7
+
+Name is reported as eth1 in old netns for ifindex 5, already renamed.
+
+Fixes: d90310243fd7 ("net: device name allocation cleanups")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c | 44 +++++++++++++++++++++++++++++++-------------
+ 1 file changed, 31 insertions(+), 13 deletions(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1116,6 +1116,26 @@ static int __dev_alloc_name(struct net *
+ return -ENFILE;
+ }
+
++static int dev_prep_valid_name(struct net *net, struct net_device *dev,
++ const char *want_name, char *out_name)
++{
++ int ret;
++
++ if (!dev_valid_name(want_name))
++ return -EINVAL;
++
++ if (strchr(want_name, '%')) {
++ ret = __dev_alloc_name(net, want_name, out_name);
++ return ret < 0 ? ret : 0;
++ } else if (netdev_name_in_use(net, want_name)) {
++ return -EEXIST;
++ } else if (out_name != want_name) {
++ strscpy(out_name, want_name, IFNAMSIZ);
++ }
++
++ return 0;
++}
++
+ static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
+@@ -1153,19 +1173,13 @@ EXPORT_SYMBOL(dev_alloc_name);
+ static int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
+ {
+- BUG_ON(!net);
+-
+- if (!dev_valid_name(name))
+- return -EINVAL;
+-
+- if (strchr(name, '%'))
+- return dev_alloc_name_ns(net, dev, name);
+- else if (netdev_name_in_use(net, name))
+- return -EEXIST;
+- else if (dev->name != name)
+- strscpy(dev->name, name, IFNAMSIZ);
++ char buf[IFNAMSIZ];
++ int ret;
+
+- return 0;
++ ret = dev_prep_valid_name(net, dev, name, buf);
++ if (ret >= 0)
++ strscpy(dev->name, buf, IFNAMSIZ);
++ return ret;
+ }
+
+ /**
+@@ -10955,6 +10969,7 @@ int __dev_change_net_namespace(struct ne
+ const char *pat, int new_ifindex)
+ {
+ struct net *net_old = dev_net(dev);
++ char new_name[IFNAMSIZ] = {};
+ int err, new_nsid;
+
+ ASSERT_RTNL();
+@@ -10981,7 +10996,7 @@ int __dev_change_net_namespace(struct ne
+ /* We get here if we can't use the current device name */
+ if (!pat)
+ goto out;
+- err = dev_get_valid_name(net, dev, pat);
++ err = dev_prep_valid_name(net, dev, pat, new_name);
+ if (err < 0)
+ goto out;
+ }
+@@ -11049,6 +11064,9 @@ int __dev_change_net_namespace(struct ne
+ kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
+
++ if (new_name[0]) /* Rename the netdev to prepared name */
++ strscpy(dev->name, new_name, IFNAMSIZ);
++
+ /* Fixup kobjects */
+ err = device_rename(&dev->dev, dev->name);
+ WARN_ON(err);
--- /dev/null
+From 513f61e2193350c7a345da98559b80f61aec4fa6 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make_ruc2021@163.com>
+Date: Mon, 9 Oct 2023 09:13:37 +0800
+Subject: net: ipv4: fix return value check in esp_remove_trailer
+
+From: Ma Ke <make_ruc2021@163.com>
+
+commit 513f61e2193350c7a345da98559b80f61aec4fa6 upstream.
+
+In esp_remove_trailer(), to avoid an unexpected result returned by
+pskb_trim, we should check the return value of pskb_trim().
+
+Signed-off-by: Ma Ke <make_ruc2021@163.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/esp4.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -732,7 +732,9 @@ static inline int esp_remove_trailer(str
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+- pskb_trim(skb, skb->len - trimlen);
++ ret = pskb_trim(skb, skb->len - trimlen);
++ if (unlikely(ret))
++ return ret;
+
+ ret = nexthdr[1];
+
--- /dev/null
+From dad4e491e30b20f4dc615c9da65d2142d703b5c2 Mon Sep 17 00:00:00 2001
+From: Ma Ke <make_ruc2021@163.com>
+Date: Sat, 7 Oct 2023 08:59:53 +0800
+Subject: net: ipv6: fix return value check in esp_remove_trailer
+
+From: Ma Ke <make_ruc2021@163.com>
+
+commit dad4e491e30b20f4dc615c9da65d2142d703b5c2 upstream.
+
+In esp_remove_trailer(), to avoid an unexpected result returned by
+pskb_trim, we should check the return value of pskb_trim().
+
+Signed-off-by: Ma Ke <make_ruc2021@163.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/esp6.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ipv6/esp6.c
++++ b/net/ipv6/esp6.c
+@@ -770,7 +770,9 @@ static inline int esp_remove_trailer(str
+ skb->csum = csum_block_sub(skb->csum, csumdiff,
+ skb->len - trimlen);
+ }
+- pskb_trim(skb, skb->len - trimlen);
++ ret = pskb_trim(skb, skb->len - trimlen);
++ if (unlikely(ret))
++ return ret;
+
+ ret = nexthdr[1];
+
--- /dev/null
+From 1f9f2143f24e224a8582a5d54918c43b9121eccc Mon Sep 17 00:00:00 2001
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+Date: Tue, 17 Oct 2023 17:31:44 +0300
+Subject: net: mdio-mux: fix C45 access returning -EIO after API change
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+commit 1f9f2143f24e224a8582a5d54918c43b9121eccc upstream.
+
+The mii_bus API conversion to read_c45() and write_c45() did not cover
+the mdio-mux driver before read() and write() were made C22-only.
+
+This broke arch/arm64/boot/dts/freescale/fsl-ls1028a-qds-13bb.dtso.
+The -EOPNOTSUPP from mdiobus_c45_read() is transformed by
+get_phy_c45_devs_in_pkg() into -EIO, is further propagated to
+of_mdiobus_register() and this makes the mdio-mux driver fail to probe
+the entire child buses, not just the PHYs that cause access errors.
+
+Fix the regression by introducing special c45 read and write accessors
+to mdio-mux which forward the operation to the parent MDIO bus.
+
+Fixes: db1a63aed89c ("net: phy: Remove fallback to old C45 method")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Link: https://lore.kernel.org/r/20231017143144.3212657-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/mdio/mdio-mux.c | 47 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 47 insertions(+)
+
+diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c
+index a881e3523328..bef4cce71287 100644
+--- a/drivers/net/mdio/mdio-mux.c
++++ b/drivers/net/mdio/mdio-mux.c
+@@ -55,6 +55,27 @@ out:
+ return r;
+ }
+
++static int mdio_mux_read_c45(struct mii_bus *bus, int phy_id, int dev_addr,
++ int regnum)
++{
++ struct mdio_mux_child_bus *cb = bus->priv;
++ struct mdio_mux_parent_bus *pb = cb->parent;
++ int r;
++
++ mutex_lock_nested(&pb->mii_bus->mdio_lock, MDIO_MUTEX_MUX);
++ r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data);
++ if (r)
++ goto out;
++
++ pb->current_child = cb->bus_number;
++
++ r = pb->mii_bus->read_c45(pb->mii_bus, phy_id, dev_addr, regnum);
++out:
++ mutex_unlock(&pb->mii_bus->mdio_lock);
++
++ return r;
++}
++
+ /*
+ * The parent bus' lock is used to order access to the switch_fn.
+ */
+@@ -80,6 +101,28 @@ out:
+ return r;
+ }
+
++static int mdio_mux_write_c45(struct mii_bus *bus, int phy_id, int dev_addr,
++ int regnum, u16 val)
++{
++ struct mdio_mux_child_bus *cb = bus->priv;
++ struct mdio_mux_parent_bus *pb = cb->parent;
++
++ int r;
++
++ mutex_lock_nested(&pb->mii_bus->mdio_lock, MDIO_MUTEX_MUX);
++ r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data);
++ if (r)
++ goto out;
++
++ pb->current_child = cb->bus_number;
++
++ r = pb->mii_bus->write_c45(pb->mii_bus, phy_id, dev_addr, regnum, val);
++out:
++ mutex_unlock(&pb->mii_bus->mdio_lock);
++
++ return r;
++}
++
+ static int parent_count;
+
+ static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb)
+@@ -173,6 +216,10 @@ int mdio_mux_init(struct device *dev,
+ cb->mii_bus->parent = dev;
+ cb->mii_bus->read = mdio_mux_read;
+ cb->mii_bus->write = mdio_mux_write;
++ if (parent_bus->read_c45)
++ cb->mii_bus->read_c45 = mdio_mux_read_c45;
++ if (parent_bus->write_c45)
++ cb->mii_bus->write_c45 = mdio_mux_write_c45;
+ r = of_mdiobus_register(cb->mii_bus, child_bus_node);
+ if (r) {
+ mdiobus_free(cb->mii_bus);
+--
+2.42.0
+
--- /dev/null
+From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001
+From: Willem de Bruijn <willemb@google.com>
+Date: Wed, 11 Oct 2023 10:01:14 -0400
+Subject: net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation
+
+From: Willem de Bruijn <willemb@google.com>
+
+commit fc8b2a619469378717e7270d2a4e1ef93c585f7a upstream.
+
+Syzbot reported two new paths to hit an internal WARNING using the
+new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4.
+
+ RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260
+ skb len=64521 gso_size=344
+and
+
+ RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262
+
+Older virtio types have historically had loose restrictions, leading
+to many entirely impractical fuzzer generated packets causing
+problems deep in the kernel stack. Ideally, we would have had strict
+validation for all types from the start.
+
+New virtio types can have tighter validation. Limit UDP GSO packets
+inserted via virtio to the same limits imposed by the UDP_SEGMENT
+socket interface:
+
+1. must use checksum offload
+2. checksum offload matches UDP header
+3. no more segments than UDP_MAX_SEGMENTS
+4. UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN
+
+Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.")
+Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/
+Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/virtio_net.h | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
+index 7b4dd69555e4..27cc1d464321 100644
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -3,8 +3,8 @@
+ #define _LINUX_VIRTIO_NET_H
+
+ #include <linux/if_vlan.h>
++#include <linux/udp.h>
+ #include <uapi/linux/tcp.h>
+-#include <uapi/linux/udp.h>
+ #include <uapi/linux/virtio_net.h>
+
+ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
+@@ -151,9 +151,22 @@ retry:
+ unsigned int nh_off = p_off;
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+- /* UFO may not include transport header in gso_size. */
+- if (gso_type & SKB_GSO_UDP)
++ switch (gso_type & ~SKB_GSO_TCP_ECN) {
++ case SKB_GSO_UDP:
++ /* UFO may not include transport header in gso_size. */
+ nh_off -= thlen;
++ break;
++ case SKB_GSO_UDP_L4:
++ if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
++ return -EINVAL;
++ if (skb->csum_offset != offsetof(struct udphdr, check))
++ return -EINVAL;
++ if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS)
++ return -EINVAL;
++ if (gso_type != SKB_GSO_UDP_L4)
++ return -EINVAL;
++ break;
++ }
+
+ /* Kernel has a special handling for GSO_BY_FRAGS. */
+ if (gso_size == GSO_BY_FRAGS)
+--
+2.42.0
+
--- /dev/null
+From 6200e00e112ce2d17b066a20dd2476d9aecbefa6 Mon Sep 17 00:00:00 2001
+From: Florian Fainelli <florian.fainelli@broadcom.com>
+Date: Tue, 17 Oct 2023 13:51:19 -0700
+Subject: net: phy: bcm7xxx: Add missing 16nm EPHY statistics
+
+From: Florian Fainelli <florian.fainelli@broadcom.com>
+
+commit 6200e00e112ce2d17b066a20dd2476d9aecbefa6 upstream.
+
+The .probe() function would allocate the necessary space and ensure that
+the library call sizes the number of statistics but the callbacks
+necessary to fetch the name and values were not wired up.
+
+Reported-by: Justin Chen <justin.chen@broadcom.com>
+Fixes: f68d08c437f9 ("net: phy: bcm7xxx: Add EPHY entry for 72165")
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20231017205119.416392-1-florian.fainelli@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/bcm7xxx.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/phy/bcm7xxx.c
++++ b/drivers/net/phy/bcm7xxx.c
+@@ -894,6 +894,9 @@ static int bcm7xxx_28nm_probe(struct phy
+ .name = _name, \
+ /* PHY_BASIC_FEATURES */ \
+ .flags = PHY_IS_INTERNAL, \
++ .get_sset_count = bcm_phy_get_sset_count, \
++ .get_strings = bcm_phy_get_strings, \
++ .get_stats = bcm7xxx_28nm_get_phy_stats, \
+ .probe = bcm7xxx_28nm_probe, \
+ .config_init = bcm7xxx_16nm_ephy_config_init, \
+ .config_aneg = genphy_config_aneg, \
--- /dev/null
+From 1d30162f35c7a73fc2f8cdcdcdbd690bedb99d1a Mon Sep 17 00:00:00 2001
+From: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+Date: Mon, 16 Oct 2023 14:08:59 +0000
+Subject: net: pktgen: Fix interface flags printing
+
+From: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+
+commit 1d30162f35c7a73fc2f8cdcdcdbd690bedb99d1a upstream.
+
+Device flags are displayed incorrectly:
+1) The comparison (i == F_FLOW_SEQ) is always false, because F_FLOW_SEQ
+is equal to (1 << FLOW_SEQ_SHIFT) == 2048, and the maximum value
+of the 'i' variable is (NR_PKT_FLAG - 1) == 17. It should be compared
+with FLOW_SEQ_SHIFT.
+
+2) Similarly to the F_IPSEC flag.
+
+3) Also add a trailing space to the string literal "spi:%u"
+to prevent the output from merging with the flag that follows.
+
+Found by InfoTeCS on behalf of Linux Verification Center
+(linuxtesting.org) with SVACE.
+
+Fixes: 99c6d3d20d62 ("pktgen: Remove brute-force printing of flags")
+Signed-off-by: Gavrilov Ilia <Ilia.Gavrilov@infotecs.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/pktgen.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/net/core/pktgen.c
++++ b/net/core/pktgen.c
+@@ -669,19 +669,19 @@ static int pktgen_if_show(struct seq_fil
+ seq_puts(seq, " Flags: ");
+
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+- if (i == F_FLOW_SEQ)
++ if (i == FLOW_SEQ_SHIFT)
+ if (!pkt_dev->cflows)
+ continue;
+
+- if (pkt_dev->flags & (1 << i))
++ if (pkt_dev->flags & (1 << i)) {
+ seq_printf(seq, "%s ", pkt_flag_names[i]);
+- else if (i == F_FLOW_SEQ)
+- seq_puts(seq, "FLOW_RND ");
+-
+ #ifdef CONFIG_XFRM
+- if (i == F_IPSEC && pkt_dev->spi)
+- seq_printf(seq, "spi:%u", pkt_dev->spi);
++ if (i == IPSEC_SHIFT && pkt_dev->spi)
++ seq_printf(seq, "spi:%u ", pkt_dev->spi);
+ #endif
++ } else if (i == FLOW_SEQ_SHIFT) {
++ seq_puts(seq, "FLOW_RND ");
++ }
+ }
+
+ seq_puts(seq, "\n");
--- /dev/null
+From b2f750c3a80b285cd60c9346f8c96bd0a2a66cde Mon Sep 17 00:00:00 2001
+From: Josua Mayer <josua@solid-run.com>
+Date: Wed, 4 Oct 2023 18:39:28 +0200
+Subject: net: rfkill: gpio: prevent value glitch during probe
+
+From: Josua Mayer <josua@solid-run.com>
+
+commit b2f750c3a80b285cd60c9346f8c96bd0a2a66cde upstream.
+
+When either reset- or shutdown-gpio are initially deasserted,
+e.g. after a reboot - or when the hardware does not include pull-down,
+there will be a short toggle of both IOs to logical 0 and back to 1.
+
+It seems that the rfkill default is unblocked, so the driver should not
+glitch to output low during probe.
+It can lead e.g. to unexpected lte modem reconnect:
+
+[1] root@localhost:~# dmesg | grep "usb 2-1"
+[ 2.136124] usb 2-1: new SuperSpeed USB device number 2 using xhci-hcd
+[ 21.215278] usb 2-1: USB disconnect, device number 2
+[ 28.833977] usb 2-1: new SuperSpeed USB device number 3 using xhci-hcd
+
+The glitch has been discovered on an arm64 board, now that device-tree
+support for the rfkill-gpio driver has finally appeared :).
+
+Change the flags for devm_gpiod_get_optional from GPIOD_OUT_LOW to
+GPIOD_ASIS to avoid any glitches.
+The rfkill driver will set the intended value during rfkill_sync_work.
+
+Fixes: 7176ba23f8b5 ("net: rfkill: add generic gpio rfkill driver")
+Signed-off-by: Josua Mayer <josua@solid-run.com>
+Link: https://lore.kernel.org/r/20231004163928.14609-1-josua@solid-run.com
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rfkill/rfkill-gpio.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/rfkill/rfkill-gpio.c
++++ b/net/rfkill/rfkill-gpio.c
+@@ -108,13 +108,13 @@ static int rfkill_gpio_probe(struct plat
+
+ rfkill->clk = devm_clk_get(&pdev->dev, NULL);
+
+- gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
++ gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS);
+ if (IS_ERR(gpio))
+ return PTR_ERR(gpio);
+
+ rfkill->reset_gpio = gpio;
+
+- gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW);
++ gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS);
+ if (IS_ERR(gpio))
+ return PTR_ERR(gpio);
+
--- /dev/null
+From a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 Mon Sep 17 00:00:00 2001
+From: Pedro Tammela <pctammela@mojatatu.com>
+Date: Tue, 17 Oct 2023 11:36:02 -0300
+Subject: net/sched: sch_hfsc: upgrade 'rt' to 'sc' when it becomes a inner curve
+
+From: Pedro Tammela <pctammela@mojatatu.com>
+
+commit a13b67c9a015c4e21601ef9aa4ec9c5d972df1b4 upstream.
+
+Christian Theune says:
+ I upgraded from 6.1.38 to 6.1.55 this morning and it broke my traffic shaping script,
+ leaving me with a non-functional uplink on a remote router.
+
+A 'rt' curve cannot be used as an inner curve (parent class), but we were
+allowing such configurations since the qdisc was introduced. Such
+configurations would trigger a UAF as Budimir explains:
+ The parent will have vttree_insert() called on it in init_vf(),
+ but will not have vttree_remove() called on it in update_vf()
+ because it does not have the HFSC_FSC flag set.
+
+The qdisc always assumes that inner classes have the HFSC_FSC flag set.
+This is by design as it doesn't make sense 'qdisc wise' for an 'rt'
+curve to be an inner curve.
+
+Budimir's original patch disallows users to add classes with a 'rt'
+parent, but this is too strict as it breaks users that have been using
+'rt' as an inner class. Another approach, taken by this patch, is to
+upgrade the inner 'rt' into a 'sc', warning the user in the process.
+It avoids the UAF reported by Budimir while also being more permissive
+to bad scripts/users/code using 'rt' as an inner class.
+
+Users checking the `tc class ls [...]` or `tc class get [...]` dumps would
+observe the curve change and are potentially breaking with this change.
+
+v1->v2: https://lore.kernel.org/all/20231013151057.2611860-1-pctammela@mojatatu.com/
+- Correct 'Fixes' tag and merge with revert (Jakub)
+
+Cc: Christian Theune <ct@flyingcircus.io>
+Cc: Budimir Markovic <markovicbudimir@gmail.com>
+Fixes: b3d26c5702c7 ("net/sched: sch_hfsc: Ensure inner classes have fsc curve")
+Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20231017143602.3191556-1-pctammela@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_hfsc.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -903,6 +903,14 @@ hfsc_change_usc(struct hfsc_class *cl, s
+ cl->cl_flags |= HFSC_USC;
+ }
+
++static void
++hfsc_upgrade_rt(struct hfsc_class *cl)
++{
++ cl->cl_fsc = cl->cl_rsc;
++ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
++ cl->cl_flags |= HFSC_FSC;
++}
++
+ static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
+ [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
+@@ -1012,10 +1020,6 @@ hfsc_change_class(struct Qdisc *sch, u32
+ if (parent == NULL)
+ return -ENOENT;
+ }
+- if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+- NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+- return -EINVAL;
+- }
+
+ if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
+ return -EINVAL;
+@@ -1066,6 +1070,12 @@ hfsc_change_class(struct Qdisc *sch, u32
+ cl->cf_tree = RB_ROOT;
+
+ sch_tree_lock(sch);
++ /* Check if the inner class is a misconfigured 'rt' */
++ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
++ NL_SET_ERR_MSG(extack,
++ "Forced curve change on parent 'rt' to 'sc'");
++ hfsc_upgrade_rt(parent);
++ }
+ qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
+ list_add_tail(&cl->siblings, &parent->children);
+ if (parent->level == 0)
--- /dev/null
+From c53647a5df9e66dd9fedf240198e1fe50d88c286 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Mon, 16 Oct 2023 20:28:10 +0300
+Subject: net: usb: smsc95xx: Fix an error code in smsc95xx_reset()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit c53647a5df9e66dd9fedf240198e1fe50d88c286 upstream.
+
+Return a negative error code instead of success.
+
+Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/147927f0-9ada-45cc-81ff-75a19dd30b76@moroto.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/smsc95xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -897,7 +897,7 @@ static int smsc95xx_reset(struct usbnet
+
+ if (timeout >= 100) {
+ netdev_warn(dev->net, "timeout waiting for completion of Lite Reset\n");
+- return ret;
++ return -ETIMEDOUT;
+ }
+
+ ret = smsc95xx_set_mac_address(dev);
--- /dev/null
+From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001
+From: Dong Chenchen <dongchenchen2@huawei.com>
+Date: Tue, 15 Aug 2023 22:18:34 +0800
+Subject: net: xfrm: skip policies marked as dead while reinserting policies
+
+From: Dong Chenchen <dongchenchen2@huawei.com>
+
+commit 6d41d4fe28724db16ca1016df0713a07e0cc7448 upstream.
+
+BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430
+Read of size 1 at addr ffff8881051f3bf8 by task ip/668
+
+CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x72/0xa0
+ print_report+0xd0/0x620
+ kasan_report+0xb6/0xf0
+ xfrm_policy_inexact_list_reinsert+0xb6/0x430
+ xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800
+ xfrm_policy_inexact_alloc_chain+0x23f/0x320
+ xfrm_policy_inexact_insert+0x6b/0x590
+ xfrm_policy_insert+0x3b1/0x480
+ xfrm_add_policy+0x23c/0x3c0
+ xfrm_user_rcv_msg+0x2d0/0x510
+ netlink_rcv_skb+0x10d/0x2d0
+ xfrm_netlink_rcv+0x49/0x60
+ netlink_unicast+0x3fe/0x540
+ netlink_sendmsg+0x528/0x970
+ sock_sendmsg+0x14a/0x160
+ ____sys_sendmsg+0x4fc/0x580
+ ___sys_sendmsg+0xef/0x160
+ __sys_sendmsg+0xf7/0x1b0
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x73/0xdd
+
+The root cause is:
+
+cpu 0 cpu1
+xfrm_dump_policy
+xfrm_policy_walk
+list_move_tail
+ xfrm_add_policy
+ ... ...
+ xfrm_policy_inexact_list_reinsert
+ list_for_each_entry_reverse
+ if (!policy->bydst_reinsert)
+ //read non-existent policy
+xfrm_dump_policy_done
+xfrm_policy_walk_done
+list_del(&walk->walk.all);
+
+If dump_one_policy() returns err (triggered by netlink socket),
+xfrm_policy_walk() will move walk initialized by socket to list
+net->xfrm.policy_all, so this socket becomes visible in the global
+policy list. The head *walk can be traversed when users add policies
+with different prefixlen and trigger xfrm_policy node merge.
+
+The issue can also be triggered by policy list traversal while rehashing
+and flushing policies.
+
+It can be fixed by skipping such "policies" with walk.dead set to 1.
+
+Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address")
+Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list")
+Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xfrm/xfrm_policy.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_rei
+ struct hlist_node *newpos = NULL;
+ bool matches_s, matches_d;
+
+- if (!policy->bydst_reinsert)
++ if (policy->walk.dead || !policy->bydst_reinsert)
+ continue;
+
+ WARN_ON_ONCE(policy->family != family);
+@@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct wor
+ struct xfrm_pol_inexact_bin *bin;
+ u8 dbits, sbits;
+
++ if (policy->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(policy->index);
+- if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
++ if (dir >= XFRM_POLICY_MAX)
+ continue;
+
+ if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+@@ -1821,9 +1824,11 @@ int xfrm_policy_flush(struct net *net, u
+
+ again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
++ if (pol->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(pol->index);
+- if (pol->walk.dead ||
+- dir >= XFRM_POLICY_MAX ||
++ if (dir >= XFRM_POLICY_MAX ||
+ pol->type != type)
+ continue;
+
+@@ -1860,9 +1865,11 @@ int xfrm_dev_policy_flush(struct net *ne
+
+ again:
+ list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
++ if (pol->walk.dead)
++ continue;
++
+ dir = xfrm_policy_id2dir(pol->index);
+- if (pol->walk.dead ||
+- dir >= XFRM_POLICY_MAX ||
++ if (dir >= XFRM_POLICY_MAX ||
+ pol->xdo.dev != dev)
+ continue;
+
--- /dev/null
+From 4c90bba60c26db7dc7df450f748e86440149786e Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 2 Oct 2023 11:57:42 +0200
+Subject: netfilter: nf_tables: do not refresh timeout when resetting element
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit 4c90bba60c26db7dc7df450f748e86440149786e upstream.
+
+The dump and reset command should not refresh the timeout, this command
+is intended to allow users to list existing stateful objects and reset
+them, element expiration should be refreshed via transaction instead with
+a specific command to achieve this, otherwise this is entering combo
+semantics that will be hard to be undone later (eg. a user asking to
+retrieve counters but _not_ requiring to refresh expiration).
+
+Fixes: 079cd633219d ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 18 +++++-------------
+ 1 file changed, 5 insertions(+), 13 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5553,7 +5553,6 @@ static int nf_tables_fill_setelem(struct
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+- u64 timeout = 0;
+
+ nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
+ if (nest == NULL)
+@@ -5589,15 +5588,11 @@ static int nf_tables_fill_setelem(struct
+ htonl(*nft_set_ext_flags(ext))))
+ goto nla_put_failure;
+
+- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+- timeout = *nft_set_ext_timeout(ext);
+- if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+- nf_jiffies64_to_msecs(timeout),
+- NFTA_SET_ELEM_PAD))
+- goto nla_put_failure;
+- } else if (set->flags & NFT_SET_TIMEOUT) {
+- timeout = READ_ONCE(set->timeout);
+- }
++ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
++ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
++ nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
++ NFTA_SET_ELEM_PAD))
++ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ u64 expires, now = get_jiffies_64();
+@@ -5612,9 +5607,6 @@ static int nf_tables_fill_setelem(struct
+ nf_jiffies64_to_msecs(expires),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+-
+- if (reset)
+- *nft_set_ext_expiration(ext) = now + timeout;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
--- /dev/null
+From ebd032fa881882fef2acb9da1bbde48d8233241d Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 4 Oct 2023 13:12:58 +0200
+Subject: netfilter: nf_tables: do not remove elements if set backend implements .abort
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit ebd032fa881882fef2acb9da1bbde48d8233241d upstream.
+
+pipapo set backend maintains two copies of the datastructure, removing
+the elements from the copy that is going to be discarded slows down
+the abort path significantly, from several minutes to few seconds after
+this patch.
+
+Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10336,7 +10336,10 @@ static int __nf_tables_abort(struct net
+ break;
+ }
+ te = (struct nft_trans_elem *)trans->data;
+- nft_setelem_remove(net, te->set, &te->elem);
++ if (!te->set->ops->abort ||
++ nft_setelem_is_catchall(te->set, &te->elem))
++ nft_setelem_remove(net, te->set, &te->elem);
++
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ atomic_dec(&te->set->nelems);
+
--- /dev/null
+From f86fb94011aeb3b26337fc22204ca726aeb8bc24 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 18 Oct 2023 13:18:39 +0200
+Subject: netfilter: nf_tables: revert do not remove elements if set backend implements .abort
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit f86fb94011aeb3b26337fc22204ca726aeb8bc24 upstream.
+
+nf_tables_abort_release() path calls nft_set_elem_destroy() for
+NFT_MSG_NEWSETELEM which releases the element, however, a reference to
+the element still remains in the working copy.
+
+Fixes: ebd032fa8818 ("netfilter: nf_tables: do not remove elements if set backend implements .abort")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10336,10 +10336,7 @@ static int __nf_tables_abort(struct net
+ break;
+ }
+ te = (struct nft_trans_elem *)trans->data;
+- if (!te->set->ops->abort ||
+- nft_setelem_is_catchall(te->set, &te->elem))
+- nft_setelem_remove(net, te->set, &te->elem);
+-
++ nft_setelem_remove(net, te->set, &te->elem);
+ if (!nft_setelem_is_catchall(te->set, &te->elem))
+ atomic_dec(&te->set->nelems);
+
--- /dev/null
+From d111692a59c1470ae530cbb39bcf0346c950ecc7 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Tue, 17 Oct 2023 12:28:27 +0200
+Subject: netfilter: nft_set_rbtree: .deactivate fails if element has expired
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+commit d111692a59c1470ae530cbb39bcf0346c950ecc7 upstream.
+
+This allows to remove an expired element which is not possible in other
+existing set backends, this is more noticeable if gc-interval is high so
+expired elements remain in the tree. On-demand gc also does not help in
+this case, because this is delete element path. Return NULL if element
+has expired.
+
+Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_set_rbtree.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/netfilter/nft_set_rbtree.c
++++ b/net/netfilter/nft_set_rbtree.c
+@@ -568,6 +568,8 @@ static void *nft_rbtree_deactivate(const
+ nft_rbtree_interval_end(this)) {
+ parent = parent->rb_right;
+ continue;
++ } else if (nft_set_elem_expired(&rbe->ext)) {
++ break;
+ } else if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
--- /dev/null
+From 503930f8e113edc86f92b767efb4ea57bdffffb2 Mon Sep 17 00:00:00 2001
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Thu, 12 Oct 2023 21:14:48 -0700
+Subject: netlink: Correct offload_xstats size
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+commit 503930f8e113edc86f92b767efb4ea57bdffffb2 upstream.
+
+rtnl_offload_xstats_get_size_hw_s_info_one() conditionalizes the
+size-computation for IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED based on whether
+or not the device has offload_xstats enabled.
+
+However, rtnl_offload_xstats_fill_hw_s_info_one() is adding the u8 for
+that field unconditionally.
+
+syzkaller triggered a WARNING in rtnl_stats_get due to this:
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 754 at net/core/rtnetlink.c:5982 rtnl_stats_get+0x2f4/0x300
+Modules linked in:
+CPU: 0 PID: 754 Comm: syz-executor148 Not tainted 6.6.0-rc2-g331b78eb12af #45
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+RIP: 0010:rtnl_stats_get+0x2f4/0x300 net/core/rtnetlink.c:5982
+Code: ff ff 89 ee e8 7d 72 50 ff 83 fd a6 74 17 e8 33 6e 50 ff 4c 89 ef be 02 00 00 00 e8 86 00 fa ff e9 7b fe ff ff e8 1c 6e 50 ff <0f> 0b eb e5 e8 73 79 7b 00 0f 1f 00 90 90 90 90 90 90 90 90 90 90
+RSP: 0018:ffffc900006837c0 EFLAGS: 00010293
+RAX: ffffffff81cf7f24 RBX: ffff8881015d9000 RCX: ffff888101815a00
+RDX: 0000000000000000 RSI: 00000000ffffffa6 RDI: 00000000ffffffa6
+RBP: 00000000ffffffa6 R08: ffffffff81cf7f03 R09: 0000000000000001
+R10: ffff888101ba47b9 R11: ffff888101815a00 R12: ffff8881017dae00
+R13: ffff8881017dad00 R14: ffffc90000683ab8 R15: ffffffff83c1f740
+FS: 00007fbc22dbc740(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020000046 CR3: 000000010264e003 CR4: 0000000000170ef0
+Call Trace:
+ <TASK>
+ rtnetlink_rcv_msg+0x677/0x710 net/core/rtnetlink.c:6480
+ netlink_rcv_skb+0xea/0x1c0 net/netlink/af_netlink.c:2545
+ netlink_unicast+0x430/0x500 net/netlink/af_netlink.c:1342
+ netlink_sendmsg+0x4fc/0x620 net/netlink/af_netlink.c:1910
+ sock_sendmsg+0xa8/0xd0 net/socket.c:730
+ ____sys_sendmsg+0x22a/0x320 net/socket.c:2541
+ ___sys_sendmsg+0x143/0x190 net/socket.c:2595
+ __x64_sys_sendmsg+0xd8/0x150 net/socket.c:2624
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x47/0xa0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+RIP: 0033:0x7fbc22e8d6a9
+Code: 5c c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 4f 37 0d 00 f7 d8 64 89 01 48
+RSP: 002b:00007ffc4320e778 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00000000004007d0 RCX: 00007fbc22e8d6a9
+RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003
+RBP: 0000000000000001 R08: 0000000000000000 R09: 00000000004007d0
+R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffc4320e898
+R13: 00007ffc4320e8a8 R14: 00000000004004a0 R15: 00007fbc22fa5a80
+ </TASK>
+---[ end trace 0000000000000000 ]---
+
+Which didn't happen prior to commit bf9f1baa279f ("net: add dedicated
+kmem_cache for typical/small skb->head") as the skb always was large
+enough.
+
+Fixes: 0e7788fd7622 ("net: rtnetlink: Add UAPI for obtaining L3 offload xstats")
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Link: https://lore.kernel.org/r/20231013041448.8229-1-cpaasch@apple.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -5504,13 +5504,11 @@ static unsigned int
+ rtnl_offload_xstats_get_size_hw_s_info_one(const struct net_device *dev,
+ enum netdev_offload_xstats_type type)
+ {
+- bool enabled = netdev_offload_xstats_enabled(dev, type);
+-
+ return nla_total_size(0) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST */
+ nla_total_size(sizeof(u8)) +
+ /* IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED */
+- (enabled ? nla_total_size(sizeof(u8)) : 0) +
++ nla_total_size(sizeof(u8)) +
+ 0;
+ }
+
--- /dev/null
+From 505ce0630ad5d31185695f8a29dde8d29f28faa7 Mon Sep 17 00:00:00 2001
+From: Xingyuan Mo <hdthky0@gmail.com>
+Date: Mon, 9 Oct 2023 18:36:15 +0800
+Subject: nf_tables: fix NULL pointer dereference in nft_expr_inner_parse()
+
+From: Xingyuan Mo <hdthky0@gmail.com>
+
+commit 505ce0630ad5d31185695f8a29dde8d29f28faa7 upstream.
+
+We should check whether the NFTA_EXPR_NAME netlink attribute is present
+before accessing it, otherwise a null pointer dereference error will occur.
+
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x4f/0x90
+ print_report+0x3f0/0x620
+ kasan_report+0xcd/0x110
+ __asan_load2+0x7d/0xa0
+ nla_strcmp+0x2f/0x90
+ __nft_expr_type_get+0x41/0xb0
+ nft_expr_inner_parse+0xe3/0x200
+ nft_inner_init+0x1be/0x2e0
+ nf_tables_newrule+0x813/0x1230
+ nfnetlink_rcv_batch+0xec3/0x1170
+ nfnetlink_rcv+0x1e4/0x220
+ netlink_unicast+0x34e/0x4b0
+ netlink_sendmsg+0x45c/0x7e0
+ __sys_sendto+0x355/0x370
+ __x64_sys_sendto+0x84/0xa0
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+
+Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching")
+Signed-off-by: Xingyuan Mo <hdthky0@gmail.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_tables_api.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index aae6ffebb413..a623d31b6518 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3166,7 +3166,7 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
+ if (err < 0)
+ return err;
+
+- if (!tb[NFTA_EXPR_DATA])
++ if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME])
+ return -EINVAL;
+
+ type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
+--
+2.42.0
+
--- /dev/null
+From 52177bbf19e6e9398375a148d2e13ed492b40b80 Mon Sep 17 00:00:00 2001
+From: Xingyuan Mo <hdthky0@gmail.com>
+Date: Mon, 9 Oct 2023 18:36:14 +0800
+Subject: nf_tables: fix NULL pointer dereference in nft_inner_init()
+
+From: Xingyuan Mo <hdthky0@gmail.com>
+
+commit 52177bbf19e6e9398375a148d2e13ed492b40b80 upstream.
+
+We should check whether the NFTA_INNER_NUM netlink attribute is present
+before accessing it, otherwise a null pointer dereference error will occur.
+
+Call Trace:
+ dump_stack_lvl+0x4f/0x90
+ print_report+0x3f0/0x620
+ kasan_report+0xcd/0x110
+ __asan_load4+0x84/0xa0
+ nft_inner_init+0x128/0x2e0
+ nf_tables_newrule+0x813/0x1230
+ nfnetlink_rcv_batch+0xec3/0x1170
+ nfnetlink_rcv+0x1e4/0x220
+ netlink_unicast+0x34e/0x4b0
+ netlink_sendmsg+0x45c/0x7e0
+ __sys_sendto+0x355/0x370
+ __x64_sys_sendto+0x84/0xa0
+ do_syscall_64+0x3f/0x90
+ entry_SYSCALL_64_after_hwframe+0x6e/0xd8
+
+Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching")
+Signed-off-by: Xingyuan Mo <hdthky0@gmail.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nft_inner.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
+index 28e2873ba24e..928312d01eb1 100644
+--- a/net/netfilter/nft_inner.c
++++ b/net/netfilter/nft_inner.c
+@@ -298,6 +298,7 @@ static int nft_inner_init(const struct nft_ctx *ctx,
+ int err;
+
+ if (!tb[NFTA_INNER_FLAGS] ||
++ !tb[NFTA_INNER_NUM] ||
+ !tb[NFTA_INNER_HDRSIZE] ||
+ !tb[NFTA_INNER_TYPE] ||
+ !tb[NFTA_INNER_EXPR])
+--
+2.42.0
+
--- /dev/null
+From a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 Mon Sep 17 00:00:00 2001
+From: Shinas Rasheed <srasheed@marvell.com>
+Date: Tue, 17 Oct 2023 03:50:30 -0700
+Subject: octeon_ep: update BQL sent bytes before ringing doorbell
+
+From: Shinas Rasheed <srasheed@marvell.com>
+
+commit a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 upstream.
+
+Sometimes Tx is completed immediately after doorbell is updated, which
+causes Tx completion routine to update completion bytes before the
+same packet bytes are updated in sent bytes in transmit function, hence
+hitting BUG_ON() in dql_completed(). To avoid this, update BQL
+sent bytes before ringing doorbell.
+
+Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support")
+Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
+Link: https://lore.kernel.org/r/20231017105030.2310966-1-srasheed@marvell.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+@@ -715,20 +715,19 @@ static netdev_tx_t octep_start_xmit(stru
+ hw_desc->dptr = tx_buffer->sglist_dma;
+ }
+
+- /* Flush the hw descriptor before writing to doorbell */
+- wmb();
+-
+- /* Ring Doorbell to notify the NIC there is a new packet */
+- writel(1, iq->doorbell_reg);
++ netdev_tx_sent_queue(iq->netdev_q, skb->len);
++ skb_tx_timestamp(skb);
+ atomic_inc(&iq->instr_pending);
+ wi++;
+ if (wi == iq->max_count)
+ wi = 0;
+ iq->host_write_index = wi;
++ /* Flush the hw descriptor before writing to doorbell */
++ wmb();
+
+- netdev_tx_sent_queue(iq->netdev_q, skb->len);
++ /* Ring Doorbell to notify the NIC there is a new packet */
++ writel(1, iq->doorbell_reg);
+ iq->stats.instr_posted++;
+- skb_tx_timestamp(skb);
+ return NETDEV_TX_OK;
+
+ dma_map_sg_err:
--- /dev/null
+From 2e2d9c7d4d37d74873583d7b0c94eac8b6869486 Mon Sep 17 00:00:00 2001
+From: Phil Sutter <phil@nwl.cc>
+Date: Fri, 13 Oct 2023 22:02:24 +0200
+Subject: selftests: netfilter: Run nft_audit.sh in its own netns
+
+From: Phil Sutter <phil@nwl.cc>
+
+commit 2e2d9c7d4d37d74873583d7b0c94eac8b6869486 upstream.
+
+Don't mess with the host's firewall ruleset. Since audit logging is not
+per-netns, add an initial delay of a second so other selftests' netns
+cleanups have a chance to finish.
+
+Fixes: e8dbde59ca3f ("selftests: netfilter: Test nf_tables audit logging")
+Signed-off-by: Phil Sutter <phil@nwl.cc>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/netfilter/nft_audit.sh | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/tools/testing/selftests/netfilter/nft_audit.sh
++++ b/tools/testing/selftests/netfilter/nft_audit.sh
+@@ -11,6 +11,12 @@ nft --version >/dev/null 2>&1 || {
+ exit $SKIP_RC
+ }
+
++# Run everything in a separate network namespace
++[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
++
++# give other scripts a chance to finish - audit_logread sees all activity
++sleep 1
++
+ logfile=$(mktemp)
+ rulefile=$(mktemp)
+ echo "logging into $logfile"
--- /dev/null
+From 92e37f20f20a23fec4626ae72eda50f127acb130 Mon Sep 17 00:00:00 2001
+From: Aaron Conole <aconole@redhat.com>
+Date: Wed, 11 Oct 2023 15:49:36 -0400
+Subject: selftests: openvswitch: Add version check for pyroute2
+
+From: Aaron Conole <aconole@redhat.com>
+
+commit 92e37f20f20a23fec4626ae72eda50f127acb130 upstream.
+
+Paolo Abeni reports that on some systems the pyroute2 version isn't
+new enough to run the test suite. Ensure that we support a minimum
+version of 0.6 for all cases (which does include the existing ones).
+The 0.6.1 version was released in May of 2021, so should be
+propagated to most installations at this point.
+
+The alternative that Paolo proposed was to only skip when the
+add-flow is being run. This would be okay for most cases, except
+if a future test case is added that needs to do flow dump without
+an associated add (just guessing). In that case, it could also be
+broken and we would need additional skip logic anyway. Just draw
+a line in the sand now.
+
+Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite")
+Reported-by: Paolo Abeni <pabeni@redhat.com>
+Closes: https://lore.kernel.org/lkml/8470c431e0930d2ea204a9363a60937289b7fdbe.camel@redhat.com/
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +-
+ tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 10 +++++++++-
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
++++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
+@@ -204,7 +204,7 @@ run_test() {
+ fi
+
+ if python3 ovs-dpctl.py -h 2>&1 | \
+- grep "Need to install the python" >/dev/null 2>&1; then
++ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
+ stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}"
+ return $ksft_skip
+ fi
+--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
++++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+@@ -25,8 +25,10 @@ try:
+ from pyroute2.netlink import nlmsg_atoms
+ from pyroute2.netlink.exceptions import NetlinkError
+ from pyroute2.netlink.generic import GenericNetlinkSocket
++ import pyroute2
++
+ except ModuleNotFoundError:
+- print("Need to install the python pyroute2 package.")
++ print("Need to install the python pyroute2 package >= 0.6.")
+ sys.exit(0)
+
+
+@@ -1459,6 +1461,12 @@ def main(argv):
+ nlmsg_atoms.ovskey = ovskey
+ nlmsg_atoms.ovsactions = ovsactions
+
++ # version check for pyroute2
++ prverscheck = pyroute2.__version__.split(".")
++ if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
++ print("Need to upgrade the python pyroute2 package to >= 0.6.")
++ sys.exit(0)
++
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-v",
--- /dev/null
+From af846afad5ca1c1a24d320adf9e48255e97db84e Mon Sep 17 00:00:00 2001
+From: Aaron Conole <aconole@redhat.com>
+Date: Wed, 11 Oct 2023 15:49:37 -0400
+Subject: selftests: openvswitch: Catch cases where the tests are killed
+
+From: Aaron Conole <aconole@redhat.com>
+
+commit af846afad5ca1c1a24d320adf9e48255e97db84e upstream.
+
+In case of a fatal signal or an early abort, at least clean up the current
+test case.
+
+Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite")
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
++++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
+@@ -3,6 +3,8 @@
+ #
+ # OVS kernel module self tests
+
++trap ovs_exit_sig EXIT TERM INT ERR
++
+ # Kselftest framework requirement - SKIP code is 4.
+ ksft_skip=4
+
--- /dev/null
+From 8eff0e062201e26739c74ac2355b7362622b7190 Mon Sep 17 00:00:00 2001
+From: Aaron Conole <aconole@redhat.com>
+Date: Wed, 11 Oct 2023 15:49:39 -0400
+Subject: selftests: openvswitch: Fix the ct_tuple for v4
+
+From: Aaron Conole <aconole@redhat.com>
+
+commit 8eff0e062201e26739c74ac2355b7362622b7190 upstream.
+
+The ct_tuple v4 data structure decode / encode routines were using
+the v6 IP address decode and relying on default encode. This could
+cause exceptions during encode / decode depending on how a ct4
+tuple would appear in a netlink message.
+
+Caught during code review.
+
+Fixes: e52b07aa1a54 ("selftests: openvswitch: add flow dump support")
+Signed-off-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
++++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+@@ -732,12 +732,14 @@ class ovskey(nla):
+ "src",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
++ convert_ipv4,
+ ),
+ (
+ "dst",
+ "dst",
+- lambda x: str(ipaddress.IPv6Address(x)),
++ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
++ convert_ipv4,
+ ),
+ ("tp_src", "tp_src", "%d", int),
+ ("tp_dst", "tp_dst", "%d", int),
asoc-codecs-wcd938x-fix-regulator-leaks-on-probe-errors.patch
asoc-codecs-wcd938x-fix-runtime-pm-imbalance-on-remove.patch
qed-fix-ll2-rx-buffer-allocation.patch
+xfrm-fix-a-data-race-in-xfrm_lookup_with_ifid.patch
+xfrm6-fix-inet6_dev-refcount-underflow-problem.patch
+xfrm-fix-a-data-race-in-xfrm_gen_index.patch
+xfrm-interface-use-dev_stats_inc.patch
+net-xfrm-skip-policies-marked-as-dead-while-reinserting-policies.patch
+fprobe-fix-to-ensure-the-number-of-active-retprobes-is-not-zero.patch
+wifi-cfg80211-use-system_unbound_wq-for-wiphy-work.patch
+net-ipv4-fix-return-value-check-in-esp_remove_trailer.patch
+net-ipv6-fix-return-value-check-in-esp_remove_trailer.patch
+net-rfkill-gpio-prevent-value-glitch-during-probe.patch
+tcp-fix-excessive-tlp-and-rack-timeouts-from-hz-rounding.patch
+tcp-tsq-relax-tcp_small_queue_check-when-rtx-queue-contains-a-single-skb.patch
+tcp-fix-listen-warning-with-v4-mapped-v6-address.patch
+docs-fix-info-about-representor-identification.patch
+tun-prevent-negative-ifindex.patch
+gve-do-not-fully-free-qpl-pages-on-prefill-errors.patch
+ipv4-fib-annotate-races-around-nh-nh_saddr_genid-and-nh-nh_saddr.patch
+net-usb-smsc95xx-fix-an-error-code-in-smsc95xx_reset.patch
+octeon_ep-update-bql-sent-bytes-before-ringing-doorbell.patch
+i40e-prevent-crash-on-probe-if-hw-registers-have-invalid-values.patch
+net-dsa-bcm_sf2-fix-possible-memory-leak-in-bcm_sf2_mdio_register.patch
+bonding-return-pointer-to-data-after-pull-on-skb.patch
+net-sched-sch_hfsc-upgrade-rt-to-sc-when-it-becomes-a-inner-curve.patch
+neighbor-tracing-move-pin6-inside-config_ipv6-y-section.patch
+selftests-openvswitch-catch-cases-where-the-tests-are-killed.patch
+selftests-openvswitch-fix-the-ct_tuple-for-v4.patch
+selftests-netfilter-run-nft_audit.sh-in-its-own-netns.patch
+netfilter-nft_set_rbtree-.deactivate-fails-if-element-has-expired.patch
+netlink-correct-offload_xstats-size.patch
+netfilter-nf_tables-do-not-refresh-timeout-when-resetting-element.patch
+nf_tables-fix-null-pointer-dereference-in-nft_expr_inner_parse.patch
+nf_tables-fix-null-pointer-dereference-in-nft_inner_init.patch
+netfilter-nf_tables-do-not-remove-elements-if-set-backend-implements-.abort.patch
+netfilter-nf_tables-revert-do-not-remove-elements-if-set-backend-implements-.abort.patch
+selftests-openvswitch-add-version-check-for-pyroute2.patch
+net-phy-bcm7xxx-add-missing-16nm-ephy-statistics.patch
+net-pktgen-fix-interface-flags-printing.patch
+net-more-strict-virtio_net_hdr_gso_udp_l4-validation.patch
+net-mdio-mux-fix-c45-access-returning-eio-after-api-change.patch
+net-avoid-uaf-on-deleted-altname.patch
+net-fix-ifname-in-netlink-ntf-during-netns-move.patch
+net-check-for-altname-conflicts-when-changing-netdev-s-netns.patch
--- /dev/null
+From 1c2709cfff1dedbb9591e989e2f001484208d914 Mon Sep 17 00:00:00 2001
+From: Neal Cardwell <ncardwell@google.com>
+Date: Sun, 15 Oct 2023 13:47:00 -0400
+Subject: tcp: fix excessive TLP and RACK timeouts from HZ rounding
+
+From: Neal Cardwell <ncardwell@google.com>
+
+commit 1c2709cfff1dedbb9591e989e2f001484208d914 upstream.
+
+We discovered from packet traces of slow loss recovery on kernels with
+the default HZ=250 setting (and min_rtt < 1ms) that after reordering,
+when receiving a SACKed sequence range, the RACK reordering timer was
+firing after about 16ms rather than the desired value of roughly
+min_rtt/4 + 2ms. The problem is largely due to the RACK reorder timer
+calculation adding in TCP_TIMEOUT_MIN, which is 2 jiffies. On kernels
+with HZ=250, this is 2*4ms = 8ms. The TLP timer calculation has the
+exact same issue.
+
+This commit fixes the TLP transmit timer and RACK reordering timer
+floor calculation to more closely match the intended 2ms floor even on
+kernels with HZ=250. It does this by adding in a new
+TCP_TIMEOUT_MIN_US floor of 2000 us and then converting to jiffies,
+instead of the current approach of converting to jiffies and then
+adding the TCP_TIMEOUT_MIN value of 2 jiffies.
+
+Our testing has verified that on kernels with HZ=1000, as expected,
+this does not produce significant changes in behavior, but on kernels
+with the default HZ=250 the latency improvement can be large. For
+example, our tests show that for HZ=250 kernels at low RTTs this fix
+roughly halves the latency for the RACK reorder timer: instead of
+mostly firing at 16ms it mostly fires at 8ms.
+
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Fixes: bb4d991a28cc ("tcp: adjust tail loss probe timeout")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20231015174700.2206872-1-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h | 3 +++
+ net/ipv4/tcp_output.c | 9 +++++----
+ net/ipv4/tcp_recovery.c | 2 +-
+ 3 files changed, 9 insertions(+), 5 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -142,6 +142,9 @@ void tcp_time_wait(struct sock *sk, int
+ #define TCP_RTO_MAX ((unsigned)(120*HZ))
+ #define TCP_RTO_MIN ((unsigned)(HZ/5))
+ #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
++
++#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
++
+ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
+ * used as a fallback RTO for the
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2773,7 +2773,7 @@ bool tcp_schedule_loss_probe(struct sock
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+- u32 timeout, rto_delta_us;
++ u32 timeout, timeout_us, rto_delta_us;
+ int early_retrans;
+
+ /* Don't do any loss probe on a Fast Open connection before 3WHS
+@@ -2797,11 +2797,12 @@ bool tcp_schedule_loss_probe(struct sock
+ * sample is available then probe after TCP_TIMEOUT_INIT.
+ */
+ if (tp->srtt_us) {
+- timeout = usecs_to_jiffies(tp->srtt_us >> 2);
++ timeout_us = tp->srtt_us >> 2;
+ if (tp->packets_out == 1)
+- timeout += TCP_RTO_MIN;
++ timeout_us += tcp_rto_min_us(sk);
+ else
+- timeout += TCP_TIMEOUT_MIN;
++ timeout_us += TCP_TIMEOUT_MIN_US;
++ timeout = usecs_to_jiffies(timeout_us);
+ } else {
+ timeout = TCP_TIMEOUT_INIT;
+ }
+--- a/net/ipv4/tcp_recovery.c
++++ b/net/ipv4/tcp_recovery.c
+@@ -104,7 +104,7 @@ bool tcp_rack_mark_lost(struct sock *sk)
+ tp->rack.advanced = 0;
+ tcp_rack_detect_loss(sk, &timeout);
+ if (timeout) {
+- timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
++ timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
+ timeout, inet_csk(sk)->icsk_rto);
+ }
--- /dev/null
+From 8702cf12e6ba91616a72d684e90357977972991b Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Mon, 9 Oct 2023 18:38:14 -0700
+Subject: tcp: Fix listen() warning with v4-mapped-v6 address.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 8702cf12e6ba91616a72d684e90357977972991b upstream.
+
+syzbot reported a warning [0] introduced by commit c48ef9c4aed3 ("tcp: Fix
+bind() regression for v4-mapped-v6 non-wildcard address.").
+
+After the cited commit, a v4 socket's address matches the corresponding
+v4-mapped-v6 tb2 in inet_bind2_bucket_match_addr(), not vice versa.
+
+During X.X.X.X -> ::ffff:X.X.X.X order bind()s, the second bind() uses
+bhash and conflicts properly without checking bhash2 so that we need not
+check if a v4-mapped-v6 sk matches the corresponding v4 address tb2 in
+inet_bind2_bucket_match_addr(). However, the repro shows that we need
+to check that in a no-conflict case.
+
+The repro bind()s two sockets to the 2-tuples using SO_REUSEPORT and calls
+listen() for the first socket:
+
+ from socket import *
+
+ s1 = socket()
+ s1.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1)
+ s1.bind(('127.0.0.1', 0))
+
+ s2 = socket(AF_INET6)
+ s2.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1)
+ s2.bind(('::ffff:127.0.0.1', s1.getsockname()[1]))
+
+ s1.listen()
+
+The second socket should belong to the first socket's tb2, but the second
+bind() creates another tb2 bucket because inet_bind2_bucket_find() returns
+NULL in inet_csk_get_port() as the v4-mapped-v6 sk does not match the
+corresponding v4 address tb2.
+
+ bhash2[] -> tb2(::ffff:X.X.X.X) -> tb2(X.X.X.X)
+
+Then, listen() for the first socket calls inet_csk_get_port(), where the
+v4 address matches the v4-mapped-v6 tb2 and WARN_ON() is triggered.
+
+To avoid that, we need to check if v4-mapped-v6 sk address matches with
+the corresponding v4 address tb2 in inet_bind2_bucket_match().
+
+The same checks are needed in inet_bind2_bucket_addr_match() too, so we
+can move all checks there and call it from inet_bind2_bucket_match().
+
+Note that now tb->family is just an address family of tb->(v6_)?rcv_saddr
+and not of sockets in the bucket. This could be refactored later by
+defining tb->rcv_saddr as tb->v6_rcv_saddr.s6_addr32[3] and prepending
+::ffff: when creating v4 tb2.
+
+[0]:
+WARNING: CPU: 0 PID: 5049 at net/ipv4/inet_connection_sock.c:587 inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587
+Modules linked in:
+CPU: 0 PID: 5049 Comm: syz-executor288 Not tainted 6.6.0-rc2-syzkaller-00018-g2cf0f7156238 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023
+RIP: 0010:inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587
+Code: 7c 24 08 e8 4c b6 8a 01 31 d2 be 88 01 00 00 48 c7 c7 e0 94 ae 8b e8 59 2e a3 f8 2e 2e 2e 31 c0 e9 04 fe ff ff e8 ca 88 d0 f8 <0f> 0b e9 0f f9 ff ff e8 be 88 d0 f8 49 8d 7e 48 e8 65 ca 5a 00 31
+RSP: 0018:ffffc90003abfbf0 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff888026429100 RCX: 0000000000000000
+RDX: ffff88807edcbb80 RSI: ffffffff88b73d66 RDI: ffff888026c49f38
+RBP: ffff888026c49f30 R08: 0000000000000005 R09: 0000000000000000
+R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff9260f200
+R13: ffff888026c49880 R14: 0000000000000000 R15: ffff888026429100
+FS: 00005555557d5380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 000000000045ad50 CR3: 0000000025754000 CR4: 00000000003506f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ inet_csk_listen_start+0x155/0x360 net/ipv4/inet_connection_sock.c:1256
+ __inet_listen_sk+0x1b8/0x5c0 net/ipv4/af_inet.c:217
+ inet_listen+0x93/0xd0 net/ipv4/af_inet.c:239
+ __sys_listen+0x194/0x270 net/socket.c:1866
+ __do_sys_listen net/socket.c:1875 [inline]
+ __se_sys_listen net/socket.c:1873 [inline]
+ __x64_sys_listen+0x53/0x80 net/socket.c:1873
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f3a5bce3af9
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007ffc1a1c79e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000032
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a5bce3af9
+RDX: 00007f3a5bce3af9 RSI: 0000000000000000 RDI: 0000000000000003
+RBP: 00007f3a5bd565f0 R08: 0000000000000006 R09: 0000000000000006
+R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000001
+R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001
+ </TASK>
+
+Fixes: c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.")
+Reported-by: syzbot+71e724675ba3958edb31@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=71e724675ba3958edb31
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20231010013814.70571-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_hashtables.c | 24 +++++++++---------------
+ 1 file changed, 9 insertions(+), 15 deletions(-)
+
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -148,8 +148,14 @@ static bool inet_bind2_bucket_addr_match
+ const struct sock *sk)
+ {
+ #if IS_ENABLED(CONFIG_IPV6)
+- if (sk->sk_family != tb2->family)
+- return false;
++ if (sk->sk_family != tb2->family) {
++ if (sk->sk_family == AF_INET)
++ return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) &&
++ tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
++
++ return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) &&
++ sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr;
++ }
+
+ if (sk->sk_family == AF_INET6)
+ return ipv6_addr_equal(&tb2->v6_rcv_saddr,
+@@ -799,19 +805,7 @@ static bool inet_bind2_bucket_match(cons
+ tb->l3mdev != l3mdev)
+ return false;
+
+-#if IS_ENABLED(CONFIG_IPV6)
+- if (sk->sk_family != tb->family) {
+- if (sk->sk_family == AF_INET)
+- return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
+- tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+-
+- return false;
+- }
+-
+- if (sk->sk_family == AF_INET6)
+- return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+-#endif
+- return tb->rcv_saddr == sk->sk_rcv_saddr;
++ return inet_bind2_bucket_addr_match(tb, sk);
+ }
+
+ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
--- /dev/null
+From f921a4a5bffa8a0005b190fb9421a7fc1fd716b6 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 17 Oct 2023 12:45:26 +0000
+Subject: tcp: tsq: relax tcp_small_queue_check() when rtx queue contains a single skb
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit f921a4a5bffa8a0005b190fb9421a7fc1fd716b6 upstream.
+
+In commit 75eefc6c59fd ("tcp: tsq: add a shortcut in tcp_small_queue_check()")
+we allowed to send an skb regardless of TSQ limits being hit if rtx queue
+was empty or had a single skb, in order to better fill the pipe
+when/if TX completions were slow.
+
+Then later, commit 75c119afe14f ("tcp: implement rb-tree based
+retransmit queue") accidentally removed the special case for
+one skb in rtx queue.
+
+Stefan Wahren reported a regression in single TCP flow throughput
+using a 100Mbit fec link, starting from commit 65466904b015 ("tcp: adjust
+TSO packet sizes based on min_rtt"). This last commit only made the
+regression more visible, because it locked the TCP flow on a particular
+behavior where TSQ prevented two skbs being pushed downstream,
+adding silences on the wire between each TSO packet.
+
+Many thanks to Stefan for his invaluable help !
+
+Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue")
+Link: https://lore.kernel.org/netdev/7f31ddc8-9971-495e-a1f6-819df542e0af@gmx.net/
+Reported-by: Stefan Wahren <wahrenst@gmx.net>
+Tested-by: Stefan Wahren <wahrenst@gmx.net>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Link: https://lore.kernel.org/r/20231017124526.4060202-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2527,6 +2527,18 @@ static bool tcp_pacing_check(struct sock
+ return true;
+ }
+
++static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk)
++{
++ const struct rb_node *node = sk->tcp_rtx_queue.rb_node;
++
++ /* No skb in the rtx queue. */
++ if (!node)
++ return true;
++
++ /* Only one skb in rtx queue. */
++ return !node->rb_left && !node->rb_right;
++}
++
+ /* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * (These limits are doubled for retransmits)
+@@ -2564,12 +2576,12 @@ static bool tcp_small_queue_check(struct
+ limit += extra_bytes;
+ }
+ if (refcount_read(&sk->sk_wmem_alloc) > limit) {
+- /* Always send skb if rtx queue is empty.
++ /* Always send skb if rtx queue is empty or has one skb.
+ * No need to wait for TX completion to call us back,
+ * after softirq/tasklet schedule.
+ * This helps when TX completions are delayed too much.
+ */
+- if (tcp_rtx_queue_empty(sk))
++ if (tcp_rtx_queue_empty_or_single_skb(sk))
+ return false;
+
+ set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
--- /dev/null
+From cbfbfe3aee718dc4c3c837f5d2463170ee59d78c Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 16 Oct 2023 18:08:51 +0000
+Subject: tun: prevent negative ifindex
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit cbfbfe3aee718dc4c3c837f5d2463170ee59d78c upstream.
+
+After commit 956db0a13b47 ("net: warn about attempts to register
+negative ifindex") syzbot is able to trigger the following splat.
+
+Negative ifindex are not supported.
+
+WARNING: CPU: 1 PID: 6003 at net/core/dev.c:9596 dev_index_reserve+0x104/0x210
+Modules linked in:
+CPU: 1 PID: 6003 Comm: syz-executor926 Not tainted 6.6.0-rc4-syzkaller-g19af4a4ed414 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
+pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : dev_index_reserve+0x104/0x210
+lr : dev_index_reserve+0x100/0x210
+sp : ffff800096a878e0
+x29: ffff800096a87930 x28: ffff0000d04380d0 x27: ffff0000d04380f8
+x26: ffff0000d04380f0 x25: 1ffff00012d50f20 x24: 1ffff00012d50f1c
+x23: dfff800000000000 x22: ffff8000929c21c0 x21: 00000000ffffffea
+x20: ffff0000d04380e0 x19: ffff800096a87900 x18: ffff800096a874c0
+x17: ffff800084df5008 x16: ffff80008051f9c4 x15: 0000000000000001
+x14: 1fffe0001a087198 x13: 0000000000000000 x12: 0000000000000000
+x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000
+x8 : ffff0000d41c9bc0 x7 : 0000000000000000 x6 : 0000000000000000
+x5 : ffff800091763d88 x4 : 0000000000000000 x3 : ffff800084e04748
+x2 : 0000000000000001 x1 : 00000000fead71c7 x0 : 0000000000000000
+Call trace:
+dev_index_reserve+0x104/0x210
+register_netdevice+0x598/0x1074 net/core/dev.c:10084
+tun_set_iff+0x630/0xb0c drivers/net/tun.c:2850
+__tun_chr_ioctl+0x788/0x2af8 drivers/net/tun.c:3118
+tun_chr_ioctl+0x38/0x4c drivers/net/tun.c:3403
+vfs_ioctl fs/ioctl.c:51 [inline]
+__do_sys_ioctl fs/ioctl.c:871 [inline]
+__se_sys_ioctl fs/ioctl.c:857 [inline]
+__arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857
+__invoke_syscall arch/arm64/kernel/syscall.c:37 [inline]
+invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51
+el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136
+do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155
+el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678
+el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696
+el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595
+irq event stamp: 11348
+hardirqs last enabled at (11347): [<ffff80008a716574>] __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:151 [inline]
+hardirqs last enabled at (11347): [<ffff80008a716574>] _raw_spin_unlock_irqrestore+0x38/0x98 kernel/locking/spinlock.c:194
+hardirqs last disabled at (11348): [<ffff80008a627820>] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:436
+softirqs last enabled at (11138): [<ffff8000887ca53c>] spin_unlock_bh include/linux/spinlock.h:396 [inline]
+softirqs last enabled at (11138): [<ffff8000887ca53c>] release_sock+0x15c/0x1b0 net/core/sock.c:3531
+softirqs last disabled at (11136): [<ffff8000887ca41c>] spin_lock_bh include/linux/spinlock.h:356 [inline]
+softirqs last disabled at (11136): [<ffff8000887ca41c>] release_sock+0x3c/0x1b0 net/core/sock.c:3518
+
+Fixes: fb7589a16216 ("tun: Add ability to create tun device with given index")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Link: https://lore.kernel.org/r/20231016180851.3560092-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -3068,10 +3068,11 @@ static long __tun_chr_ioctl(struct file
+ struct net *net = sock_net(&tfile->sk);
+ struct tun_struct *tun;
+ void __user* argp = (void __user*)arg;
+- unsigned int ifindex, carrier;
++ unsigned int carrier;
+ struct ifreq ifr;
+ kuid_t owner;
+ kgid_t group;
++ int ifindex;
+ int sndbuf;
+ int vnet_hdr_sz;
+ int le;
+@@ -3127,7 +3128,9 @@ static long __tun_chr_ioctl(struct file
+ ret = -EFAULT;
+ if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
+ goto unlock;
+-
++ ret = -EINVAL;
++ if (ifindex < 0)
++ goto unlock;
+ ret = 0;
+ tfile->ifindex = ifindex;
+ goto unlock;
--- /dev/null
+From 91d20ab9d9ca035527af503d00e1e30d6c375f2a Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Mon, 9 Oct 2023 10:18:01 +0200
+Subject: wifi: cfg80211: use system_unbound_wq for wiphy work
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit 91d20ab9d9ca035527af503d00e1e30d6c375f2a upstream.
+
+Since wiphy work items can run pretty much arbitrary
+code in the stack/driver, it can take longer to run
+all of this, so we shouldn't be using system_wq via
+schedule_work(). Also, we lock the wiphy (which is
+the reason this exists), so use system_unbound_wq.
+
+Reported-and-tested-by: Kalle Valo <kvalo@kernel.org>
+Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/wireless/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/wireless/core.c
++++ b/net/wireless/core.c
+@@ -1622,7 +1622,7 @@ void wiphy_work_queue(struct wiphy *wiph
+ list_add_tail(&work->entry, &rdev->wiphy_work_list);
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+
+- schedule_work(&rdev->wiphy_work);
++ queue_work(system_unbound_wq, &rdev->wiphy_work);
+ }
+ EXPORT_SYMBOL_GPL(wiphy_work_queue);
+
--- /dev/null
+From 3e4bc23926b83c3c67e5f61ae8571602754131a6 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 8 Sep 2023 18:13:59 +0000
+Subject: xfrm: fix a data-race in xfrm_gen_index()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 3e4bc23926b83c3c67e5f61ae8571602754131a6 upstream.
+
+xfrm_gen_index() mutual exclusion uses net->xfrm.xfrm_policy_lock.
+
+This means we must use a per-netns idx_generator variable,
+instead of a static one.
+An alternative would be to use an atomic variable.
+
+syzbot reported:
+
+BUG: KCSAN: data-race in xfrm_sk_policy_insert / xfrm_sk_policy_insert
+
+write to 0xffffffff87005938 of 4 bytes by task 29466 on cpu 0:
+xfrm_gen_index net/xfrm/xfrm_policy.c:1385 [inline]
+xfrm_sk_policy_insert+0x262/0x640 net/xfrm/xfrm_policy.c:2347
+xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639
+do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943
+ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012
+rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054
+sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697
+__sys_setsockopt+0x1c9/0x230 net/socket.c:2263
+__do_sys_setsockopt net/socket.c:2274 [inline]
+__se_sys_setsockopt net/socket.c:2271 [inline]
+__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffffffff87005938 of 4 bytes by task 29460 on cpu 1:
+xfrm_sk_policy_insert+0x13e/0x640
+xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639
+do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943
+ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012
+rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054
+sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697
+__sys_setsockopt+0x1c9/0x230 net/socket.c:2263
+__do_sys_setsockopt net/socket.c:2274 [inline]
+__se_sys_setsockopt net/socket.c:2271 [inline]
+__x64_sys_setsockopt+0x66/0x80 net/socket.c:2271
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x00006ad8 -> 0x00006b18
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 29460 Comm: syz-executor.1 Not tainted 6.5.0-rc5-syzkaller-00243-g9106536c1aa3 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+
+Fixes: 1121994c803f ("netns xfrm: policy insertion in netns")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Cc: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netns/xfrm.h | 1 +
+ net/xfrm/xfrm_policy.c | 6 ++----
+ 2 files changed, 3 insertions(+), 4 deletions(-)
+
+--- a/include/net/netns/xfrm.h
++++ b/include/net/netns/xfrm.h
+@@ -50,6 +50,7 @@ struct netns_xfrm {
+ struct list_head policy_all;
+ struct hlist_head *policy_byidx;
+ unsigned int policy_idx_hmask;
++ unsigned int idx_generator;
+ struct hlist_head policy_inexact[XFRM_POLICY_MAX];
+ struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX];
+ unsigned int policy_count[XFRM_POLICY_MAX * 2];
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -1372,8 +1372,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
+ * of an absolute inpredictability of ordering of rules. This will not pass. */
+ static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
+ {
+- static u32 idx_generator;
+-
+ for (;;) {
+ struct hlist_head *list;
+ struct xfrm_policy *p;
+@@ -1381,8 +1379,8 @@ static u32 xfrm_gen_index(struct net *ne
+ int found;
+
+ if (!index) {
+- idx = (idx_generator | dir);
+- idx_generator += 8;
++ idx = (net->xfrm.idx_generator | dir);
++ net->xfrm.idx_generator += 8;
+ } else {
+ idx = index;
+ index = 0;
--- /dev/null
+From de5724ca38fd5e442bae9c1fab31942b6544012d Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 11 Oct 2023 10:24:29 +0000
+Subject: xfrm: fix a data-race in xfrm_lookup_with_ifid()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit de5724ca38fd5e442bae9c1fab31942b6544012d upstream.
+
+syzbot complains about a race in xfrm_lookup_with_ifid() [1]
+
+When preparing commit 0a9e5794b21e ("xfrm: annotate data-race
+around use_time") I thought xfrm_lookup_with_ifid() was modifying
+a still private structure.
+
+[1]
+BUG: KCSAN: data-race in xfrm_lookup_with_ifid / xfrm_lookup_with_ifid
+
+write to 0xffff88813ea41108 of 8 bytes by task 8150 on cpu 1:
+xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218
+xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline]
+xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281
+ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246
+send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139
+wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+write to 0xffff88813ea41108 of 8 bytes by task 15867 on cpu 0:
+xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218
+xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline]
+xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281
+ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246
+send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139
+wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178
+wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200
+wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline]
+wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51
+process_one_work kernel/workqueue.c:2630 [inline]
+process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
+worker_thread+0x525/0x730 kernel/workqueue.c:2784
+kthread+0x1d7/0x210 kernel/kthread.c:388
+ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
+ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304
+
+value changed: 0x00000000651cd9d1 -> 0x00000000651cd9d2
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 15867 Comm: kworker/u4:58 Not tainted 6.6.0-rc4-syzkaller-00016-g5e62ed3b1c8a #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023
+Workqueue: wg-kex-wg2 wg_packet_handshake_send_worker
+
+Fixes: 0a9e5794b21e ("xfrm: annotate data-race around use_time")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xfrm/xfrm_policy.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -3215,7 +3215,7 @@ no_transform:
+ }
+
+ for (i = 0; i < num_pols; i++)
+- pols[i]->curlft.use_time = ktime_get_real_seconds();
++ WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds());
+
+ if (num_xfrms < 0) {
+ /* Prohibit the flow */
--- /dev/null
+From f7c4e3e5d4f6609b4725a97451948ca2e425379a Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 5 Sep 2023 13:23:03 +0000
+Subject: xfrm: interface: use DEV_STATS_INC()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit f7c4e3e5d4f6609b4725a97451948ca2e425379a upstream.
+
+syzbot/KCSAN reported data-races in xfrm whenever dev->stats fields
+are updated.
+
+It appears all of these updates can happen from multiple cpus.
+
+Adopt SMP safe DEV_STATS_INC() to update dev->stats fields.
+
+BUG: KCSAN: data-race in xfrmi_xmit / xfrmi_xmit
+
+read-write to 0xffff88813726b160 of 8 bytes by task 23986 on cpu 1:
+xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583
+__netdev_start_xmit include/linux/netdevice.h:4889 [inline]
+netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+xmit_one net/core/dev.c:3544 [inline]
+dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560
+__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340
+dev_queue_xmit include/linux/netdevice.h:3082 [inline]
+neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581
+neigh_output include/net/neighbour.h:542 [inline]
+ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230
+ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318
+NF_HOOK_COND include/linux/netfilter.h:293 [inline]
+ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432
+dst_output include/net/dst.h:458 [inline]
+ip_local_out net/ipv4/ip_output.c:127 [inline]
+ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487
+udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963
+udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246
+inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840
+sock_sendmsg_nosec net/socket.c:730 [inline]
+sock_sendmsg net/socket.c:753 [inline]
+____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540
+___sys_sendmsg net/socket.c:2594 [inline]
+__sys_sendmmsg+0x269/0x500 net/socket.c:2680
+__do_sys_sendmmsg net/socket.c:2709 [inline]
+__se_sys_sendmmsg net/socket.c:2706 [inline]
+__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read-write to 0xffff88813726b160 of 8 bytes by task 23987 on cpu 0:
+xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583
+__netdev_start_xmit include/linux/netdevice.h:4889 [inline]
+netdev_start_xmit include/linux/netdevice.h:4903 [inline]
+xmit_one net/core/dev.c:3544 [inline]
+dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560
+__dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340
+dev_queue_xmit include/linux/netdevice.h:3082 [inline]
+neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581
+neigh_output include/net/neighbour.h:542 [inline]
+ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230
+ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318
+NF_HOOK_COND include/linux/netfilter.h:293 [inline]
+ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432
+dst_output include/net/dst.h:458 [inline]
+ip_local_out net/ipv4/ip_output.c:127 [inline]
+ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487
+udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963
+udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246
+inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840
+sock_sendmsg_nosec net/socket.c:730 [inline]
+sock_sendmsg net/socket.c:753 [inline]
+____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540
+___sys_sendmsg net/socket.c:2594 [inline]
+__sys_sendmmsg+0x269/0x500 net/socket.c:2680
+__do_sys_sendmmsg net/socket.c:2709 [inline]
+__se_sys_sendmmsg net/socket.c:2706 [inline]
+__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x00000000000010d7 -> 0x00000000000010d8
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 23987 Comm: syz-executor.5 Not tainted 6.5.0-syzkaller-10885-g0468be89b3fa #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
+
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xfrm/xfrm_interface_core.c | 22 ++++++++++------------
+ 1 file changed, 10 insertions(+), 12 deletions(-)
+
+--- a/net/xfrm/xfrm_interface_core.c
++++ b/net/xfrm/xfrm_interface_core.c
+@@ -380,8 +380,8 @@ static int xfrmi_rcv_cb(struct sk_buff *
+ skb->dev = dev;
+
+ if (err) {
+- dev->stats.rx_errors++;
+- dev->stats.rx_dropped++;
++ DEV_STATS_INC(dev, rx_errors);
++ DEV_STATS_INC(dev, rx_dropped);
+
+ return 0;
+ }
+@@ -426,7 +426,6 @@ static int
+ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+ {
+ struct xfrm_if *xi = netdev_priv(dev);
+- struct net_device_stats *stats = &xi->dev->stats;
+ struct dst_entry *dst = skb_dst(skb);
+ unsigned int length = skb->len;
+ struct net_device *tdev;
+@@ -473,7 +472,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+- stats->collisions++;
++ DEV_STATS_INC(dev, collisions);
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ dev->name);
+ goto tx_err_dst_release;
+@@ -512,13 +511,13 @@ xmit:
+ if (net_xmit_eval(err) == 0) {
+ dev_sw_netstats_tx_add(dev, 1, length);
+ } else {
+- stats->tx_errors++;
+- stats->tx_aborted_errors++;
++ DEV_STATS_INC(dev, tx_errors);
++ DEV_STATS_INC(dev, tx_aborted_errors);
+ }
+
+ return 0;
+ tx_err_link_failure:
+- stats->tx_carrier_errors++;
++ DEV_STATS_INC(dev, tx_carrier_errors);
+ dst_link_failure(skb);
+ tx_err_dst_release:
+ dst_release(dst);
+@@ -528,7 +527,6 @@ tx_err_dst_release:
+ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct xfrm_if *xi = netdev_priv(dev);
+- struct net_device_stats *stats = &xi->dev->stats;
+ struct dst_entry *dst = skb_dst(skb);
+ struct flowi fl;
+ int ret;
+@@ -545,7 +543,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+ dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
+ if (dst->error) {
+ dst_release(dst);
+- stats->tx_carrier_errors++;
++ DEV_STATS_INC(dev, tx_carrier_errors);
+ goto tx_err;
+ }
+ skb_dst_set(skb, dst);
+@@ -561,7 +559,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+ fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
+ rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
+ if (IS_ERR(rt)) {
+- stats->tx_carrier_errors++;
++ DEV_STATS_INC(dev, tx_carrier_errors);
+ goto tx_err;
+ }
+ skb_dst_set(skb, &rt->dst);
+@@ -580,8 +578,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_
+ return NETDEV_TX_OK;
+
+ tx_err:
+- stats->tx_errors++;
+- stats->tx_dropped++;
++ DEV_STATS_INC(dev, tx_errors);
++ DEV_STATS_INC(dev, tx_dropped);
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
--- /dev/null
+From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Fri, 15 Sep 2023 19:20:41 +0800
+Subject: xfrm6: fix inet6_dev refcount underflow problem
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+commit cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b upstream.
+
+There are race conditions that may lead to inet6_dev refcount underflow
+in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev().
+
+One of the refcount underflow bugs is shown below:
+ (cpu 1) | (cpu 2)
+xfrm6_dst_destroy() |
+ ... |
+ in6_dev_put() |
+ | rt6_uncached_list_flush_dev()
+ ... | ...
+ | in6_dev_put()
+ rt6_uncached_list_del() | ...
+ ... |
+
+xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(),
+so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put()
+again for the same inet6_dev.
+
+Fix it by moving in6_dev_put() after rt6_uncached_list_del() in
+xfrm6_dst_destroy().
+
+Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts")
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/xfrm6_policy.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst
+ {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+
+- if (likely(xdst->u.rt6.rt6i_idev))
+- in6_dev_put(xdst->u.rt6.rt6i_idev);
+ dst_destroy_metrics_generic(dst);
+ rt6_uncached_list_del(&xdst->u.rt6);
++ if (likely(xdst->u.rt6.rt6i_idev))
++ in6_dev_put(xdst->u.rt6.rt6i_idev);
+ xfrm_dst_destroy(xdst);
+ }
+