]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sat, 26 Oct 2024 07:36:45 +0000 (03:36 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 26 Oct 2024 07:36:45 +0000 (03:36 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
23 files changed:
queue-6.1/be2net-fix-potential-memory-leak-in-be_xmit.patch [new file with mode: 0644]
queue-6.1/bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch [new file with mode: 0644]
queue-6.1/bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch [new file with mode: 0644]
queue-6.1/bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch [new file with mode: 0644]
queue-6.1/docs-net-reformat-driver.rst-from-a-list-to-sections.patch [new file with mode: 0644]
queue-6.1/net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch [new file with mode: 0644]
queue-6.1/net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch [new file with mode: 0644]
queue-6.1/net-plip-fix-break-causing-plip-to-never-transmit.patch [new file with mode: 0644]
queue-6.1/net-provide-macros-for-commonly-copied-lockless-queu.patch [new file with mode: 0644]
queue-6.1/net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch [new file with mode: 0644]
queue-6.1/net-sched-adjust-device-watchdog-timer-to-detect-sto.patch [new file with mode: 0644]
queue-6.1/net-sched-fix-use-after-free-in-taprio_change.patch [new file with mode: 0644]
queue-6.1/net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch [new file with mode: 0644]
queue-6.1/net-usb-usbnet-fix-name-regression.patch [new file with mode: 0644]
queue-6.1/net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch [new file with mode: 0644]
queue-6.1/netfilter-xtables-fix-typo-causing-some-targets-not-.patch [new file with mode: 0644]
queue-6.1/octeon_ep-add-skb-allocation-failures-handling-in-__.patch [new file with mode: 0644]
queue-6.1/octeon_ep-implement-helper-for-iterating-packets-in-.patch [new file with mode: 0644]
queue-6.1/posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch [new file with mode: 0644]
queue-6.1/r8169-avoid-unsolicited-interrupts.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/xfrm-extract-dst-lookup-parameters-into-a-struct.patch [new file with mode: 0644]
queue-6.1/xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch [new file with mode: 0644]

diff --git a/queue-6.1/be2net-fix-potential-memory-leak-in-be_xmit.patch b/queue-6.1/be2net-fix-potential-memory-leak-in-be_xmit.patch
new file mode 100644 (file)
index 0000000..0effdae
--- /dev/null
@@ -0,0 +1,61 @@
+From db5162c632371287150b3c8ca8fe16848b1e01e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 22:48:02 +0800
+Subject: be2net: fix potential memory leak in be_xmit()
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit e4dd8bfe0f6a23acd305f9b892c00899089bd621 ]
+
+The be_xmit() returns NETDEV_TX_OK without freeing skb
+in case of be_xmit_enqueue() fails, add dev_kfree_skb_any() to fix it.
+
+Fixes: 760c295e0e8d ("be2net: Support for OS2BMC.")
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
+Message-ID: <20241015144802.12150-1-wanghai38@huawei.com>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/emulex/benet/be_main.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
+index a9e4e6464a04c..b0a85c9b952b9 100644
+--- a/drivers/net/ethernet/emulex/benet/be_main.c
++++ b/drivers/net/ethernet/emulex/benet/be_main.c
+@@ -1382,10 +1382,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
+       be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
+       wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
+-      if (unlikely(!wrb_cnt)) {
+-              dev_kfree_skb_any(skb);
+-              goto drop;
+-      }
++      if (unlikely(!wrb_cnt))
++              goto drop_skb;
+       /* if os2bmc is enabled and if the pkt is destined to bmc,
+        * enqueue the pkt a 2nd time with mgmt bit set.
+@@ -1394,7 +1392,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
+               BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
+               wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
+               if (unlikely(!wrb_cnt))
+-                      goto drop;
++                      goto drop_skb;
+               else
+                       skb_get(skb);
+       }
+@@ -1408,6 +1406,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
+               be_xmit_flush(adapter, txo);
+       return NETDEV_TX_OK;
++drop_skb:
++      dev_kfree_skb_any(skb);
+ drop:
+       tx_stats(txo)->tx_drv_drops++;
+       /* Flush the already enqueued tx requests */
+-- 
+2.43.0
+
diff --git a/queue-6.1/bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch b/queue-6.1/bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch
new file mode 100644 (file)
index 0000000..fb18f79
--- /dev/null
@@ -0,0 +1,66 @@
+From d88db45c2d7ab392e3d55c08c69ce75a432aa58f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 15:35:49 -0400
+Subject: Bluetooth: ISO: Fix UAF on iso_sock_timeout
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 246b435ad668596aa0e2bbb9d491b6413861211a ]
+
+conn->sk may have been unlinked/freed while waiting for iso_conn_lock,
+so this checks if conn->sk is still valid by checking if it is part of
+iso_sk_list.
+
+Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/iso.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
+index b61abddc7bd4e..27efca5dc7bbf 100644
+--- a/net/bluetooth/iso.c
++++ b/net/bluetooth/iso.c
+@@ -68,6 +68,16 @@ struct iso_pinfo {
+ #define ISO_CONN_TIMEOUT      (HZ * 40)
+ #define ISO_DISCONN_TIMEOUT   (HZ * 2)
++static struct sock *iso_sock_hold(struct iso_conn *conn)
++{
++      if (!conn || !bt_sock_linked(&iso_sk_list, conn->sk))
++              return NULL;
++
++      sock_hold(conn->sk);
++
++      return conn->sk;
++}
++
+ static void iso_sock_timeout(struct work_struct *work)
+ {
+       struct iso_conn *conn = container_of(work, struct iso_conn,
+@@ -75,9 +85,7 @@ static void iso_sock_timeout(struct work_struct *work)
+       struct sock *sk;
+       iso_conn_lock(conn);
+-      sk = conn->sk;
+-      if (sk)
+-              sock_hold(sk);
++      sk = iso_sock_hold(conn);
+       iso_conn_unlock(conn);
+       if (!sk)
+@@ -184,9 +192,7 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
+       /* Kill socket */
+       iso_conn_lock(conn);
+-      sk = conn->sk;
+-      if (sk)
+-              sock_hold(sk);
++      sk = iso_sock_hold(conn);
+       iso_conn_unlock(conn);
+       if (sk) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch b/queue-6.1/bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch
new file mode 100644 (file)
index 0000000..bc72e1a
--- /dev/null
@@ -0,0 +1,116 @@
+From 5b04cab418ca4c5f8d8951556b795f6a4fbd70b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 12:31:08 -0400
+Subject: Bluetooth: SCO: Fix UAF on sco_sock_timeout
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+[ Upstream commit 1bf4470a3939c678fb822073e9ea77a0560bc6bb ]
+
+conn->sk may have been unlinked/freed while waiting for sco_conn_lock,
+so this checks if conn->sk is still valid by checking if it is part of
+sco_sk_list.
+
+Reported-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com
+Tested-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=4c0d0c4cde787116d465
+Fixes: ba316be1b6a0 ("Bluetooth: schedule SCO timeouts with delayed_work")
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/bluetooth.h |  1 +
+ net/bluetooth/af_bluetooth.c      | 22 ++++++++++++++++++++++
+ net/bluetooth/sco.c               | 18 ++++++++++++------
+ 3 files changed, 35 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
+index c7f1dd34ea470..41fc7f12971a5 100644
+--- a/include/net/bluetooth/bluetooth.h
++++ b/include/net/bluetooth/bluetooth.h
+@@ -383,6 +383,7 @@ int  bt_sock_register(int proto, const struct net_proto_family *ops);
+ void bt_sock_unregister(int proto);
+ void bt_sock_link(struct bt_sock_list *l, struct sock *s);
+ void bt_sock_unlink(struct bt_sock_list *l, struct sock *s);
++bool bt_sock_linked(struct bt_sock_list *l, struct sock *s);
+ struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
+                          struct proto *prot, int proto, gfp_t prio, int kern);
+ int  bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
+index 0c70172555b4f..4c3bd05160387 100644
+--- a/net/bluetooth/af_bluetooth.c
++++ b/net/bluetooth/af_bluetooth.c
+@@ -177,6 +177,28 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
+ }
+ EXPORT_SYMBOL(bt_sock_unlink);
++bool bt_sock_linked(struct bt_sock_list *l, struct sock *s)
++{
++      struct sock *sk;
++
++      if (!l || !s)
++              return false;
++
++      read_lock(&l->lock);
++
++      sk_for_each(sk, &l->head) {
++              if (s == sk) {
++                      read_unlock(&l->lock);
++                      return true;
++              }
++      }
++
++      read_unlock(&l->lock);
++
++      return false;
++}
++EXPORT_SYMBOL(bt_sock_linked);
++
+ void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
+ {
+       BT_DBG("parent %p, sk %p", parent, sk);
+diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
+index 92ea01f9def3e..ad5afde17213a 100644
+--- a/net/bluetooth/sco.c
++++ b/net/bluetooth/sco.c
+@@ -77,6 +77,16 @@ struct sco_pinfo {
+ #define SCO_CONN_TIMEOUT      (HZ * 40)
+ #define SCO_DISCONN_TIMEOUT   (HZ * 2)
++static struct sock *sco_sock_hold(struct sco_conn *conn)
++{
++      if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk))
++              return NULL;
++
++      sock_hold(conn->sk);
++
++      return conn->sk;
++}
++
+ static void sco_sock_timeout(struct work_struct *work)
+ {
+       struct sco_conn *conn = container_of(work, struct sco_conn,
+@@ -88,9 +98,7 @@ static void sco_sock_timeout(struct work_struct *work)
+               sco_conn_unlock(conn);
+               return;
+       }
+-      sk = conn->sk;
+-      if (sk)
+-              sock_hold(sk);
++      sk = sco_sock_hold(conn);
+       sco_conn_unlock(conn);
+       if (!sk)
+@@ -195,9 +203,7 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
+       /* Kill socket */
+       sco_conn_lock(conn);
+-      sk = conn->sk;
+-      if (sk)
+-              sock_hold(sk);
++      sk = sco_sock_hold(conn);
+       sco_conn_unlock(conn);
+       if (sk) {
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch b/queue-6.1/bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch
new file mode 100644 (file)
index 0000000..46b4883
--- /dev/null
@@ -0,0 +1,46 @@
+From c3bad56a1d490930e03355a6b37f30d7777e476e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 22:03:52 +0200
+Subject: bpf,perf: Fix perf_event_detach_bpf_prog error handling
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+[ Upstream commit 0ee288e69d033850bc87abe0f9cc3ada24763d7f ]
+
+Peter reported that perf_event_detach_bpf_prog might skip releasing
+the bpf program for -ENOENT error from bpf_prog_array_copy.
+
+This can't happen because bpf program is stored in perf event and is
+detached and released only when perf event is freed.
+
+Let's drop the -ENOENT check and make sure the bpf program is released
+in any case.
+
+Fixes: 170a7e3ea070 ("bpf: bpf_prog_array_copy() should return -ENOENT if exclude_prog not found")
+Reported-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20241023200352.3488610-1-jolsa@kernel.org
+
+Closes: https://lore.kernel.org/lkml/20241022111638.GC16066@noisy.programming.kicks-ass.net/
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/bpf_trace.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
+index 3fdde232eaa92..583961a9e539a 100644
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -2179,8 +2179,6 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
+       old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+       ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
+-      if (ret == -ENOENT)
+-              goto unlock;
+       if (ret < 0) {
+               bpf_prog_array_delete_safe(old_array, event->prog);
+       } else {
+-- 
+2.43.0
+
diff --git a/queue-6.1/docs-net-reformat-driver.rst-from-a-list-to-sections.patch b/queue-6.1/docs-net-reformat-driver.rst-from-a-list-to-sections.patch
new file mode 100644 (file)
index 0000000..3686c49
--- /dev/null
@@ -0,0 +1,160 @@
+From 906af1927a49314dec380c27db03c391606faea1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Apr 2023 18:25:30 -0700
+Subject: docs: net: reformat driver.rst from a list to sections
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit d2f5c68e3f7157e874a759e382a5eaffa775b869 ]
+
+driver.rst had a historical form of list of common problems.
+In the age of Sphinx and rendered documentation it's better
+to use the more usual title + text format.
+
+This will allow us to render kdoc into the output more naturally.
+
+No changes to the actual text.
+
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/networking/driver.rst | 91 ++++++++++++++++++-----------
+ 1 file changed, 56 insertions(+), 35 deletions(-)
+
+diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
+index 64f7236ff10be..3040a74d421c7 100644
+--- a/Documentation/networking/driver.rst
++++ b/Documentation/networking/driver.rst
+@@ -4,15 +4,19 @@
+ Softnet Driver Issues
+ =====================
+-Transmit path guidelines:
++Transmit path guidelines
++========================
+-1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under
+-   any normal circumstances.  It is considered a hard error unless
+-   there is no way your device can tell ahead of time when its
+-   transmit function will become busy.
++Stop queues in advance
++----------------------
+-   Instead it must maintain the queue properly.  For example,
+-   for a driver implementing scatter-gather this means::
++The ndo_start_xmit method must not return NETDEV_TX_BUSY under
++any normal circumstances.  It is considered a hard error unless
++there is no way your device can tell ahead of time when its
++transmit function will become busy.
++
++Instead it must maintain the queue properly.  For example,
++for a driver implementing scatter-gather this means::
+       static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
+                                              struct net_device *dev)
+@@ -42,56 +46,73 @@ Transmit path guidelines:
+               return NETDEV_TX_OK;
+       }
+-   And then at the end of your TX reclamation event handling::
++And then at the end of your TX reclamation event handling::
+       if (netif_queue_stopped(dp->dev) &&
+           TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
+               netif_wake_queue(dp->dev);
+-   For a non-scatter-gather supporting card, the three tests simply become::
++For a non-scatter-gather supporting card, the three tests simply become::
+               /* This is a hard error log it. */
+               if (TX_BUFFS_AVAIL(dp) <= 0)
+-   and::
++and::
+               if (TX_BUFFS_AVAIL(dp) == 0)
+-   and::
++and::
+       if (netif_queue_stopped(dp->dev) &&
+           TX_BUFFS_AVAIL(dp) > 0)
+               netif_wake_queue(dp->dev);
+-2) An ndo_start_xmit method must not modify the shared parts of a
+-   cloned SKB.
++No exclusive ownership
++----------------------
++
++An ndo_start_xmit method must not modify the shared parts of a
++cloned SKB.
++
++Timely completions
++------------------
++
++Do not forget that once you return NETDEV_TX_OK from your
++ndo_start_xmit method, it is your driver's responsibility to free
++up the SKB and in some finite amount of time.
+-3) Do not forget that once you return NETDEV_TX_OK from your
+-   ndo_start_xmit method, it is your driver's responsibility to free
+-   up the SKB and in some finite amount of time.
++For example, this means that it is not allowed for your TX
++mitigation scheme to let TX packets "hang out" in the TX
++ring unreclaimed forever if no new TX packets are sent.
++This error can deadlock sockets waiting for send buffer room
++to be freed up.
+-   For example, this means that it is not allowed for your TX
+-   mitigation scheme to let TX packets "hang out" in the TX
+-   ring unreclaimed forever if no new TX packets are sent.
+-   This error can deadlock sockets waiting for send buffer room
+-   to be freed up.
++If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
++must not keep any reference to that SKB and you must not attempt
++to free it up.
+-   If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
+-   must not keep any reference to that SKB and you must not attempt
+-   to free it up.
++Probing guidelines
++==================
+-Probing guidelines:
++Address validation
++------------------
++
++Any hardware layer address you obtain for your device should
++be verified.  For example, for ethernet check it with
++linux/etherdevice.h:is_valid_ether_addr()
++
++Close/stop guidelines
++=====================
+-1) Any hardware layer address you obtain for your device should
+-   be verified.  For example, for ethernet check it with
+-   linux/etherdevice.h:is_valid_ether_addr()
++Quiescence
++----------
+-Close/stop guidelines:
++After the ndo_stop routine has been called, the hardware must
++not receive or transmit any data.  All in flight packets must
++be aborted. If necessary, poll or wait for completion of
++any reset commands.
+-1) After the ndo_stop routine has been called, the hardware must
+-   not receive or transmit any data.  All in flight packets must
+-   be aborted. If necessary, poll or wait for completion of
+-   any reset commands.
++Auto-close
++----------
+-2) The ndo_stop routine will be called by unregister_netdevice
+-   if device is still UP.
++The ndo_stop routine will be called by unregister_netdevice
++if device is still UP.
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch b/queue-6.1/net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch
new file mode 100644 (file)
index 0000000..29d96d1
--- /dev/null
@@ -0,0 +1,41 @@
+From 3939881b1bf85f5db415e87d3cac620d55272838 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 21:08:22 -0700
+Subject: net: dsa: mv88e6xxx: Fix error when setting port policy on mv88e6393x
+
+From: Peter Rashleigh <peter@rashleigh.ca>
+
+[ Upstream commit 12bc14949c4a7272b509af0f1022a0deeb215fd8 ]
+
+mv88e6393x_port_set_policy doesn't correctly shift the ptr value when
+converting the policy format between the old and new styles, so the
+target register ends up with the ptr being written over the data bits.
+
+Shift the pointer to align with the format expected by
+mv88e6393x_port_policy_write().
+
+Fixes: 6584b26020fc ("net: dsa: mv88e6xxx: implement .port_set_policy for Amethyst")
+Signed-off-by: Peter Rashleigh <peter@rashleigh.ca>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Message-ID: <20241016040822.3917-1-peter@rashleigh.ca>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mv88e6xxx/port.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
+index f79cf716c541f..553f577c25a6b 100644
+--- a/drivers/net/dsa/mv88e6xxx/port.c
++++ b/drivers/net/dsa/mv88e6xxx/port.c
+@@ -1728,6 +1728,7 @@ int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port,
+       ptr = shift / 8;
+       shift %= 8;
+       mask >>= ptr * 8;
++      ptr <<= 8;
+       err = mv88e6393x_port_policy_read(chip, port, ptr, &reg);
+       if (err)
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch b/queue-6.1/net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch
new file mode 100644 (file)
index 0000000..f6e62f2
--- /dev/null
@@ -0,0 +1,118 @@
+From 814fa503a2f9aa61d4bfd8d26064fd39e63d5706 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 19:41:18 +0000
+Subject: net: fix races in netdev_tx_sent_queue()/dev_watchdog()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 95ecba62e2fd201bcdcca636f5d774f1cd4f1458 ]
+
+Some workloads hit the infamous dev_watchdog() message:
+
+"NETDEV WATCHDOG: eth0 (xxxx): transmit queue XX timed out"
+
+It seems possible to hit this even for perfectly normal
+BQL enabled drivers:
+
+1) Assume a TX queue was idle for more than dev->watchdog_timeo
+   (5 seconds unless changed by the driver)
+
+2) Assume a big packet is sent, exceeding current BQL limit.
+
+3) Driver ndo_start_xmit() puts the packet in TX ring,
+   and netdev_tx_sent_queue() is called.
+
+4) QUEUE_STATE_STACK_XOFF could be set from netdev_tx_sent_queue()
+   before txq->trans_start has been written.
+
+5) txq->trans_start is written later, from netdev_start_xmit()
+
+    if (rc == NETDEV_TX_OK)
+          txq_trans_update(txq)
+
+dev_watchdog() running on another cpu could read the old
+txq->trans_start, and then see QUEUE_STATE_STACK_XOFF, because 5)
+did not happen yet.
+
+To solve the issue, write txq->trans_start right before one XOFF bit
+is set :
+
+- _QUEUE_STATE_DRV_XOFF from netif_tx_stop_queue()
+- __QUEUE_STATE_STACK_XOFF from netdev_tx_sent_queue()
+
+From dev_watchdog(), we have to read txq->state before txq->trans_start.
+
+Add memory barriers to enforce correct ordering.
+
+In the future, we could avoid writing over txq->trans_start for normal
+operations, and rename this field to txq->xoff_start_time.
+
+Fixes: bec251bc8b6a ("net: no longer stop all TX queues in dev_watchdog()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://patch.msgid.link/20241015194118.3951657-1-edumazet@google.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 12 ++++++++++++
+ net/sched/sch_generic.c   |  8 +++++++-
+ 2 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 8b67b266cce63..fbbd0df1106b6 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3274,6 +3274,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
+ static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
+ {
++      /* Paired with READ_ONCE() from dev_watchdog() */
++      WRITE_ONCE(dev_queue->trans_start, jiffies);
++
++      /* This barrier is paired with smp_mb() from dev_watchdog() */
++      smp_mb__before_atomic();
++
+       /* Must be an atomic op see netif_txq_try_stop() */
+       set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
+ }
+@@ -3390,6 +3396,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+       if (likely(dql_avail(&dev_queue->dql) >= 0))
+               return;
++      /* Paired with READ_ONCE() from dev_watchdog() */
++      WRITE_ONCE(dev_queue->trans_start, jiffies);
++
++      /* This barrier is paired with smp_mb() from dev_watchdog() */
++      smp_mb__before_atomic();
++
+       set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
+       /*
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 7f0c8df7b63e0..b51af871a621c 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -511,9 +511,15 @@ static void dev_watchdog(struct timer_list *t)
+                               struct netdev_queue *txq;
+                               txq = netdev_get_tx_queue(dev, i);
+-                              trans_start = READ_ONCE(txq->trans_start);
+                               if (!netif_xmit_stopped(txq))
+                                       continue;
++
++                              /* Paired with WRITE_ONCE() + smp_mb...() in
++                               * netdev_tx_sent_queue() and netif_tx_stop_queue().
++                               */
++                              smp_mb();
++                              trans_start = READ_ONCE(txq->trans_start);
++
+                               if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
+                                       timedout_ms = jiffies_to_msecs(jiffies - trans_start);
+                                       atomic_long_inc(&txq->trans_timeout);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-plip-fix-break-causing-plip-to-never-transmit.patch b/queue-6.1/net-plip-fix-break-causing-plip-to-never-transmit.patch
new file mode 100644 (file)
index 0000000..c5bdcdf
--- /dev/null
@@ -0,0 +1,45 @@
+From 51ee1b000ff35a75e66c5be7090f329eb1d41b3e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 17:16:04 +0200
+Subject: net: plip: fix break; causing plip to never transmit
+
+From: Jakub Boehm <boehm.jakub@gmail.com>
+
+[ Upstream commit f99cf996ba5a315f8b9f13cc21dff0604a0eb749 ]
+
+Since commit
+  71ae2cb30531 ("net: plip: Fix fall-through warnings for Clang")
+
+plip was not able to send any packets, this patch replaces one
+unintended break; with fallthrough; which was originally missed by
+commit 9525d69a3667 ("net: plip: mark expected switch fall-throughs").
+
+I have verified with a real hardware PLIP connection that everything
+works once again after applying this patch.
+
+Fixes: 71ae2cb30531 ("net: plip: Fix fall-through warnings for Clang")
+Signed-off-by: Jakub Boehm <boehm.jakub@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Message-ID: <20241015-net-plip-tx-fix-v1-1-32d8be1c7e0b@gmail.com>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/plip/plip.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
+index 40ce8abe69995..6019811920a44 100644
+--- a/drivers/net/plip/plip.c
++++ b/drivers/net/plip/plip.c
+@@ -815,7 +815,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl,
+                               return HS_TIMEOUT;
+                       }
+               }
+-              break;
++              fallthrough;
+       case PLIP_PK_LENGTH_LSB:
+               if (plip_send(nibble_timeout, dev,
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-provide-macros-for-commonly-copied-lockless-queu.patch b/queue-6.1/net-provide-macros-for-commonly-copied-lockless-queu.patch
new file mode 100644 (file)
index 0000000..ff97e3c
--- /dev/null
@@ -0,0 +1,216 @@
+From c7f15a3dbf838e111b178494d30641adab0517c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Apr 2023 18:25:33 -0700
+Subject: net: provide macros for commonly copied lockless queue stop/wake code
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit c91c46de6bbc1147ae5dfe046b87f5f3d6593215 ]
+
+A lot of drivers follow the same scheme to stop / start queues
+without introducing locks between xmit and NAPI tx completions.
+I'm guessing they all copy'n'paste each other's code.
+The original code dates back all the way to e1000 and Linux 2.6.19.
+
+Smaller drivers shy away from the scheme and introduce a lock
+which may cause deadlocks in netpoll.
+
+Provide macros which encapsulate the necessary logic.
+
+The macros do not prevent false wake ups, the extra barrier
+required to close that race is not worth it. See discussion in:
+https://lore.kernel.org/all/c39312a2-4537-14b4-270c-9fe1fbb91e89@gmail.com/
+
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/networking/driver.rst |   6 ++
+ include/linux/netdevice.h           |   1 +
+ include/net/netdev_queues.h         | 144 ++++++++++++++++++++++++++++
+ 3 files changed, 151 insertions(+)
+ create mode 100644 include/net/netdev_queues.h
+
+diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
+index 3040a74d421c7..870f933e4a1a1 100644
+--- a/Documentation/networking/driver.rst
++++ b/Documentation/networking/driver.rst
+@@ -67,6 +67,12 @@ and::
+           TX_BUFFS_AVAIL(dp) > 0)
+               netif_wake_queue(dp->dev);
++Lockless queue stop / wake helper macros
++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
++
++.. kernel-doc:: include/net/netdev_queues.h
++   :doc: Lockless queue stopping / waking helpers.
++
+ No exclusive ownership
+ ----------------------
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 0373e09359905..8b67b266cce63 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3274,6 +3274,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
+ static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
+ {
++      /* Must be an atomic op see netif_txq_try_stop() */
+       set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
+ }
+diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
+new file mode 100644
+index 0000000000000..5236d78bbdebb
+--- /dev/null
++++ b/include/net/netdev_queues.h
+@@ -0,0 +1,144 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef _LINUX_NET_QUEUES_H
++#define _LINUX_NET_QUEUES_H
++
++#include <linux/netdevice.h>
++
++/**
++ * DOC: Lockless queue stopping / waking helpers.
++ *
++ * The netif_txq_maybe_stop() and __netif_txq_maybe_wake()
++ * macros are designed to safely implement stopping
++ * and waking netdev queues without full lock protection.
++ *
++ * We assume that there can be no concurrent stop attempts and no concurrent
++ * wake attempts. The try-stop should happen from the xmit handler,
++ * while wake up should be triggered from NAPI poll context.
++ * The two may run concurrently (single producer, single consumer).
++ *
++ * The try-stop side is expected to run from the xmit handler and therefore
++ * it does not reschedule Tx (netif_tx_start_queue() instead of
++ * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
++ * handler may lead to xmit queue being enabled but not run.
++ * The waking side does not have similar context restrictions.
++ *
++ * The macros guarantee that rings will not remain stopped if there's
++ * space available, but they do *not* prevent false wake ups when
++ * the ring is full! Drivers should check for ring full at the start
++ * for the xmit handler.
++ *
++ * All descriptor ring indexes (and other relevant shared state) must
++ * be updated before invoking the macros.
++ */
++
++#define netif_txq_try_stop(txq, get_desc, start_thrs)                 \
++      ({                                                              \
++              int _res;                                               \
++                                                                      \
++              netif_tx_stop_queue(txq);                               \
++              /* Producer index and stop bit must be visible          \
++               * to consumer before we recheck.                       \
++               * Pairs with a barrier in __netif_txq_maybe_wake().    \
++               */                                                     \
++              smp_mb__after_atomic();                                 \
++                                                                      \
++              /* We need to check again in a case another             \
++               * CPU has just made room available.                    \
++               */                                                     \
++              _res = 0;                                               \
++              if (unlikely(get_desc >= start_thrs)) {                 \
++                      netif_tx_start_queue(txq);                      \
++                      _res = -1;                                      \
++              }                                                       \
++              _res;                                                   \
++      })                                                              \
++
++/**
++ * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
++ * @txq:      struct netdev_queue to stop/start
++ * @get_desc: get current number of free descriptors (see requirements below!)
++ * @stop_thrs:        minimal number of available descriptors for queue to be left
++ *            enabled
++ * @start_thrs:       minimal number of descriptors to re-enable the queue, can be
++ *            equal to @stop_thrs or higher to avoid frequent waking
++ *
++ * All arguments may be evaluated multiple times, beware of side effects.
++ * @get_desc must be a formula or a function call, it must always
++ * return up-to-date information when evaluated!
++ * Expected to be used from ndo_start_xmit, see the comment on top of the file.
++ *
++ * Returns:
++ *     0 if the queue was stopped
++ *     1 if the queue was left enabled
++ *    -1 if the queue was re-enabled (raced with waking)
++ */
++#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs)    \
++      ({                                                              \
++              int _res;                                               \
++                                                                      \
++              _res = 1;                                               \
++              if (unlikely(get_desc < stop_thrs))                     \
++                      _res = netif_txq_try_stop(txq, get_desc, start_thrs); \
++              _res;                                                   \
++      })                                                              \
++
++
++/**
++ * __netif_txq_maybe_wake() - locklessly wake a Tx queue, if needed
++ * @txq:      struct netdev_queue to stop/start
++ * @get_desc: get current number of free descriptors (see requirements below!)
++ * @start_thrs:       minimal number of descriptors to re-enable the queue
++ * @down_cond:        down condition, predicate indicating that the queue should
++ *            not be woken up even if descriptors are available
++ *
++ * All arguments may be evaluated multiple times.
++ * @get_desc must be a formula or a function call, it must always
++ * return up-to-date information when evaluated!
++ *
++ * Returns:
++ *     0 if the queue was woken up
++ *     1 if the queue was already enabled (or disabled but @down_cond is true)
++ *    -1 if the queue was left unchanged (@start_thrs not reached)
++ */
++#define __netif_txq_maybe_wake(txq, get_desc, start_thrs, down_cond)  \
++      ({                                                              \
++              int _res;                                               \
++                                                                      \
++              _res = -1;                                              \
++              if (likely(get_desc > start_thrs)) {                    \
++                      /* Make sure that anybody stopping the queue after \
++                       * this sees the new next_to_clean.             \
++                       */                                             \
++                      smp_mb();                                       \
++                      _res = 1;                                       \
++                      if (unlikely(netif_tx_queue_stopped(txq)) &&    \
++                          !(down_cond)) {                             \
++                              netif_tx_wake_queue(txq);               \
++                              _res = 0;                               \
++                      }                                               \
++              }                                                       \
++              _res;                                                   \
++      })
++
++#define netif_txq_maybe_wake(txq, get_desc, start_thrs)               \
++      __netif_txq_maybe_wake(txq, get_desc, start_thrs, false)
++
++/* subqueue variants follow */
++
++#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs)               \
++      ({                                                              \
++              struct netdev_queue *txq;                               \
++                                                                      \
++              txq = netdev_get_tx_queue(dev, idx);                    \
++              netif_txq_try_stop(txq, get_desc, start_thrs);          \
++      })
++
++#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
++      ({                                                              \
++              struct netdev_queue *txq;                               \
++                                                                      \
++              txq = netdev_get_tx_queue(dev, idx);                    \
++              netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
++      })
++
++#endif
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch b/queue-6.1/net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch
new file mode 100644 (file)
index 0000000..b8ca318
--- /dev/null
@@ -0,0 +1,135 @@
+From 1e39b731c8c085ef04cc304f7e53f340b87e3d6b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 19:10:48 +0300
+Subject: net/sched: act_api: deny mismatched skip_sw/skip_hw flags for actions
+ created by classifiers
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 34d35b4edbbe890a91bec939bfd29ad92517a52b ]
+
+tcf_action_init() has logic for checking mismatches between action and
+filter offload flags (skip_sw/skip_hw). AFAIU, this is intended to run
+on the transition between the new tc_act_bind(flags) returning true (aka
+now gets bound to classifier) and tc_act_bind(act->tcfa_flags) returning
+false (aka action was not bound to classifier before). Otherwise, the
+check is skipped.
+
+For the case where an action is not standalone, but rather it was
+created by a classifier and is bound to it, tcf_action_init() skips the
+check entirely, and this means it allows mismatched flags to occur.
+
+Taking the matchall classifier code path as an example (with mirred as
+an action), the reason is the following:
+
+ 1 | mall_change()
+ 2 | -> mall_replace_hw_filter()
+ 3 |   -> tcf_exts_validate_ex()
+ 4 |      -> flags |= TCA_ACT_FLAGS_BIND;
+ 5 |      -> tcf_action_init()
+ 6 |         -> tcf_action_init_1()
+ 7 |            -> a_o->init()
+ 8 |               -> tcf_mirred_init()
+ 9 |                  -> tcf_idr_create_from_flags()
+10 |                     -> tcf_idr_create()
+11 |                        -> p->tcfa_flags = flags;
+12 |         -> tc_act_bind(flags))
+13 |         -> tc_act_bind(act->tcfa_flags)
+
+When invoked from tcf_exts_validate_ex() like matchall does (but other
+classifiers validate their extensions as well), tcf_action_init() runs
+in a call path where "flags" always contains TCA_ACT_FLAGS_BIND (set by
+line 4). So line 12 is always true, and line 13 is always true as well.
+No transition ever takes place, and the check is skipped.
+
+The code was added in this form in commit c86e0209dc77 ("flow_offload:
+validate flags of filter and actions"), but I'm attributing the blame
+even earlier in that series, to when TCA_ACT_FLAGS_SKIP_HW and
+TCA_ACT_FLAGS_SKIP_SW were added to the UAPI.
+
+Following the development process of this change, the check did not
+always exist in this form. A change took place between v3 [1] and v4 [2],
+AFAIU due to review feedback that it doesn't make sense for action flags
+to be different than classifier flags. I think I agree with that
+feedback, but it was translated into code that omits enforcing this for
+"classic" actions created at the same time with the filters themselves.
+
+There are 3 more important cases to discuss. First there is this command:
+
+$ tc qdisc add dev eth0 clsact
+$ tc filter add dev eth0 ingress matchall skip_sw \
+       action mirred ingress mirror dev eth1
+
+which should be allowed, because prior to the concept of dedicated
+action flags, it used to work and it used to mean the action inherited
+the skip_sw/skip_hw flags from the classifier. It's not a mismatch.
+
+Then we have this command:
+
+$ tc qdisc add dev eth0 clsact
+$ tc filter add dev eth0 ingress matchall skip_sw \
+       action mirred ingress mirror dev eth1 skip_hw
+
+where there is a mismatch and it should be rejected.
+
+Finally, we have:
+
+$ tc qdisc add dev eth0 clsact
+$ tc filter add dev eth0 ingress matchall skip_sw \
+       action mirred ingress mirror dev eth1 skip_sw
+
+where the offload flags coincide, and this should be treated the same as
+the first command based on inheritance, and accepted.
+
+[1]: https://lore.kernel.org/netdev/20211028110646.13791-9-simon.horman@corigine.com/
+[2]: https://lore.kernel.org/netdev/20211118130805.23897-10-simon.horman@corigine.com/
+Fixes: 7adc57651211 ("flow_offload: add skip_hw and skip_sw to control if offload the action")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Tested-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://patch.msgid.link/20241017161049.3570037-1-vladimir.oltean@nxp.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/act_api.c | 23 ++++++++++++++++++++++-
+ 1 file changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/act_api.c b/net/sched/act_api.c
+index 5a361deb804a3..05bd1e9bca36a 100644
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -1493,8 +1493,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
+                       bool skip_sw = tc_skip_sw(fl_flags);
+                       bool skip_hw = tc_skip_hw(fl_flags);
+-                      if (tc_act_bind(act->tcfa_flags))
++                      if (tc_act_bind(act->tcfa_flags)) {
++                              /* Action is created by classifier and is not
++                               * standalone. Check that the user did not set
++                               * any action flags different than the
++                               * classifier flags, and inherit the flags from
++                               * the classifier for the compatibility case
++                               * where no flags were specified at all.
++                               */
++                              if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) ||
++                                  (tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) {
++                                      NL_SET_ERR_MSG(extack,
++                                                     "Mismatch between action and filter offload flags");
++                                      err = -EINVAL;
++                                      goto err;
++                              }
++                              if (skip_sw)
++                                      act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW;
++                              if (skip_hw)
++                                      act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW;
+                               continue;
++                      }
++
++                      /* Action is standalone */
+                       if (skip_sw != tc_act_skip_sw(act->tcfa_flags) ||
+                           skip_hw != tc_act_skip_hw(act->tcfa_flags)) {
+                               NL_SET_ERR_MSG(extack,
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sched-adjust-device-watchdog-timer-to-detect-sto.patch b/queue-6.1/net-sched-adjust-device-watchdog-timer-to-detect-sto.patch
new file mode 100644 (file)
index 0000000..77fe032
--- /dev/null
@@ -0,0 +1,72 @@
+From bde764eabf47948525f388a54af5c6eb5ff90ac0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 May 2024 19:06:17 +0530
+Subject: net/sched: adjust device watchdog timer to detect stopped queue at
+ right time
+
+From: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
+
+[ Upstream commit 33fb988b67050d9bb512f77f08453fa00088943c ]
+
+Applications are sensitive to long network latency, particularly
+heartbeat monitoring ones. The longer the tx timeout recovery takes, the
+higher the risk to such applications on production machines. This patch
+remedies that while still honoring the device-set tx timeout.
+
+Modify the next watchdog timeout to be shorter than the one the device
+specified. Compute it as the device watchdog timeout less the time since
+the queue was stopped. At the next watchdog timeout, the tx timeout
+handler is called if the queue is still stopped. Whether or not it is
+called, restore the watchdog timeout to the device-specified value.
+
+Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
+Link: https://lore.kernel.org/r/20240508133617.4424-1-praveen.kannoju@oracle.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_generic.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 6ab9359c1706f..7f0c8df7b63e0 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -505,19 +505,22 @@ static void dev_watchdog(struct timer_list *t)
+                       unsigned int timedout_ms = 0;
+                       unsigned int i;
+                       unsigned long trans_start;
++                      unsigned long oldest_start = jiffies;
+                       for (i = 0; i < dev->num_tx_queues; i++) {
+                               struct netdev_queue *txq;
+                               txq = netdev_get_tx_queue(dev, i);
+                               trans_start = READ_ONCE(txq->trans_start);
+-                              if (netif_xmit_stopped(txq) &&
+-                                  time_after(jiffies, (trans_start +
+-                                                       dev->watchdog_timeo))) {
++                              if (!netif_xmit_stopped(txq))
++                                      continue;
++                              if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
+                                       timedout_ms = jiffies_to_msecs(jiffies - trans_start);
+                                       atomic_long_inc(&txq->trans_timeout);
+                                       break;
+                               }
++                              if (time_after(oldest_start, trans_start))
++                                      oldest_start = trans_start;
+                       }
+                       if (unlikely(timedout_ms)) {
+@@ -530,7 +533,7 @@ static void dev_watchdog(struct timer_list *t)
+                               netif_unfreeze_queues(dev);
+                       }
+                       if (!mod_timer(&dev->watchdog_timer,
+-                                     round_jiffies(jiffies +
++                                     round_jiffies(oldest_start +
+                                                    dev->watchdog_timeo)))
+                               release = false;
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sched-fix-use-after-free-in-taprio_change.patch b/queue-6.1/net-sched-fix-use-after-free-in-taprio_change.patch
new file mode 100644 (file)
index 0000000..7fb7628
--- /dev/null
@@ -0,0 +1,45 @@
+From eb15215539e3dac0d80f7a85b8e3bc3978b57ec8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Oct 2024 08:13:38 +0300
+Subject: net: sched: fix use-after-free in taprio_change()
+
+From: Dmitry Antipov <dmantipov@yandex.ru>
+
+[ Upstream commit f504465970aebb2467da548f7c1efbbf36d0f44b ]
+
+In 'taprio_change()', 'admin' pointer may become dangling due to sched
+switch / removal caused by 'advance_sched()', and critical section
+protected by 'q->current_entry_lock' is too small to prevent from such
+a scenario (which causes use-after-free detected by KASAN). Fix this
+by preferring 'rcu_replace_pointer()' over 'rcu_assign_pointer()' to update
+'admin' immediately before an attempt to schedule freeing.
+
+Fixes: a3d43c0d56f1 ("taprio: Add support adding an admin schedule")
+Reported-by: syzbot+b65e0af58423fc8a73aa@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=b65e0af58423fc8a73aa
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
+Link: https://patch.msgid.link/20241018051339.418890-1-dmantipov@yandex.ru
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_taprio.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index 07f6f5343dd71..212fef2b72f50 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1681,7 +1681,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
+               taprio_start_sched(sch, start, new_admin);
+-              rcu_assign_pointer(q->admin_sched, new_admin);
++              admin = rcu_replace_pointer(q->admin_sched, new_admin,
++                                          lockdep_rtnl_is_held());
+               if (admin)
+                       call_rcu(&admin->rcu, taprio_free_sched_cb);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch b/queue-6.1/net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch
new file mode 100644 (file)
index 0000000..f720560
--- /dev/null
@@ -0,0 +1,37 @@
+From e9e5f21b0711a747ccaedbd37e6a0083957bf69b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 22:41:48 +0800
+Subject: net/sun3_82586: fix potential memory leak in sun3_82586_send_packet()
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit 2cb3f56e827abb22c4168ad0c1bbbf401bb2f3b8 ]
+
+The sun3_82586_send_packet() returns NETDEV_TX_OK without freeing skb
+in case of skb->len being too long, add dev_kfree_skb() to fix it.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Message-ID: <20241015144148.7918-1-wanghai38@huawei.com>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/i825xx/sun3_82586.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
+index 72d3b5328ebb4..54c83e66bf78b 100644
+--- a/drivers/net/ethernet/i825xx/sun3_82586.c
++++ b/drivers/net/ethernet/i825xx/sun3_82586.c
+@@ -1011,6 +1011,7 @@ sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev)
+       if(skb->len > XMIT_BUFF_SIZE)
+       {
+               printk("%s: Sorry, max. framelength is %d bytes. The length of your frame is %d bytes.\n",dev->name,XMIT_BUFF_SIZE,skb->len);
++              dev_kfree_skb(skb);
+               return NETDEV_TX_OK;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-usb-usbnet-fix-name-regression.patch b/queue-6.1/net-usb-usbnet-fix-name-regression.patch
new file mode 100644 (file)
index 0000000..f04eb1f
--- /dev/null
@@ -0,0 +1,46 @@
+From 06db4f5d113ac9fbf69a9a731199b552e0ece928 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 09:18:37 +0200
+Subject: net: usb: usbnet: fix name regression
+
+From: Oliver Neukum <oneukum@suse.com>
+
+[ Upstream commit 8a7d12d674ac6f2147c18f36d1e15f1a48060edf ]
+
+The fix for MAC addresses broke detection of the naming convention
+because it gave network devices no random MAC before bind()
+was called. This means that the check for the local assignment bit
+was always negative as the address was zeroed from allocation,
+instead of from overwriting the MAC with a unique hardware address.
+
+The correct check for whether bind() has altered the MAC is
+done with is_zero_ether_addr
+
+Signed-off-by: Oliver Neukum <oneukum@suse.com>
+Reported-by: Greg Thelen <gthelen@google.com>
+Diagnosed-by: John Sperbeck <jsperbeck@google.com>
+Fixes: bab8eb0dd4cb9 ("usbnet: modern method to get random MAC")
+Link: https://patch.msgid.link/20241017071849.389636-1-oneukum@suse.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/usbnet.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
+index ce587a12b894c..ae1282487b02a 100644
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -1766,7 +1766,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
+               // can rename the link if it knows better.
+               if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
+                   ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
+-                   (net->dev_addr [0] & 0x02) == 0))
++                   /* somebody touched it*/
++                   !is_zero_ether_addr(net->dev_addr)))
+                       strscpy(net->name, "eth%d", sizeof(net->name));
+               /* WLAN devices should always be named "wlan%d" */
+               if ((dev->driver_info->flags & FLAG_WLAN) != 0)
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch b/queue-6.1/net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch
new file mode 100644 (file)
index 0000000..8d4373f
--- /dev/null
@@ -0,0 +1,107 @@
+From 732427f87e1023f023e6b8bd5c016944db1bd104 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 21:16:21 +0800
+Subject: net: wwan: fix global oob in wwan_rtnl_policy
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit 47dd5447cab8ce30a847a0337d5341ae4c7476a7 ]
+
+The variable wwan_rtnl_link_ops assigns a *bigger* maxtype, which leads to
+a global out-of-bounds read when parsing the netlink attributes. This is
+the same bug as the oob fixed in commit b33fb5b801c6 ("net: qualcomm:
+rmnet: fix global oob in rmnet_policy").
+
+==================================================================
+BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:388 [inline]
+BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x19d7/0x29a0 lib/nlattr.c:603
+Read of size 1 at addr ffffffff8b09cb60 by task syz.1.66276/323862
+
+CPU: 0 PID: 323862 Comm: syz.1.66276 Not tainted 6.1.70 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x177/0x231 lib/dump_stack.c:106
+ print_address_description mm/kasan/report.c:284 [inline]
+ print_report+0x14f/0x750 mm/kasan/report.c:395
+ kasan_report+0x139/0x170 mm/kasan/report.c:495
+ validate_nla lib/nlattr.c:388 [inline]
+ __nla_validate_parse+0x19d7/0x29a0 lib/nlattr.c:603
+ __nla_parse+0x3c/0x50 lib/nlattr.c:700
+ nla_parse_nested_deprecated include/net/netlink.h:1269 [inline]
+ __rtnl_newlink net/core/rtnetlink.c:3514 [inline]
+ rtnl_newlink+0x7bc/0x1fd0 net/core/rtnetlink.c:3623
+ rtnetlink_rcv_msg+0x794/0xef0 net/core/rtnetlink.c:6122
+ netlink_rcv_skb+0x1de/0x420 net/netlink/af_netlink.c:2508
+ netlink_unicast_kernel net/netlink/af_netlink.c:1326 [inline]
+ netlink_unicast+0x74b/0x8c0 net/netlink/af_netlink.c:1352
+ netlink_sendmsg+0x882/0xb90 net/netlink/af_netlink.c:1874
+ sock_sendmsg_nosec net/socket.c:716 [inline]
+ __sock_sendmsg net/socket.c:728 [inline]
+ ____sys_sendmsg+0x5cc/0x8f0 net/socket.c:2499
+ ___sys_sendmsg+0x21c/0x290 net/socket.c:2553
+ __sys_sendmsg net/socket.c:2582 [inline]
+ __do_sys_sendmsg net/socket.c:2591 [inline]
+ __se_sys_sendmsg+0x19e/0x270 net/socket.c:2589
+ do_syscall_x64 arch/x86/entry/common.c:51 [inline]
+ do_syscall_64+0x45/0x90 arch/x86/entry/common.c:81
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f67b19a24ad
+RSP: 002b:00007f67b17febb8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007f67b1b45f80 RCX: 00007f67b19a24ad
+RDX: 0000000000000000 RSI: 0000000020005e40 RDI: 0000000000000004
+RBP: 00007f67b1a1e01d R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007ffd2513764f R14: 00007ffd251376e0 R15: 00007f67b17fed40
+ </TASK>
+
+The buggy address belongs to the variable:
+ wwan_rtnl_policy+0x20/0x40
+
+The buggy address belongs to the physical page:
+page:ffffea00002c2700 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xb09c
+flags: 0xfff00000001000(reserved|node=0|zone=1|lastcpupid=0x7ff)
+raw: 00fff00000001000 ffffea00002c2708 ffffea00002c2708 0000000000000000
+raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+page_owner info is not present (never set?)
+
+Memory state around the buggy address:
+ ffffffff8b09ca00: 05 f9 f9 f9 05 f9 f9 f9 00 01 f9 f9 00 01 f9 f9
+ ffffffff8b09ca80: 00 00 00 05 f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9
+>ffffffff8b09cb00: 00 00 00 00 05 f9 f9 f9 00 00 00 00 f9 f9 f9 f9
+                                                       ^
+ ffffffff8b09cb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+==================================================================
+
+According to the comment of `nla_parse_nested_deprecated`, use correct size
+`IFLA_WWAN_MAX` here to fix this issue.
+
+Fixes: 88b710532e53 ("wwan: add interface creation support")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20241015131621.47503-1-linma@zju.edu.cn
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wwan/wwan_core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c
+index 62e9f7d6c9fe9..7089cb103885d 100644
+--- a/drivers/net/wwan/wwan_core.c
++++ b/drivers/net/wwan/wwan_core.c
+@@ -994,7 +994,7 @@ static const struct nla_policy wwan_rtnl_policy[IFLA_WWAN_MAX + 1] = {
+ static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = {
+       .kind = "wwan",
+-      .maxtype = __IFLA_WWAN_MAX,
++      .maxtype = IFLA_WWAN_MAX,
+       .alloc = wwan_rtnl_alloc,
+       .validate = wwan_rtnl_validate,
+       .newlink = wwan_rtnl_newlink,
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-xtables-fix-typo-causing-some-targets-not-.patch b/queue-6.1/netfilter-xtables-fix-typo-causing-some-targets-not-.patch
new file mode 100644 (file)
index 0000000..ee4c7fc
--- /dev/null
@@ -0,0 +1,74 @@
+From cf1c77ac09349a33653ad5d9e49b619745dabf32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Oct 2024 14:49:51 +0200
+Subject: netfilter: xtables: fix typo causing some targets not to load on IPv6
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 306ed1728e8438caed30332e1ab46b28c25fe3d8 ]
+
+- There is no NFPROTO_IPV6 family for mark and NFLOG.
+- TRACE is also missing module autoload with NFPROTO_IPV6.
+
+This results in ip6tables failing to restore a ruleset. This issue has been
+reported by several users providing incomplete patches.
+
+Very similar to Ilya Katsnelson's patch including a missing chunk in the
+TRACE extension.
+
+Fixes: 0bfcb7b71e73 ("netfilter: xtables: avoid NFPROTO_UNSPEC where needed")
+Reported-by: Ignat Korchagin <ignat@cloudflare.com>
+Reported-by: Ilya Katsnelson <me@0upti.me>
+Reported-by: Krzysztof Olędzki <ole@ans.pl>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/xt_NFLOG.c | 2 +-
+ net/netfilter/xt_TRACE.c | 1 +
+ net/netfilter/xt_mark.c  | 2 +-
+ 3 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
+index d80abd6ccaf8f..6dcf4bc7e30b2 100644
+--- a/net/netfilter/xt_NFLOG.c
++++ b/net/netfilter/xt_NFLOG.c
+@@ -79,7 +79,7 @@ static struct xt_target nflog_tg_reg[] __read_mostly = {
+       {
+               .name       = "NFLOG",
+               .revision   = 0,
+-              .family     = NFPROTO_IPV4,
++              .family     = NFPROTO_IPV6,
+               .checkentry = nflog_tg_check,
+               .destroy    = nflog_tg_destroy,
+               .target     = nflog_tg,
+diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
+index f3fa4f11348cd..a642ff09fc8e8 100644
+--- a/net/netfilter/xt_TRACE.c
++++ b/net/netfilter/xt_TRACE.c
+@@ -49,6 +49,7 @@ static struct xt_target trace_tg_reg[] __read_mostly = {
+               .target         = trace_tg,
+               .checkentry     = trace_tg_check,
+               .destroy        = trace_tg_destroy,
++              .me             = THIS_MODULE,
+       },
+ #endif
+ };
+diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
+index f76fe04fc9a4e..65b965ca40ea7 100644
+--- a/net/netfilter/xt_mark.c
++++ b/net/netfilter/xt_mark.c
+@@ -62,7 +62,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
+       {
+               .name           = "MARK",
+               .revision       = 2,
+-              .family         = NFPROTO_IPV4,
++              .family         = NFPROTO_IPV6,
+               .target         = mark_tg,
+               .targetsize     = sizeof(struct xt_mark_tginfo2),
+               .me             = THIS_MODULE,
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeon_ep-add-skb-allocation-failures-handling-in-__.patch b/queue-6.1/octeon_ep-add-skb-allocation-failures-handling-in-__.patch
new file mode 100644 (file)
index 0000000..a9f617f
--- /dev/null
@@ -0,0 +1,84 @@
+From 6ff9546d7d7dc85d1acc1345050ad7bbcb64aa5c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 13:06:51 +0300
+Subject: octeon_ep: Add SKB allocation failures handling in
+ __octep_oq_process_rx()
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit eb592008f79be52ccef88cd9a5249b3fc0367278 ]
+
+build_skb() returns NULL in case of a memory allocation failure so handle
+it inside __octep_oq_process_rx() to avoid NULL pointer dereference.
+
+__octep_oq_process_rx() is called during NAPI polling by the driver. If
+skb allocation fails, keep on pulling packets out of the Rx DMA queue: we
+shouldn't break the polling immediately and thus falsely indicate to the
+octep_napi_poll() that the Rx pressure is going down. As there is no
+associated skb in this case, don't process the packets and don't push them
+up the network stack - they are skipped.
+
+Helper function is implemented to unmap/flush all the fragment buffers
+used by the dropped packet. 'alloc_failures' counter is incremented to
+mark the skb allocation error in driver statistics.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support")
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeon_ep/octep_rx.c | 27 +++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+index 03d46c28a009c..4f3c1187a6e82 100644
+--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+@@ -360,6 +360,27 @@ static void octep_oq_next_pkt(struct octep_oq *oq,
+               *read_idx = 0;
+ }
++/**
++ * octep_oq_drop_rx() - Free the resources associated with a packet.
++ *
++ * @oq: Octeon Rx queue data structure.
++ * @buff_info: Current packet buffer info.
++ * @read_idx: Current packet index in the ring.
++ * @desc_used: Current packet descriptor number.
++ *
++ */
++static void octep_oq_drop_rx(struct octep_oq *oq,
++                           struct octep_rx_buffer *buff_info,
++                           u32 *read_idx, u32 *desc_used)
++{
++      int data_len = buff_info->len - oq->max_single_buffer_size;
++
++      while (data_len > 0) {
++              octep_oq_next_pkt(oq, buff_info, read_idx, desc_used);
++              data_len -= oq->buffer_size;
++      };
++}
++
+ /**
+  * __octep_oq_process_rx() - Process hardware Rx queue and push to stack.
+  *
+@@ -415,6 +436,12 @@ static int __octep_oq_process_rx(struct octep_device *oct,
+               octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used);
+               skb = build_skb((void *)resp_hw, PAGE_SIZE);
++              if (!skb) {
++                      octep_oq_drop_rx(oq, buff_info,
++                                       &read_idx, &desc_used);
++                      oq->stats.alloc_failures++;
++                      continue;
++              }
+               skb_reserve(skb, data_offset);
+               rx_bytes += buff_info->len;
+-- 
+2.43.0
+
diff --git a/queue-6.1/octeon_ep-implement-helper-for-iterating-packets-in-.patch b/queue-6.1/octeon_ep-implement-helper-for-iterating-packets-in-.patch
new file mode 100644 (file)
index 0000000..5b4faba
--- /dev/null
@@ -0,0 +1,132 @@
+From c82adea79e872034aedfb8b0950e7999ca542841 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 13:06:50 +0300
+Subject: octeon_ep: Implement helper for iterating packets in Rx queue
+
+From: Aleksandr Mishin <amishin@t-argos.ru>
+
+[ Upstream commit bd28df26197b2bd0913bf1b36770836481975143 ]
+
+The common code with some packet and index manipulations is extracted and
+moved to newly implemented helper to make the code more readable and avoid
+duplication. This is a preparation for skb allocation failure handling.
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Suggested-by: Simon Horman <horms@kernel.org>
+Suggested-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Andrew Lunn <andrew@lunn.ch>
+Stable-dep-of: eb592008f79b ("octeon_ep: Add SKB allocation failures handling in __octep_oq_process_rx()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeon_ep/octep_rx.c | 55 +++++++++++--------
+ 1 file changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+index 392d9b0da0d7a..03d46c28a009c 100644
+--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c
+@@ -336,6 +336,30 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct,
+       return new_pkts;
+ }
++/**
++ * octep_oq_next_pkt() - Move to the next packet in Rx queue.
++ *
++ * @oq: Octeon Rx queue data structure.
++ * @buff_info: Current packet buffer info.
++ * @read_idx: Current packet index in the ring.
++ * @desc_used: Current packet descriptor number.
++ *
++ * Free the resources associated with a packet.
++ * Increment packet index in the ring and packet descriptor number.
++ */
++static void octep_oq_next_pkt(struct octep_oq *oq,
++                            struct octep_rx_buffer *buff_info,
++                            u32 *read_idx, u32 *desc_used)
++{
++      dma_unmap_page(oq->dev, oq->desc_ring[*read_idx].buffer_ptr,
++                     PAGE_SIZE, DMA_FROM_DEVICE);
++      buff_info->page = NULL;
++      (*read_idx)++;
++      (*desc_used)++;
++      if (*read_idx == oq->max_count)
++              *read_idx = 0;
++}
++
+ /**
+  * __octep_oq_process_rx() - Process hardware Rx queue and push to stack.
+  *
+@@ -365,10 +389,7 @@ static int __octep_oq_process_rx(struct octep_device *oct,
+       desc_used = 0;
+       for (pkt = 0; pkt < pkts_to_process; pkt++) {
+               buff_info = (struct octep_rx_buffer *)&oq->buff_info[read_idx];
+-              dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr,
+-                             PAGE_SIZE, DMA_FROM_DEVICE);
+               resp_hw = page_address(buff_info->page);
+-              buff_info->page = NULL;
+               /* Swap the length field that is in Big-Endian to CPU */
+               buff_info->len = be64_to_cpu(resp_hw->length);
+@@ -390,36 +411,27 @@ static int __octep_oq_process_rx(struct octep_device *oct,
+                        */
+                       data_offset = OCTEP_OQ_RESP_HW_SIZE;
+               }
++
++              octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used);
++
++              skb = build_skb((void *)resp_hw, PAGE_SIZE);
++              skb_reserve(skb, data_offset);
++
+               rx_bytes += buff_info->len;
+               if (buff_info->len <= oq->max_single_buffer_size) {
+-                      skb = build_skb((void *)resp_hw, PAGE_SIZE);
+-                      skb_reserve(skb, data_offset);
+                       skb_put(skb, buff_info->len);
+-                      read_idx++;
+-                      desc_used++;
+-                      if (read_idx == oq->max_count)
+-                              read_idx = 0;
+               } else {
+                       struct skb_shared_info *shinfo;
+                       u16 data_len;
+-                      skb = build_skb((void *)resp_hw, PAGE_SIZE);
+-                      skb_reserve(skb, data_offset);
+                       /* Head fragment includes response header(s);
+                        * subsequent fragments contains only data.
+                        */
+                       skb_put(skb, oq->max_single_buffer_size);
+-                      read_idx++;
+-                      desc_used++;
+-                      if (read_idx == oq->max_count)
+-                              read_idx = 0;
+-
+                       shinfo = skb_shinfo(skb);
+                       data_len = buff_info->len - oq->max_single_buffer_size;
+                       while (data_len) {
+-                              dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr,
+-                                             PAGE_SIZE, DMA_FROM_DEVICE);
+                               buff_info = (struct octep_rx_buffer *)
+                                           &oq->buff_info[read_idx];
+                               if (data_len < oq->buffer_size) {
+@@ -434,11 +446,8 @@ static int __octep_oq_process_rx(struct octep_device *oct,
+                                               buff_info->page, 0,
+                                               buff_info->len,
+                                               buff_info->len);
+-                              buff_info->page = NULL;
+-                              read_idx++;
+-                              desc_used++;
+-                              if (read_idx == oq->max_count)
+-                                      read_idx = 0;
++
++                              octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used);
+                       }
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.1/posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch b/queue-6.1/posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch
new file mode 100644 (file)
index 0000000..88a17b0
--- /dev/null
@@ -0,0 +1,58 @@
+From 1dc5aaec946de46cb09d0e42028ebd846acbdcdd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Oct 2024 18:07:48 +0800
+Subject: posix-clock: posix-clock: Fix unbalanced locking in
+ pc_clock_settime()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+[ Upstream commit 6e62807c7fbb3c758d233018caf94dfea9c65dbd ]
+
+If get_clock_desc() succeeds, it calls fget() for the clockid's fd,
+and get the clk->rwsem read lock, so the error path should release
+the lock to make the lock balance and fput the clockid's fd to make
+the refcount balance and release the fd related resource.
+
+However the below commit left the error path locked behind resulting in
+unbalanced locking. Check timespec64_valid_strict() before
+get_clock_desc() to fix it, because the "ts" is not changed
+after that.
+
+Fixes: d8794ac20a29 ("posix-clock: Fix missing timespec64 check in pc_clock_settime()")
+Acked-by: Richard Cochran <richardcochran@gmail.com>
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Acked-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
+[pabeni@redhat.com: fixed commit message typo]
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/posix-clock.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
+index 8127673bfc45e..05e73d209aa87 100644
+--- a/kernel/time/posix-clock.c
++++ b/kernel/time/posix-clock.c
+@@ -290,6 +290,9 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts)
+       struct posix_clock_desc cd;
+       int err;
++      if (!timespec64_valid_strict(ts))
++              return -EINVAL;
++
+       err = get_clock_desc(id, &cd);
+       if (err)
+               return err;
+@@ -299,9 +302,6 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts)
+               goto out;
+       }
+-      if (!timespec64_valid_strict(ts))
+-              return -EINVAL;
+-
+       if (cd.clk->ops.clock_settime)
+               err = cd.clk->ops.clock_settime(cd.clk, ts);
+       else
+-- 
+2.43.0
+
diff --git a/queue-6.1/r8169-avoid-unsolicited-interrupts.patch b/queue-6.1/r8169-avoid-unsolicited-interrupts.patch
new file mode 100644 (file)
index 0000000..e50fa03
--- /dev/null
@@ -0,0 +1,49 @@
+From 2f3b4296152a7e33fa9d504d485634352d5ea64e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Oct 2024 11:08:16 +0200
+Subject: r8169: avoid unsolicited interrupts
+
+From: Heiner Kallweit <hkallweit1@gmail.com>
+
+[ Upstream commit 10ce0db787004875f4dba068ea952207d1d8abeb ]
+
+It was reported that after resume from suspend a PCI error is logged
+and connectivity is broken. Error message is:
+PCI error (cmd = 0x0407, status_errs = 0x0000)
+The message seems to be a red herring as none of the error bits is set,
+and the PCI command register value also is normal. Exception handling
+for a PCI error includes a chip reset, which apparently breaks connectivity
+here. The interrupt status bit triggering the PCI error handling isn't
+actually used on PCIe chip versions, so it's not clear why this bit is
+set by the chip. Fix this by ignoring this bit on PCIe chip versions.
+
+Fixes: 0e4851502f84 ("r8169: merge with version 8.001.00 of Realtek's r8168 driver")
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219388
+Tested-by: Atlas Yu <atlas.yu@canonical.com>
+Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/78e2f535-438f-4212-ad94-a77637ac6c9c@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/realtek/r8169_main.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
+index 8b35e14fba3a8..a74e33bf0302e 100644
+--- a/drivers/net/ethernet/realtek/r8169_main.c
++++ b/drivers/net/ethernet/realtek/r8169_main.c
+@@ -4617,7 +4617,9 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
+       if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask))
+               return IRQ_NONE;
+-      if (unlikely(status & SYSErr)) {
++      /* At least RTL8168fp may unexpectedly set the SYSErr bit */
++      if (unlikely(status & SYSErr &&
++          tp->mac_version <= RTL_GIGA_MAC_VER_06)) {
+               rtl8169_pcierr_interrupt(tp->dev);
+               goto out;
+       }
+-- 
+2.43.0
+
index 6c43245276318e04db30a2ceced8dc864bd25d21..038c15ba915c45f34fce5bc8fc433a01dd6a7b75 100644 (file)
@@ -83,3 +83,25 @@ loongarch-add-support-to-clone-a-time-namespace.patch
 loongarch-don-t-crash-in-stack_top-for-tasks-without.patch
 jfs-fix-sanity-check-in-dbmount.patch
 tracing-consider-the-null-character-when-validating-.patch
+xfrm-extract-dst-lookup-parameters-into-a-struct.patch
+xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch
+net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch
+be2net-fix-potential-memory-leak-in-be_xmit.patch
+net-plip-fix-break-causing-plip-to-never-transmit.patch
+octeon_ep-implement-helper-for-iterating-packets-in-.patch
+octeon_ep-add-skb-allocation-failures-handling-in-__.patch
+net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch
+netfilter-xtables-fix-typo-causing-some-targets-not-.patch
+net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch
+docs-net-reformat-driver.rst-from-a-list-to-sections.patch
+net-provide-macros-for-commonly-copied-lockless-queu.patch
+net-sched-adjust-device-watchdog-timer-to-detect-sto.patch
+net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch
+net-usb-usbnet-fix-name-regression.patch
+net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch
+net-sched-fix-use-after-free-in-taprio_change.patch
+r8169-avoid-unsolicited-interrupts.patch
+posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch
+bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch
+bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch
+bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch
diff --git a/queue-6.1/xfrm-extract-dst-lookup-parameters-into-a-struct.patch b/queue-6.1/xfrm-extract-dst-lookup-parameters-into-a-struct.patch
new file mode 100644 (file)
index 0000000..a816db8
--- /dev/null
@@ -0,0 +1,325 @@
+From 81c7b3a968324203d18fe538b4d936ac29352ab9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 17:07:09 -0700
+Subject: xfrm: extract dst lookup parameters into a struct
+
+From: Eyal Birger <eyal.birger@gmail.com>
+
+[ Upstream commit e509996b16728e37d5a909a5c63c1bd64f23b306 ]
+
+Preparation for adding more fields to dst lookup functions without
+changing their signatures.
+
+Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Stable-dep-of: b84697210343 ("xfrm: respect ip protocols rules criteria when performing dst lookups")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/xfrm.h      | 26 +++++++++++++-------------
+ net/ipv4/xfrm4_policy.c | 38 ++++++++++++++++----------------------
+ net/ipv6/xfrm6_policy.c | 28 +++++++++++++---------------
+ net/xfrm/xfrm_device.c  | 11 ++++++++---
+ net/xfrm/xfrm_policy.c  | 35 +++++++++++++++++++++++------------
+ 5 files changed, 73 insertions(+), 65 deletions(-)
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 5b9c2c535702c..55ea15ccd5327 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -326,20 +326,23 @@ struct xfrm_if_cb {
+ void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+ void xfrm_if_unregister_cb(void);
++struct xfrm_dst_lookup_params {
++      struct net *net;
++      int tos;
++      int oif;
++      xfrm_address_t *saddr;
++      xfrm_address_t *daddr;
++      u32 mark;
++};
++
+ struct net_device;
+ struct xfrm_type;
+ struct xfrm_dst;
+ struct xfrm_policy_afinfo {
+       struct dst_ops          *dst_ops;
+-      struct dst_entry        *(*dst_lookup)(struct net *net,
+-                                             int tos, int oif,
+-                                             const xfrm_address_t *saddr,
+-                                             const xfrm_address_t *daddr,
+-                                             u32 mark);
+-      int                     (*get_saddr)(struct net *net, int oif,
+-                                           xfrm_address_t *saddr,
+-                                           xfrm_address_t *daddr,
+-                                           u32 mark);
++      struct dst_entry        *(*dst_lookup)(const struct xfrm_dst_lookup_params *params);
++      int                     (*get_saddr)(xfrm_address_t *saddr,
++                                           const struct xfrm_dst_lookup_params *params);
+       int                     (*fill_dst)(struct xfrm_dst *xdst,
+                                           struct net_device *dev,
+                                           const struct flowi *fl);
+@@ -1659,10 +1662,7 @@ static inline int xfrm_user_policy(struct sock *sk, int optname,
+ }
+ #endif
+-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
+-                                  const xfrm_address_t *saddr,
+-                                  const xfrm_address_t *daddr,
+-                                  int family, u32 mark);
++struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params);
+ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp);
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 3d0dfa6cf9f96..9ac9ed9738068 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -17,47 +17,41 @@
+ #include <net/ip.h>
+ #include <net/l3mdev.h>
+-static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
+-                                          int tos, int oif,
+-                                          const xfrm_address_t *saddr,
+-                                          const xfrm_address_t *daddr,
+-                                          u32 mark)
++static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4,
++                                          const struct xfrm_dst_lookup_params *params)
+ {
+       struct rtable *rt;
+       memset(fl4, 0, sizeof(*fl4));
+-      fl4->daddr = daddr->a4;
+-      fl4->flowi4_tos = tos;
+-      fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
+-      fl4->flowi4_mark = mark;
+-      if (saddr)
+-              fl4->saddr = saddr->a4;
+-
+-      rt = __ip_route_output_key(net, fl4);
++      fl4->daddr = params->daddr->a4;
++      fl4->flowi4_tos = params->tos;
++      fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net,
++                                                          params->oif);
++      fl4->flowi4_mark = params->mark;
++      if (params->saddr)
++              fl4->saddr = params->saddr->a4;
++
++      rt = __ip_route_output_key(params->net, fl4);
+       if (!IS_ERR(rt))
+               return &rt->dst;
+       return ERR_CAST(rt);
+ }
+-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
+-                                        const xfrm_address_t *saddr,
+-                                        const xfrm_address_t *daddr,
+-                                        u32 mark)
++static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params)
+ {
+       struct flowi4 fl4;
+-      return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
++      return __xfrm4_dst_lookup(&fl4, params);
+ }
+-static int xfrm4_get_saddr(struct net *net, int oif,
+-                         xfrm_address_t *saddr, xfrm_address_t *daddr,
+-                         u32 mark)
++static int xfrm4_get_saddr(xfrm_address_t *saddr,
++                         const struct xfrm_dst_lookup_params *params)
+ {
+       struct dst_entry *dst;
+       struct flowi4 fl4;
+-      dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
++      dst = __xfrm4_dst_lookup(&fl4, params);
+       if (IS_ERR(dst))
+               return -EHOSTUNREACH;
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index b7b5dbf5d037b..6e3e0f1bd81c9 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -23,23 +23,21 @@
+ #include <net/ip6_route.h>
+ #include <net/l3mdev.h>
+-static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
+-                                        const xfrm_address_t *saddr,
+-                                        const xfrm_address_t *daddr,
+-                                        u32 mark)
++static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params)
+ {
+       struct flowi6 fl6;
+       struct dst_entry *dst;
+       int err;
+       memset(&fl6, 0, sizeof(fl6));
+-      fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
+-      fl6.flowi6_mark = mark;
+-      memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
+-      if (saddr)
+-              memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
++      fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net,
++                                                         params->oif);
++      fl6.flowi6_mark = params->mark;
++      memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr));
++      if (params->saddr)
++              memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr));
+-      dst = ip6_route_output(net, NULL, &fl6);
++      dst = ip6_route_output(params->net, NULL, &fl6);
+       err = dst->error;
+       if (dst->error) {
+@@ -50,15 +48,14 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
+       return dst;
+ }
+-static int xfrm6_get_saddr(struct net *net, int oif,
+-                         xfrm_address_t *saddr, xfrm_address_t *daddr,
+-                         u32 mark)
++static int xfrm6_get_saddr(xfrm_address_t *saddr,
++                         const struct xfrm_dst_lookup_params *params)
+ {
+       struct dst_entry *dst;
+       struct net_device *dev;
+       struct inet6_dev *idev;
+-      dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
++      dst = xfrm6_dst_lookup(params);
+       if (IS_ERR(dst))
+               return -EHOSTUNREACH;
+@@ -68,7 +65,8 @@ static int xfrm6_get_saddr(struct net *net, int oif,
+               return -EHOSTUNREACH;
+       }
+       dev = idev->dev;
+-      ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
++      ipv6_dev_get_saddr(dev_net(dev), dev, &params->daddr->in6, 0,
++                         &saddr->in6);
+       dst_release(dst);
+       return 0;
+ }
+diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
+index 21269e8f2db4b..2535ee034a5c8 100644
+--- a/net/xfrm/xfrm_device.c
++++ b/net/xfrm/xfrm_device.c
+@@ -248,6 +248,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
+       dev = dev_get_by_index(net, xuo->ifindex);
+       if (!dev) {
++              struct xfrm_dst_lookup_params params;
++
+               if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
+                       saddr = &x->props.saddr;
+                       daddr = &x->id.daddr;
+@@ -256,9 +258,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
+                       daddr = &x->props.saddr;
+               }
+-              dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
+-                                      x->props.family,
+-                                      xfrm_smark_get(0, x));
++              memset(&params, 0, sizeof(params));
++              params.net = net;
++              params.saddr = saddr;
++              params.daddr = daddr;
++              params.mark = xfrm_smark_get(0, x);
++              dst = __xfrm_dst_lookup(x->props.family, &params);
+               if (IS_ERR(dst))
+                       return 0;
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 5fddde2d5bc48..adb12f428be30 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -251,10 +251,8 @@ static const struct xfrm_if_cb *xfrm_if_get_cb(void)
+       return rcu_dereference(xfrm_if_cb);
+ }
+-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
+-                                  const xfrm_address_t *saddr,
+-                                  const xfrm_address_t *daddr,
+-                                  int family, u32 mark)
++struct dst_entry *__xfrm_dst_lookup(int family,
++                                  const struct xfrm_dst_lookup_params *params)
+ {
+       const struct xfrm_policy_afinfo *afinfo;
+       struct dst_entry *dst;
+@@ -263,7 +261,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
+       if (unlikely(afinfo == NULL))
+               return ERR_PTR(-EAFNOSUPPORT);
+-      dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark);
++      dst = afinfo->dst_lookup(params);
+       rcu_read_unlock();
+@@ -277,6 +275,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
+                                               xfrm_address_t *prev_daddr,
+                                               int family, u32 mark)
+ {
++      struct xfrm_dst_lookup_params params;
+       struct net *net = xs_net(x);
+       xfrm_address_t *saddr = &x->props.saddr;
+       xfrm_address_t *daddr = &x->id.daddr;
+@@ -291,7 +290,14 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
+               daddr = x->coaddr;
+       }
+-      dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark);
++      params.net = net;
++      params.saddr = saddr;
++      params.daddr = daddr;
++      params.tos = tos;
++      params.oif = oif;
++      params.mark = mark;
++
++      dst = __xfrm_dst_lookup(family, &params);
+       if (!IS_ERR(dst)) {
+               if (prev_saddr != saddr)
+@@ -2346,15 +2352,15 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
+ }
+ static int
+-xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
+-             xfrm_address_t *remote, unsigned short family, u32 mark)
++xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr,
++             const struct xfrm_dst_lookup_params *params)
+ {
+       int err;
+       const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+       if (unlikely(afinfo == NULL))
+               return -EINVAL;
+-      err = afinfo->get_saddr(net, oif, local, remote, mark);
++      err = afinfo->get_saddr(saddr, params);
+       rcu_read_unlock();
+       return err;
+ }
+@@ -2383,9 +2389,14 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
+                       remote = &tmpl->id.daddr;
+                       local = &tmpl->saddr;
+                       if (xfrm_addr_any(local, tmpl->encap_family)) {
+-                              error = xfrm_get_saddr(net, fl->flowi_oif,
+-                                                     &tmp, remote,
+-                                                     tmpl->encap_family, 0);
++                              struct xfrm_dst_lookup_params params;
++
++                              memset(&params, 0, sizeof(params));
++                              params.net = net;
++                              params.oif = fl->flowi_oif;
++                              params.daddr = remote;
++                              error = xfrm_get_saddr(tmpl->encap_family, &tmp,
++                                                     &params);
+                               if (error)
+                                       goto fail;
+                               local = &tmp;
+-- 
+2.43.0
+
diff --git a/queue-6.1/xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch b/queue-6.1/xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch
new file mode 100644 (file)
index 0000000..4f36e36
--- /dev/null
@@ -0,0 +1,99 @@
+From 7dcff75260b6327c2f4c7f2b4b92b89a0e956dde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Sep 2024 17:07:10 -0700
+Subject: xfrm: respect ip protocols rules criteria when performing dst lookups
+
+From: Eyal Birger <eyal.birger@gmail.com>
+
+[ Upstream commit b8469721034300bbb6dec5b4bf32492c95e16a0c ]
+
+The series in the "fixes" tag added the ability to consider L4 attributes
+in routing rules.
+
+The dst lookup on the outer packet of encapsulated traffic in the xfrm
+code was not adapted to this change, thus routing behavior that relies
+on L4 information is not respected.
+
+Pass the IP protocol and encapsulation ports when performing dst lookups.
+
+Fixes: a25724b05af0 ("Merge branch 'fib_rules-support-sport-dport-and-proto-match'")
+Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
+Tested-by: Antony Antony <antony.antony@secunet.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/xfrm.h      |  2 ++
+ net/ipv4/xfrm4_policy.c |  2 ++
+ net/ipv6/xfrm6_policy.c |  3 +++
+ net/xfrm/xfrm_policy.c  | 15 +++++++++++++++
+ 4 files changed, 22 insertions(+)
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 55ea15ccd5327..bf670929622dc 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -333,6 +333,8 @@ struct xfrm_dst_lookup_params {
+       xfrm_address_t *saddr;
+       xfrm_address_t *daddr;
+       u32 mark;
++      __u8 ipproto;
++      union flowi_uli uli;
+ };
+ struct net_device;
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 9ac9ed9738068..76f3f38b110f2 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -30,6 +30,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4,
+       fl4->flowi4_mark = params->mark;
+       if (params->saddr)
+               fl4->saddr = params->saddr->a4;
++      fl4->flowi4_proto = params->ipproto;
++      fl4->uli = params->uli;
+       rt = __ip_route_output_key(params->net, fl4);
+       if (!IS_ERR(rt))
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 6e3e0f1bd81c9..8c1092ff3fe2e 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -37,6 +37,9 @@ static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *p
+       if (params->saddr)
+               memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr));
++      fl6.flowi4_proto = params->ipproto;
++      fl6.uli = params->uli;
++
+       dst = ip6_route_output(params->net, NULL, &fl6);
+       err = dst->error;
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index adb12f428be30..a022f49846879 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -296,6 +296,21 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
+       params.tos = tos;
+       params.oif = oif;
+       params.mark = mark;
++      params.ipproto = x->id.proto;
++      if (x->encap) {
++              switch (x->encap->encap_type) {
++              case UDP_ENCAP_ESPINUDP:
++                      params.ipproto = IPPROTO_UDP;
++                      params.uli.ports.sport = x->encap->encap_sport;
++                      params.uli.ports.dport = x->encap->encap_dport;
++                      break;
++              case TCP_ENCAP_ESPINTCP:
++                      params.ipproto = IPPROTO_TCP;
++                      params.uli.ports.sport = x->encap->encap_sport;
++                      params.uli.ports.dport = x->encap->encap_dport;
++                      break;
++              }
++      }
+       dst = __xfrm_dst_lookup(family, &params);
+-- 
+2.43.0
+