From: Sasha Levin Date: Sat, 26 Oct 2024 07:36:45 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v5.15.170~43 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=f7a3ff669cddfdba7575d216c4494380c605b081;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/be2net-fix-potential-memory-leak-in-be_xmit.patch b/queue-6.1/be2net-fix-potential-memory-leak-in-be_xmit.patch new file mode 100644 index 00000000000..0effdae1de1 --- /dev/null +++ b/queue-6.1/be2net-fix-potential-memory-leak-in-be_xmit.patch @@ -0,0 +1,61 @@ +From db5162c632371287150b3c8ca8fe16848b1e01e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 22:48:02 +0800 +Subject: be2net: fix potential memory leak in be_xmit() + +From: Wang Hai + +[ Upstream commit e4dd8bfe0f6a23acd305f9b892c00899089bd621 ] + +The be_xmit() returns NETDEV_TX_OK without freeing skb +in case of be_xmit_enqueue() fails, add dev_kfree_skb_any() to fix it. + +Fixes: 760c295e0e8d ("be2net: Support for OS2BMC.") +Signed-off-by: Wang Hai +Reviewed-by: Simon Horman +Reviewed-by: Kalesh AP +Message-ID: <20241015144802.12150-1-wanghai38@huawei.com> +Signed-off-by: Andrew Lunn +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/emulex/benet/be_main.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c +index a9e4e6464a04c..b0a85c9b952b9 100644 +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -1382,10 +1382,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) + be_get_wrb_params_from_skb(adapter, skb, &wrb_params); + + wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); +- if (unlikely(!wrb_cnt)) { +- dev_kfree_skb_any(skb); +- goto drop; +- } ++ if (unlikely(!wrb_cnt)) ++ goto drop_skb; + + /* if os2bmc is enabled and if the pkt is destined to bmc, + * 
enqueue the pkt a 2nd time with mgmt bit set. +@@ -1394,7 +1392,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) + BE_WRB_F_SET(wrb_params.features, OS2BMC, 1); + wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); + if (unlikely(!wrb_cnt)) +- goto drop; ++ goto drop_skb; + else + skb_get(skb); + } +@@ -1408,6 +1406,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) + be_xmit_flush(adapter, txo); + + return NETDEV_TX_OK; ++drop_skb: ++ dev_kfree_skb_any(skb); + drop: + tx_stats(txo)->tx_drv_drops++; + /* Flush the already enqueued tx requests */ +-- +2.43.0 + diff --git a/queue-6.1/bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch b/queue-6.1/bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch new file mode 100644 index 00000000000..fb18f793013 --- /dev/null +++ b/queue-6.1/bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch @@ -0,0 +1,66 @@ +From d88db45c2d7ab392e3d55c08c69ce75a432aa58f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 15:35:49 -0400 +Subject: Bluetooth: ISO: Fix UAF on iso_sock_timeout + +From: Luiz Augusto von Dentz + +[ Upstream commit 246b435ad668596aa0e2bbb9d491b6413861211a ] + +conn->sk maybe have been unlinked/freed while waiting for iso_conn_lock +so this checks if the conn->sk is still valid by checking if it part of +iso_sk_list. 
+ +Fixes: ccf74f2390d6 ("Bluetooth: Add BTPROTO_ISO socket type") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + net/bluetooth/iso.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c +index b61abddc7bd4e..27efca5dc7bbf 100644 +--- a/net/bluetooth/iso.c ++++ b/net/bluetooth/iso.c +@@ -68,6 +68,16 @@ struct iso_pinfo { + #define ISO_CONN_TIMEOUT (HZ * 40) + #define ISO_DISCONN_TIMEOUT (HZ * 2) + ++static struct sock *iso_sock_hold(struct iso_conn *conn) ++{ ++ if (!conn || !bt_sock_linked(&iso_sk_list, conn->sk)) ++ return NULL; ++ ++ sock_hold(conn->sk); ++ ++ return conn->sk; ++} ++ + static void iso_sock_timeout(struct work_struct *work) + { + struct iso_conn *conn = container_of(work, struct iso_conn, +@@ -75,9 +85,7 @@ static void iso_sock_timeout(struct work_struct *work) + struct sock *sk; + + iso_conn_lock(conn); +- sk = conn->sk; +- if (sk) +- sock_hold(sk); ++ sk = iso_sock_hold(conn); + iso_conn_unlock(conn); + + if (!sk) +@@ -184,9 +192,7 @@ static void iso_conn_del(struct hci_conn *hcon, int err) + + /* Kill socket */ + iso_conn_lock(conn); +- sk = conn->sk; +- if (sk) +- sock_hold(sk); ++ sk = iso_sock_hold(conn); + iso_conn_unlock(conn); + + if (sk) { +-- +2.43.0 + diff --git a/queue-6.1/bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch b/queue-6.1/bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch new file mode 100644 index 00000000000..bc72e1af240 --- /dev/null +++ b/queue-6.1/bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch @@ -0,0 +1,116 @@ +From 5b04cab418ca4c5f8d8951556b795f6a4fbd70b1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Oct 2024 12:31:08 -0400 +Subject: Bluetooth: SCO: Fix UAF on sco_sock_timeout + +From: Luiz Augusto von Dentz + +[ Upstream commit 1bf4470a3939c678fb822073e9ea77a0560bc6bb ] + +conn->sk maybe have been unlinked/freed while waiting for sco_conn_lock +so this checks if the conn->sk is still valid by checking 
if it part of +sco_sk_list. + +Reported-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com +Tested-by: syzbot+4c0d0c4cde787116d465@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=4c0d0c4cde787116d465 +Fixes: ba316be1b6a0 ("Bluetooth: schedule SCO timeouts with delayed_work") +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Sasha Levin +--- + include/net/bluetooth/bluetooth.h | 1 + + net/bluetooth/af_bluetooth.c | 22 ++++++++++++++++++++++ + net/bluetooth/sco.c | 18 ++++++++++++------ + 3 files changed, 35 insertions(+), 6 deletions(-) + +diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h +index c7f1dd34ea470..41fc7f12971a5 100644 +--- a/include/net/bluetooth/bluetooth.h ++++ b/include/net/bluetooth/bluetooth.h +@@ -383,6 +383,7 @@ int bt_sock_register(int proto, const struct net_proto_family *ops); + void bt_sock_unregister(int proto); + void bt_sock_link(struct bt_sock_list *l, struct sock *s); + void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); ++bool bt_sock_linked(struct bt_sock_list *l, struct sock *s); + struct sock *bt_sock_alloc(struct net *net, struct socket *sock, + struct proto *prot, int proto, gfp_t prio, int kern); + int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, +diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c +index 0c70172555b4f..4c3bd05160387 100644 +--- a/net/bluetooth/af_bluetooth.c ++++ b/net/bluetooth/af_bluetooth.c +@@ -177,6 +177,28 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) + } + EXPORT_SYMBOL(bt_sock_unlink); + ++bool bt_sock_linked(struct bt_sock_list *l, struct sock *s) ++{ ++ struct sock *sk; ++ ++ if (!l || !s) ++ return false; ++ ++ read_lock(&l->lock); ++ ++ sk_for_each(sk, &l->head) { ++ if (s == sk) { ++ read_unlock(&l->lock); ++ return true; ++ } ++ } ++ ++ read_unlock(&l->lock); ++ ++ return false; ++} ++EXPORT_SYMBOL(bt_sock_linked); ++ + void bt_accept_enqueue(struct sock 
*parent, struct sock *sk, bool bh) + { + BT_DBG("parent %p, sk %p", parent, sk); +diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c +index 92ea01f9def3e..ad5afde17213a 100644 +--- a/net/bluetooth/sco.c ++++ b/net/bluetooth/sco.c +@@ -77,6 +77,16 @@ struct sco_pinfo { + #define SCO_CONN_TIMEOUT (HZ * 40) + #define SCO_DISCONN_TIMEOUT (HZ * 2) + ++static struct sock *sco_sock_hold(struct sco_conn *conn) ++{ ++ if (!conn || !bt_sock_linked(&sco_sk_list, conn->sk)) ++ return NULL; ++ ++ sock_hold(conn->sk); ++ ++ return conn->sk; ++} ++ + static void sco_sock_timeout(struct work_struct *work) + { + struct sco_conn *conn = container_of(work, struct sco_conn, +@@ -88,9 +98,7 @@ static void sco_sock_timeout(struct work_struct *work) + sco_conn_unlock(conn); + return; + } +- sk = conn->sk; +- if (sk) +- sock_hold(sk); ++ sk = sco_sock_hold(conn); + sco_conn_unlock(conn); + + if (!sk) +@@ -195,9 +203,7 @@ static void sco_conn_del(struct hci_conn *hcon, int err) + + /* Kill socket */ + sco_conn_lock(conn); +- sk = conn->sk; +- if (sk) +- sock_hold(sk); ++ sk = sco_sock_hold(conn); + sco_conn_unlock(conn); + + if (sk) { +-- +2.43.0 + diff --git a/queue-6.1/bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch b/queue-6.1/bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch new file mode 100644 index 00000000000..46b48836843 --- /dev/null +++ b/queue-6.1/bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch @@ -0,0 +1,46 @@ +From c3bad56a1d490930e03355a6b37f30d7777e476e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Oct 2024 22:03:52 +0200 +Subject: bpf,perf: Fix perf_event_detach_bpf_prog error handling + +From: Jiri Olsa + +[ Upstream commit 0ee288e69d033850bc87abe0f9cc3ada24763d7f ] + +Peter reported that perf_event_detach_bpf_prog might skip to release +the bpf program for -ENOENT error from bpf_prog_array_copy. + +This can't happen because bpf program is stored in perf event and is +detached and released only when perf event is freed. 
+ +Let's drop the -ENOENT check and make sure the bpf program is released +in any case. + +Fixes: 170a7e3ea070 ("bpf: bpf_prog_array_copy() should return -ENOENT if exclude_prog not found") +Reported-by: Peter Zijlstra +Signed-off-by: Jiri Olsa +Signed-off-by: Andrii Nakryiko +Link: https://lore.kernel.org/bpf/20241023200352.3488610-1-jolsa@kernel.org + +Closes: https://lore.kernel.org/lkml/20241022111638.GC16066@noisy.programming.kicks-ass.net/ +Signed-off-by: Sasha Levin +--- + kernel/trace/bpf_trace.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c +index 3fdde232eaa92..583961a9e539a 100644 +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -2179,8 +2179,6 @@ void perf_event_detach_bpf_prog(struct perf_event *event) + + old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); + ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); +- if (ret == -ENOENT) +- goto unlock; + if (ret < 0) { + bpf_prog_array_delete_safe(old_array, event->prog); + } else { +-- +2.43.0 + diff --git a/queue-6.1/docs-net-reformat-driver.rst-from-a-list-to-sections.patch b/queue-6.1/docs-net-reformat-driver.rst-from-a-list-to-sections.patch new file mode 100644 index 00000000000..3686c498b70 --- /dev/null +++ b/queue-6.1/docs-net-reformat-driver.rst-from-a-list-to-sections.patch @@ -0,0 +1,160 @@ +From 906af1927a49314dec380c27db03c391606faea1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Apr 2023 18:25:30 -0700 +Subject: docs: net: reformat driver.rst from a list to sections + +From: Jakub Kicinski + +[ Upstream commit d2f5c68e3f7157e874a759e382a5eaffa775b869 ] + +driver.rst had a historical form of list of common problems. +In the age of Sphinx and rendered documentation it's better +to use the more usual title + text format. + +This will allow us to render kdoc into the output more naturally. + +No changes to the actual text. 
+ +Signed-off-by: Jakub Kicinski +Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()") +Signed-off-by: Sasha Levin +--- + Documentation/networking/driver.rst | 91 ++++++++++++++++++----------- + 1 file changed, 56 insertions(+), 35 deletions(-) + +diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst +index 64f7236ff10be..3040a74d421c7 100644 +--- a/Documentation/networking/driver.rst ++++ b/Documentation/networking/driver.rst +@@ -4,15 +4,19 @@ + Softnet Driver Issues + ===================== + +-Transmit path guidelines: ++Transmit path guidelines ++======================== + +-1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under +- any normal circumstances. It is considered a hard error unless +- there is no way your device can tell ahead of time when its +- transmit function will become busy. ++Stop queues in advance ++---------------------- + +- Instead it must maintain the queue properly. For example, +- for a driver implementing scatter-gather this means:: ++The ndo_start_xmit method must not return NETDEV_TX_BUSY under ++any normal circumstances. It is considered a hard error unless ++there is no way your device can tell ahead of time when its ++transmit function will become busy. ++ ++Instead it must maintain the queue properly. 
For example, ++for a driver implementing scatter-gather this means:: + + static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) +@@ -42,56 +46,73 @@ Transmit path guidelines: + return NETDEV_TX_OK; + } + +- And then at the end of your TX reclamation event handling:: ++And then at the end of your TX reclamation event handling:: + + if (netif_queue_stopped(dp->dev) && + TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1)) + netif_wake_queue(dp->dev); + +- For a non-scatter-gather supporting card, the three tests simply become:: ++For a non-scatter-gather supporting card, the three tests simply become:: + + /* This is a hard error log it. */ + if (TX_BUFFS_AVAIL(dp) <= 0) + +- and:: ++and:: + + if (TX_BUFFS_AVAIL(dp) == 0) + +- and:: ++and:: + + if (netif_queue_stopped(dp->dev) && + TX_BUFFS_AVAIL(dp) > 0) + netif_wake_queue(dp->dev); + +-2) An ndo_start_xmit method must not modify the shared parts of a +- cloned SKB. ++No exclusive ownership ++---------------------- ++ ++An ndo_start_xmit method must not modify the shared parts of a ++cloned SKB. ++ ++Timely completions ++------------------ ++ ++Do not forget that once you return NETDEV_TX_OK from your ++ndo_start_xmit method, it is your driver's responsibility to free ++up the SKB and in some finite amount of time. + +-3) Do not forget that once you return NETDEV_TX_OK from your +- ndo_start_xmit method, it is your driver's responsibility to free +- up the SKB and in some finite amount of time. ++For example, this means that it is not allowed for your TX ++mitigation scheme to let TX packets "hang out" in the TX ++ring unreclaimed forever if no new TX packets are sent. ++This error can deadlock sockets waiting for send buffer room ++to be freed up. + +- For example, this means that it is not allowed for your TX +- mitigation scheme to let TX packets "hang out" in the TX +- ring unreclaimed forever if no new TX packets are sent. 
+- This error can deadlock sockets waiting for send buffer room +- to be freed up. ++If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you ++must not keep any reference to that SKB and you must not attempt ++to free it up. + +- If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you +- must not keep any reference to that SKB and you must not attempt +- to free it up. ++Probing guidelines ++================== + +-Probing guidelines: ++Address validation ++------------------ ++ ++Any hardware layer address you obtain for your device should ++be verified. For example, for ethernet check it with ++linux/etherdevice.h:is_valid_ether_addr() ++ ++Close/stop guidelines ++===================== + +-1) Any hardware layer address you obtain for your device should +- be verified. For example, for ethernet check it with +- linux/etherdevice.h:is_valid_ether_addr() ++Quiescence ++---------- + +-Close/stop guidelines: ++After the ndo_stop routine has been called, the hardware must ++not receive or transmit any data. All in flight packets must ++be aborted. If necessary, poll or wait for completion of ++any reset commands. + +-1) After the ndo_stop routine has been called, the hardware must +- not receive or transmit any data. All in flight packets must +- be aborted. If necessary, poll or wait for completion of +- any reset commands. ++Auto-close ++---------- + +-2) The ndo_stop routine will be called by unregister_netdevice +- if device is still UP. ++The ndo_stop routine will be called by unregister_netdevice ++if device is still UP. 
+-- +2.43.0 + diff --git a/queue-6.1/net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch b/queue-6.1/net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch new file mode 100644 index 00000000000..29d96d10720 --- /dev/null +++ b/queue-6.1/net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch @@ -0,0 +1,41 @@ +From 3939881b1bf85f5db415e87d3cac620d55272838 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 21:08:22 -0700 +Subject: net: dsa: mv88e6xxx: Fix error when setting port policy on mv88e6393x + +From: Peter Rashleigh + +[ Upstream commit 12bc14949c4a7272b509af0f1022a0deeb215fd8 ] + +mv88e6393x_port_set_policy doesn't correctly shift the ptr value when +converting the policy format between the old and new styles, so the +target register ends up with the ptr being written over the data bits. + +Shift the pointer to align with the format expected by +mv88e6393x_port_policy_write(). + +Fixes: 6584b26020fc ("net: dsa: mv88e6xxx: implement .port_set_policy for Amethyst") +Signed-off-by: Peter Rashleigh +Reviewed-by: Simon Horman +Message-ID: <20241016040822.3917-1-peter@rashleigh.ca> +Signed-off-by: Andrew Lunn +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/port.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c +index f79cf716c541f..553f577c25a6b 100644 +--- a/drivers/net/dsa/mv88e6xxx/port.c ++++ b/drivers/net/dsa/mv88e6xxx/port.c +@@ -1728,6 +1728,7 @@ int mv88e6393x_port_set_policy(struct mv88e6xxx_chip *chip, int port, + ptr = shift / 8; + shift %= 8; + mask >>= ptr * 8; ++ ptr <<= 8; + + err = mv88e6393x_port_policy_read(chip, port, ptr, &reg); + if (err) +-- +2.43.0 + diff --git a/queue-6.1/net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch b/queue-6.1/net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch new file mode 100644 index 00000000000..f6e62f256de --- /dev/null +++ 
b/queue-6.1/net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch @@ -0,0 +1,118 @@ +From 814fa503a2f9aa61d4bfd8d26064fd39e63d5706 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 19:41:18 +0000 +Subject: net: fix races in netdev_tx_sent_queue()/dev_watchdog() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Eric Dumazet + +[ Upstream commit 95ecba62e2fd201bcdcca636f5d774f1cd4f1458 ] + +Some workloads hit the infamous dev_watchdog() message: + +"NETDEV WATCHDOG: eth0 (xxxx): transmit queue XX timed out" + +It seems possible to hit this even for perfectly normal +BQL enabled drivers: + +1) Assume a TX queue was idle for more than dev->watchdog_timeo + (5 seconds unless changed by the driver) + +2) Assume a big packet is sent, exceeding current BQL limit. + +3) Driver ndo_start_xmit() puts the packet in TX ring, + and netdev_tx_sent_queue() is called. + +4) QUEUE_STATE_STACK_XOFF could be set from netdev_tx_sent_queue() + before txq->trans_start has been written. + +5) txq->trans_start is written later, from netdev_start_xmit() + + if (rc == NETDEV_TX_OK) + txq_trans_update(txq) + +dev_watchdog() running on another cpu could read the old +txq->trans_start, and then see QUEUE_STATE_STACK_XOFF, because 5) +did not happen yet. + +To solve the issue, write txq->trans_start right before one XOFF bit +is set : + +- _QUEUE_STATE_DRV_XOFF from netif_tx_stop_queue() +- __QUEUE_STATE_STACK_XOFF from netdev_tx_sent_queue() + +From dev_watchdog(), we have to read txq->state before txq->trans_start. + +Add memory barriers to enforce correct ordering. + +In the future, we could avoid writing over txq->trans_start for normal +operations, and rename this field to txq->xoff_start_time. 
+ +Fixes: bec251bc8b6a ("net: no longer stop all TX queues in dev_watchdog()") +Signed-off-by: Eric Dumazet +Reviewed-by: Willem de Bruijn +Reviewed-by: Toke Høiland-Jørgensen +Link: https://patch.msgid.link/20241015194118.3951657-1-edumazet@google.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/linux/netdevice.h | 12 ++++++++++++ + net/sched/sch_generic.c | 8 +++++++- + 2 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 8b67b266cce63..fbbd0df1106b6 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3274,6 +3274,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) + + static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) + { ++ /* Paired with READ_ONCE() from dev_watchdog() */ ++ WRITE_ONCE(dev_queue->trans_start, jiffies); ++ ++ /* This barrier is paired with smp_mb() from dev_watchdog() */ ++ smp_mb__before_atomic(); ++ + /* Must be an atomic op see netif_txq_try_stop() */ + set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); + } +@@ -3390,6 +3396,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, + if (likely(dql_avail(&dev_queue->dql) >= 0)) + return; + ++ /* Paired with READ_ONCE() from dev_watchdog() */ ++ WRITE_ONCE(dev_queue->trans_start, jiffies); ++ ++ /* This barrier is paired with smp_mb() from dev_watchdog() */ ++ smp_mb__before_atomic(); ++ + set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); + + /* +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index 7f0c8df7b63e0..b51af871a621c 100644 +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -511,9 +511,15 @@ static void dev_watchdog(struct timer_list *t) + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, i); +- trans_start = READ_ONCE(txq->trans_start); + if (!netif_xmit_stopped(txq)) + continue; ++ ++ /* Paired with WRITE_ONCE() + smp_mb...() in ++ * 
netdev_tx_sent_queue() and netif_tx_stop_queue(). ++ */ ++ smp_mb(); ++ trans_start = READ_ONCE(txq->trans_start); ++ + if (time_after(jiffies, trans_start + dev->watchdog_timeo)) { + timedout_ms = jiffies_to_msecs(jiffies - trans_start); + atomic_long_inc(&txq->trans_timeout); +-- +2.43.0 + diff --git a/queue-6.1/net-plip-fix-break-causing-plip-to-never-transmit.patch b/queue-6.1/net-plip-fix-break-causing-plip-to-never-transmit.patch new file mode 100644 index 00000000000..c5bdcdf8d69 --- /dev/null +++ b/queue-6.1/net-plip-fix-break-causing-plip-to-never-transmit.patch @@ -0,0 +1,45 @@ +From 51ee1b000ff35a75e66c5be7090f329eb1d41b3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 17:16:04 +0200 +Subject: net: plip: fix break; causing plip to never transmit + +From: Jakub Boehm + +[ Upstream commit f99cf996ba5a315f8b9f13cc21dff0604a0eb749 ] + +Since commit + 71ae2cb30531 ("net: plip: Fix fall-through warnings for Clang") + +plip was not able to send any packets, this patch replaces one +unintended break; with fallthrough; which was originally missed by +commit 9525d69a3667 ("net: plip: mark expected switch fall-throughs"). + +I have verified with a real hardware PLIP connection that everything +works once again after applying this patch. 
+ +Fixes: 71ae2cb30531 ("net: plip: Fix fall-through warnings for Clang") +Signed-off-by: Jakub Boehm +Reviewed-by: Simon Horman +Message-ID: <20241015-net-plip-tx-fix-v1-1-32d8be1c7e0b@gmail.com> +Signed-off-by: Andrew Lunn +Signed-off-by: Sasha Levin +--- + drivers/net/plip/plip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c +index 40ce8abe69995..6019811920a44 100644 +--- a/drivers/net/plip/plip.c ++++ b/drivers/net/plip/plip.c +@@ -815,7 +815,7 @@ plip_send_packet(struct net_device *dev, struct net_local *nl, + return HS_TIMEOUT; + } + } +- break; ++ fallthrough; + + case PLIP_PK_LENGTH_LSB: + if (plip_send(nibble_timeout, dev, +-- +2.43.0 + diff --git a/queue-6.1/net-provide-macros-for-commonly-copied-lockless-queu.patch b/queue-6.1/net-provide-macros-for-commonly-copied-lockless-queu.patch new file mode 100644 index 00000000000..ff97e3c6f3a --- /dev/null +++ b/queue-6.1/net-provide-macros-for-commonly-copied-lockless-queu.patch @@ -0,0 +1,216 @@ +From c7f15a3dbf838e111b178494d30641adab0517c1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Apr 2023 18:25:33 -0700 +Subject: net: provide macros for commonly copied lockless queue stop/wake code + +From: Jakub Kicinski + +[ Upstream commit c91c46de6bbc1147ae5dfe046b87f5f3d6593215 ] + +A lot of drivers follow the same scheme to stop / start queues +without introducing locks between xmit and NAPI tx completions. +I'm guessing they all copy'n'paste each other's code. +The original code dates back all the way to e1000 and Linux 2.6.19. + +Smaller drivers shy away from the scheme and introduce a lock +which may cause deadlocks in netpoll. + +Provide macros which encapsulate the necessary logic. + +The macros do not prevent false wake ups, the extra barrier +required to close that race is not worth it. 
See discussion in: +https://lore.kernel.org/all/c39312a2-4537-14b4-270c-9fe1fbb91e89@gmail.com/ + +Acked-by: Herbert Xu +Signed-off-by: Jakub Kicinski +Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()") +Signed-off-by: Sasha Levin +--- + Documentation/networking/driver.rst | 6 ++ + include/linux/netdevice.h | 1 + + include/net/netdev_queues.h | 144 ++++++++++++++++++++++++++++ + 3 files changed, 151 insertions(+) + create mode 100644 include/net/netdev_queues.h + +diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst +index 3040a74d421c7..870f933e4a1a1 100644 +--- a/Documentation/networking/driver.rst ++++ b/Documentation/networking/driver.rst +@@ -67,6 +67,12 @@ and:: + TX_BUFFS_AVAIL(dp) > 0) + netif_wake_queue(dp->dev); + ++Lockless queue stop / wake helper macros ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ ++.. kernel-doc:: include/net/netdev_queues.h ++ :doc: Lockless queue stopping / waking helpers. ++ + No exclusive ownership + ---------------------- + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 0373e09359905..8b67b266cce63 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3274,6 +3274,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) + + static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) + { ++ /* Must be an atomic op see netif_txq_try_stop() */ + set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); + } + +diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h +new file mode 100644 +index 0000000000000..5236d78bbdebb +--- /dev/null ++++ b/include/net/netdev_queues.h +@@ -0,0 +1,144 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _LINUX_NET_QUEUES_H ++#define _LINUX_NET_QUEUES_H ++ ++#include <linux/netdevice.h> ++ ++/** ++ * DOC: Lockless queue stopping / waking helpers. 
++ * ++ * The netif_txq_maybe_stop() and __netif_txq_completed_wake() ++ * macros are designed to safely implement stopping ++ * and waking netdev queues without full lock protection. ++ * ++ * We assume that there can be no concurrent stop attempts and no concurrent ++ * wake attempts. The try-stop should happen from the xmit handler, ++ * while wake up should be triggered from NAPI poll context. ++ * The two may run concurrently (single producer, single consumer). ++ * ++ * The try-stop side is expected to run from the xmit handler and therefore ++ * it does not reschedule Tx (netif_tx_start_queue() instead of ++ * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit ++ * handler may lead to xmit queue being enabled but not run. ++ * The waking side does not have similar context restrictions. ++ * ++ * The macros guarantee that rings will not remain stopped if there's ++ * space available, but they do *not* prevent false wake ups when ++ * the ring is full! Drivers should check for ring full at the start ++ * for the xmit handler. ++ * ++ * All descriptor ring indexes (and other relevant shared state) must ++ * be updated before invoking the macros. ++ */ ++ ++#define netif_txq_try_stop(txq, get_desc, start_thrs) \ ++ ({ \ ++ int _res; \ ++ \ ++ netif_tx_stop_queue(txq); \ ++ /* Producer index and stop bit must be visible \ ++ * to consumer before we recheck. \ ++ * Pairs with a barrier in __netif_txq_maybe_wake(). \ ++ */ \ ++ smp_mb__after_atomic(); \ ++ \ ++ /* We need to check again in a case another \ ++ * CPU has just made room available. \ ++ */ \ ++ _res = 0; \ ++ if (unlikely(get_desc >= start_thrs)) { \ ++ netif_tx_start_queue(txq); \ ++ _res = -1; \ ++ } \ ++ _res; \ ++ }) \ ++ ++/** ++ * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed ++ * @txq: struct netdev_queue to stop/start ++ * @get_desc: get current number of free descriptors (see requirements below!) 
++ * @stop_thrs: minimal number of available descriptors for queue to be left ++ * enabled ++ * @start_thrs: minimal number of descriptors to re-enable the queue, can be ++ * equal to @stop_thrs or higher to avoid frequent waking ++ * ++ * All arguments may be evaluated multiple times, beware of side effects. ++ * @get_desc must be a formula or a function call, it must always ++ * return up-to-date information when evaluated! ++ * Expected to be used from ndo_start_xmit, see the comment on top of the file. ++ * ++ * Returns: ++ * 0 if the queue was stopped ++ * 1 if the queue was left enabled ++ * -1 if the queue was re-enabled (raced with waking) ++ */ ++#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \ ++ ({ \ ++ int _res; \ ++ \ ++ _res = 1; \ ++ if (unlikely(get_desc < stop_thrs)) \ ++ _res = netif_txq_try_stop(txq, get_desc, start_thrs); \ ++ _res; \ ++ }) \ ++ ++ ++/** ++ * __netif_txq_maybe_wake() - locklessly wake a Tx queue, if needed ++ * @txq: struct netdev_queue to stop/start ++ * @get_desc: get current number of free descriptors (see requirements below!) ++ * @start_thrs: minimal number of descriptors to re-enable the queue ++ * @down_cond: down condition, predicate indicating that the queue should ++ * not be woken up even if descriptors are available ++ * ++ * All arguments may be evaluated multiple times. ++ * @get_desc must be a formula or a function call, it must always ++ * return up-to-date information when evaluated! ++ * ++ * Returns: ++ * 0 if the queue was woken up ++ * 1 if the queue was already enabled (or disabled but @down_cond is true) ++ * -1 if the queue was left unchanged (@start_thrs not reached) ++ */ ++#define __netif_txq_maybe_wake(txq, get_desc, start_thrs, down_cond) \ ++ ({ \ ++ int _res; \ ++ \ ++ _res = -1; \ ++ if (likely(get_desc > start_thrs)) { \ ++ /* Make sure that anybody stopping the queue after \ ++ * this sees the new next_to_clean. 
\ ++ */ \ ++ smp_mb(); \ ++ _res = 1; \ ++ if (unlikely(netif_tx_queue_stopped(txq)) && \ ++ !(down_cond)) { \ ++ netif_tx_wake_queue(txq); \ ++ _res = 0; \ ++ } \ ++ } \ ++ _res; \ ++ }) ++ ++#define netif_txq_maybe_wake(txq, get_desc, start_thrs) \ ++ __netif_txq_maybe_wake(txq, get_desc, start_thrs, false) ++ ++/* subqueue variants follow */ ++ ++#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \ ++ ({ \ ++ struct netdev_queue *txq; \ ++ \ ++ txq = netdev_get_tx_queue(dev, idx); \ ++ netif_txq_try_stop(txq, get_desc, start_thrs); \ ++ }) ++ ++#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ ++ ({ \ ++ struct netdev_queue *txq; \ ++ \ ++ txq = netdev_get_tx_queue(dev, idx); \ ++ netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \ ++ }) ++ ++#endif +-- +2.43.0 + diff --git a/queue-6.1/net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch b/queue-6.1/net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch new file mode 100644 index 00000000000..b8ca318cdd8 --- /dev/null +++ b/queue-6.1/net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch @@ -0,0 +1,135 @@ +From 1e39b731c8c085ef04cc304f7e53f340b87e3d6b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 19:10:48 +0300 +Subject: net/sched: act_api: deny mismatched skip_sw/skip_hw flags for actions + created by classifiers + +From: Vladimir Oltean + +[ Upstream commit 34d35b4edbbe890a91bec939bfd29ad92517a52b ] + +tcf_action_init() has logic for checking mismatches between action and +filter offload flags (skip_sw/skip_hw). AFAIU, this is intended to run +on the transition between the new tc_act_bind(flags) returning true (aka +now gets bound to classifier) and tc_act_bind(act->tcfa_flags) returning +false (aka action was not bound to classifier before). Otherwise, the +check is skipped. 
+ +For the case where an action is not standalone, but rather it was +created by a classifier and is bound to it, tcf_action_init() skips the +check entirely, and this means it allows mismatched flags to occur. + +Taking the matchall classifier code path as an example (with mirred as +an action), the reason is the following: + + 1 | mall_change() + 2 | -> mall_replace_hw_filter() + 3 | -> tcf_exts_validate_ex() + 4 | -> flags |= TCA_ACT_FLAGS_BIND; + 5 | -> tcf_action_init() + 6 | -> tcf_action_init_1() + 7 | -> a_o->init() + 8 | -> tcf_mirred_init() + 9 | -> tcf_idr_create_from_flags() +10 | -> tcf_idr_create() +11 | -> p->tcfa_flags = flags; +12 | -> tc_act_bind(flags)) +13 | -> tc_act_bind(act->tcfa_flags) + +When invoked from tcf_exts_validate_ex() like matchall does (but other +classifiers validate their extensions as well), tcf_action_init() runs +in a call path where "flags" always contains TCA_ACT_FLAGS_BIND (set by +line 4). So line 12 is always true, and line 13 is always true as well. +No transition ever takes place, and the check is skipped. + +The code was added in this form in commit c86e0209dc77 ("flow_offload: +validate flags of filter and actions"), but I'm attributing the blame +even earlier in that series, to when TCA_ACT_FLAGS_SKIP_HW and +TCA_ACT_FLAGS_SKIP_SW were added to the UAPI. + +Following the development process of this change, the check did not +always exist in this form. A change took place between v3 [1] and v4 [2], +AFAIU due to review feedback that it doesn't make sense for action flags +to be different than classifier flags. I think I agree with that +feedback, but it was translated into code that omits enforcing this for +"classic" actions created at the same time with the filters themselves. + +There are 3 more important cases to discuss. 
First there is this command: + +$ tc qdisc add dev eth0 clasct +$ tc filter add dev eth0 ingress matchall skip_sw \ + action mirred ingress mirror dev eth1 + +which should be allowed, because prior to the concept of dedicated +action flags, it used to work and it used to mean the action inherited +the skip_sw/skip_hw flags from the classifier. It's not a mismatch. + +Then we have this command: + +$ tc qdisc add dev eth0 clasct +$ tc filter add dev eth0 ingress matchall skip_sw \ + action mirred ingress mirror dev eth1 skip_hw + +where there is a mismatch and it should be rejected. + +Finally, we have: + +$ tc qdisc add dev eth0 clasct +$ tc filter add dev eth0 ingress matchall skip_sw \ + action mirred ingress mirror dev eth1 skip_sw + +where the offload flags coincide, and this should be treated the same as +the first command based on inheritance, and accepted. + +[1]: https://lore.kernel.org/netdev/20211028110646.13791-9-simon.horman@corigine.com/ +[2]: https://lore.kernel.org/netdev/20211118130805.23897-10-simon.horman@corigine.com/ +Fixes: 7adc57651211 ("flow_offload: add skip_hw and skip_sw to control if offload the action") +Signed-off-by: Vladimir Oltean +Reviewed-by: Simon Horman +Reviewed-by: Ido Schimmel +Tested-by: Ido Schimmel +Link: https://patch.msgid.link/20241017161049.3570037-1-vladimir.oltean@nxp.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/act_api.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/net/sched/act_api.c b/net/sched/act_api.c +index 5a361deb804a3..05bd1e9bca36a 100644 +--- a/net/sched/act_api.c ++++ b/net/sched/act_api.c +@@ -1493,8 +1493,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, + bool skip_sw = tc_skip_sw(fl_flags); + bool skip_hw = tc_skip_hw(fl_flags); + +- if (tc_act_bind(act->tcfa_flags)) ++ if (tc_act_bind(act->tcfa_flags)) { ++ /* Action is created by classifier and is not ++ * standalone. 
Check that the user did not set ++ * any action flags different than the ++ * classifier flags, and inherit the flags from ++ * the classifier for the compatibility case ++ * where no flags were specified at all. ++ */ ++ if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) || ++ (tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) { ++ NL_SET_ERR_MSG(extack, ++ "Mismatch between action and filter offload flags"); ++ err = -EINVAL; ++ goto err; ++ } ++ if (skip_sw) ++ act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW; ++ if (skip_hw) ++ act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW; + continue; ++ } ++ ++ /* Action is standalone */ + if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || + skip_hw != tc_act_skip_hw(act->tcfa_flags)) { + NL_SET_ERR_MSG(extack, +-- +2.43.0 + diff --git a/queue-6.1/net-sched-adjust-device-watchdog-timer-to-detect-sto.patch b/queue-6.1/net-sched-adjust-device-watchdog-timer-to-detect-sto.patch new file mode 100644 index 00000000000..77fe0329d62 --- /dev/null +++ b/queue-6.1/net-sched-adjust-device-watchdog-timer-to-detect-sto.patch @@ -0,0 +1,72 @@ +From bde764eabf47948525f388a54af5c6eb5ff90ac0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 May 2024 19:06:17 +0530 +Subject: net/sched: adjust device watchdog timer to detect stopped queue at + right time + +From: Praveen Kumar Kannoju + +[ Upstream commit 33fb988b67050d9bb512f77f08453fa00088943c ] + +Applications are sensitive to long network latency, particularly +heartbeat monitoring ones. Longer the tx timeout recovery higher the +risk with such applications on a production machines. This patch +remedies, yet honoring device set tx timeout. + +Modify watchdog next timeout to be shorter than the device specified. +Compute the next timeout be equal to device watchdog timeout less the +how long ago queue stop had been done. At next watchdog timeout tx +timeout handler is called into if still in stopped state. Either called +or not called, restore the watchdog timeout back to device specified. 
+ +Signed-off-by: Praveen Kumar Kannoju +Link: https://lore.kernel.org/r/20240508133617.4424-1-praveen.kannoju@oracle.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 95ecba62e2fd ("net: fix races in netdev_tx_sent_queue()/dev_watchdog()") +Signed-off-by: Sasha Levin +--- + net/sched/sch_generic.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c +index 6ab9359c1706f..7f0c8df7b63e0 100644 +--- a/net/sched/sch_generic.c ++++ b/net/sched/sch_generic.c +@@ -505,19 +505,22 @@ static void dev_watchdog(struct timer_list *t) + unsigned int timedout_ms = 0; + unsigned int i; + unsigned long trans_start; ++ unsigned long oldest_start = jiffies; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(dev, i); + trans_start = READ_ONCE(txq->trans_start); +- if (netif_xmit_stopped(txq) && +- time_after(jiffies, (trans_start + +- dev->watchdog_timeo))) { ++ if (!netif_xmit_stopped(txq)) ++ continue; ++ if (time_after(jiffies, trans_start + dev->watchdog_timeo)) { + timedout_ms = jiffies_to_msecs(jiffies - trans_start); + atomic_long_inc(&txq->trans_timeout); + break; + } ++ if (time_after(oldest_start, trans_start)) ++ oldest_start = trans_start; + } + + if (unlikely(timedout_ms)) { +@@ -530,7 +533,7 @@ static void dev_watchdog(struct timer_list *t) + netif_unfreeze_queues(dev); + } + if (!mod_timer(&dev->watchdog_timer, +- round_jiffies(jiffies + ++ round_jiffies(oldest_start + + dev->watchdog_timeo))) + release = false; + } +-- +2.43.0 + diff --git a/queue-6.1/net-sched-fix-use-after-free-in-taprio_change.patch b/queue-6.1/net-sched-fix-use-after-free-in-taprio_change.patch new file mode 100644 index 00000000000..7fb7628bf36 --- /dev/null +++ b/queue-6.1/net-sched-fix-use-after-free-in-taprio_change.patch @@ -0,0 +1,45 @@ +From eb15215539e3dac0d80f7a85b8e3bc3978b57ec8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Oct 2024 08:13:38 
+0300 +Subject: net: sched: fix use-after-free in taprio_change() + +From: Dmitry Antipov + +[ Upstream commit f504465970aebb2467da548f7c1efbbf36d0f44b ] + +In 'taprio_change()', 'admin' pointer may become dangling due to sched +switch / removal caused by 'advance_sched()', and critical section +protected by 'q->current_entry_lock' is too small to prevent from such +a scenario (which causes use-after-free detected by KASAN). Fix this +by prefer 'rcu_replace_pointer()' over 'rcu_assign_pointer()' to update +'admin' immediately before an attempt to schedule freeing. + +Fixes: a3d43c0d56f1 ("taprio: Add support adding an admin schedule") +Reported-by: syzbot+b65e0af58423fc8a73aa@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=b65e0af58423fc8a73aa +Acked-by: Vinicius Costa Gomes +Signed-off-by: Dmitry Antipov +Link: https://patch.msgid.link/20241018051339.418890-1-dmantipov@yandex.ru +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_taprio.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index 07f6f5343dd71..212fef2b72f50 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -1681,7 +1681,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, + + taprio_start_sched(sch, start, new_admin); + +- rcu_assign_pointer(q->admin_sched, new_admin); ++ admin = rcu_replace_pointer(q->admin_sched, new_admin, ++ lockdep_rtnl_is_held()); + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); + +-- +2.43.0 + diff --git a/queue-6.1/net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch b/queue-6.1/net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch new file mode 100644 index 00000000000..f720560d3ce --- /dev/null +++ b/queue-6.1/net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch @@ -0,0 +1,37 @@ +From e9e5f21b0711a747ccaedbd37e6a0083957bf69b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 
2024 22:41:48 +0800 +Subject: net/sun3_82586: fix potential memory leak in sun3_82586_send_packet() + +From: Wang Hai + +[ Upstream commit 2cb3f56e827abb22c4168ad0c1bbbf401bb2f3b8 ] + +The sun3_82586_send_packet() returns NETDEV_TX_OK without freeing skb +in case of skb->len being too long, add dev_kfree_skb() to fix it. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Wang Hai +Reviewed-by: Simon Horman +Message-ID: <20241015144148.7918-1-wanghai38@huawei.com> +Signed-off-by: Andrew Lunn +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/i825xx/sun3_82586.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c +index 72d3b5328ebb4..54c83e66bf78b 100644 +--- a/drivers/net/ethernet/i825xx/sun3_82586.c ++++ b/drivers/net/ethernet/i825xx/sun3_82586.c +@@ -1011,6 +1011,7 @@ sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev) + if(skb->len > XMIT_BUFF_SIZE) + { + printk("%s: Sorry, max. framelength is %d bytes. The length of your frame is %d bytes.\n",dev->name,XMIT_BUFF_SIZE,skb->len); ++ dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + +-- +2.43.0 + diff --git a/queue-6.1/net-usb-usbnet-fix-name-regression.patch b/queue-6.1/net-usb-usbnet-fix-name-regression.patch new file mode 100644 index 00000000000..f04eb1fd17b --- /dev/null +++ b/queue-6.1/net-usb-usbnet-fix-name-regression.patch @@ -0,0 +1,46 @@ +From 06db4f5d113ac9fbf69a9a731199b552e0ece928 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 09:18:37 +0200 +Subject: net: usb: usbnet: fix name regression + +From: Oliver Neukum + +[ Upstream commit 8a7d12d674ac6f2147c18f36d1e15f1a48060edf ] + +The fix for MAC addresses broke detection of the naming convention +because it gave network devices no random MAC before bind() +was called. 
This means that the check for the local assignment bit +was always negative as the address was zeroed from allocation, +instead of from overwriting the MAC with a unique hardware address. + +The correct check for whether bind() has altered the MAC is +done with is_zero_ether_addr + +Signed-off-by: Oliver Neukum +Reported-by: Greg Thelen +Diagnosed-by: John Sperbeck +Fixes: bab8eb0dd4cb9 ("usbnet: modern method to get random MAC") +Link: https://patch.msgid.link/20241017071849.389636-1-oneukum@suse.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/usb/usbnet.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c +index ce587a12b894c..ae1282487b02a 100644 +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -1766,7 +1766,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) + // can rename the link if it knows better. + if ((dev->driver_info->flags & FLAG_ETHER) != 0 && + ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || +- (net->dev_addr [0] & 0x02) == 0)) ++ /* somebody touched it*/ ++ !is_zero_ether_addr(net->dev_addr))) + strscpy(net->name, "eth%d", sizeof(net->name)); + /* WLAN devices should always be named "wlan%d" */ + if ((dev->driver_info->flags & FLAG_WLAN) != 0) +-- +2.43.0 + diff --git a/queue-6.1/net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch b/queue-6.1/net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch new file mode 100644 index 00000000000..8d4373f5de6 --- /dev/null +++ b/queue-6.1/net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch @@ -0,0 +1,107 @@ +From 732427f87e1023f023e6b8bd5c016944db1bd104 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Oct 2024 21:16:21 +0800 +Subject: net: wwan: fix global oob in wwan_rtnl_policy + +From: Lin Ma + +[ Upstream commit 47dd5447cab8ce30a847a0337d5341ae4c7476a7 ] + +The variable wwan_rtnl_link_ops assign a *bigger* maxtype which leads to +a global 
out-of-bounds read when parsing the netlink attributes. Exactly +same bug cause as the oob fixed in commit b33fb5b801c6 ("net: qualcomm: +rmnet: fix global oob in rmnet_policy"). + +================================================================== +BUG: KASAN: global-out-of-bounds in validate_nla lib/nlattr.c:388 [inline] +BUG: KASAN: global-out-of-bounds in __nla_validate_parse+0x19d7/0x29a0 lib/nlattr.c:603 +Read of size 1 at addr ffffffff8b09cb60 by task syz.1.66276/323862 + +CPU: 0 PID: 323862 Comm: syz.1.66276 Not tainted 6.1.70 #1 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0x177/0x231 lib/dump_stack.c:106 + print_address_description mm/kasan/report.c:284 [inline] + print_report+0x14f/0x750 mm/kasan/report.c:395 + kasan_report+0x139/0x170 mm/kasan/report.c:495 + validate_nla lib/nlattr.c:388 [inline] + __nla_validate_parse+0x19d7/0x29a0 lib/nlattr.c:603 + __nla_parse+0x3c/0x50 lib/nlattr.c:700 + nla_parse_nested_deprecated include/net/netlink.h:1269 [inline] + __rtnl_newlink net/core/rtnetlink.c:3514 [inline] + rtnl_newlink+0x7bc/0x1fd0 net/core/rtnetlink.c:3623 + rtnetlink_rcv_msg+0x794/0xef0 net/core/rtnetlink.c:6122 + netlink_rcv_skb+0x1de/0x420 net/netlink/af_netlink.c:2508 + netlink_unicast_kernel net/netlink/af_netlink.c:1326 [inline] + netlink_unicast+0x74b/0x8c0 net/netlink/af_netlink.c:1352 + netlink_sendmsg+0x882/0xb90 net/netlink/af_netlink.c:1874 + sock_sendmsg_nosec net/socket.c:716 [inline] + __sock_sendmsg net/socket.c:728 [inline] + ____sys_sendmsg+0x5cc/0x8f0 net/socket.c:2499 + ___sys_sendmsg+0x21c/0x290 net/socket.c:2553 + __sys_sendmsg net/socket.c:2582 [inline] + __do_sys_sendmsg net/socket.c:2591 [inline] + __se_sys_sendmsg+0x19e/0x270 net/socket.c:2589 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x45/0x90 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 
0033:0x7f67b19a24ad +RSP: 002b:00007f67b17febb8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f67b1b45f80 RCX: 00007f67b19a24ad +RDX: 0000000000000000 RSI: 0000000020005e40 RDI: 0000000000000004 +RBP: 00007f67b1a1e01d R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007ffd2513764f R14: 00007ffd251376e0 R15: 00007f67b17fed40 + + +The buggy address belongs to the variable: + wwan_rtnl_policy+0x20/0x40 + +The buggy address belongs to the physical page: +page:ffffea00002c2700 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xb09c +flags: 0xfff00000001000(reserved|node=0|zone=1|lastcpupid=0x7ff) +raw: 00fff00000001000 ffffea00002c2708 ffffea00002c2708 0000000000000000 +raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 +page dumped because: kasan: bad access detected +page_owner info is not present (never set?) + +Memory state around the buggy address: + ffffffff8b09ca00: 05 f9 f9 f9 05 f9 f9 f9 00 01 f9 f9 00 01 f9 f9 + ffffffff8b09ca80: 00 00 00 05 f9 f9 f9 f9 00 00 03 f9 f9 f9 f9 f9 +>ffffffff8b09cb00: 00 00 00 00 05 f9 f9 f9 00 00 00 00 f9 f9 f9 f9 + ^ + ffffffff8b09cb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +================================================================== + +According to the comment of `nla_parse_nested_deprecated`, use correct size +`IFLA_WWAN_MAX` here to fix this issue. 
+ +Fixes: 88b710532e53 ("wwan: add interface creation support") +Signed-off-by: Lin Ma +Reviewed-by: Loic Poulain +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/20241015131621.47503-1-linma@zju.edu.cn +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/wwan/wwan_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c +index 62e9f7d6c9fe9..7089cb103885d 100644 +--- a/drivers/net/wwan/wwan_core.c ++++ b/drivers/net/wwan/wwan_core.c +@@ -994,7 +994,7 @@ static const struct nla_policy wwan_rtnl_policy[IFLA_WWAN_MAX + 1] = { + + static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = { + .kind = "wwan", +- .maxtype = __IFLA_WWAN_MAX, ++ .maxtype = IFLA_WWAN_MAX, + .alloc = wwan_rtnl_alloc, + .validate = wwan_rtnl_validate, + .newlink = wwan_rtnl_newlink, +-- +2.43.0 + diff --git a/queue-6.1/netfilter-xtables-fix-typo-causing-some-targets-not-.patch b/queue-6.1/netfilter-xtables-fix-typo-causing-some-targets-not-.patch new file mode 100644 index 00000000000..ee4c7fc1221 --- /dev/null +++ b/queue-6.1/netfilter-xtables-fix-typo-causing-some-targets-not-.patch @@ -0,0 +1,74 @@ +From cf1c77ac09349a33653ad5d9e49b619745dabf32 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 20 Oct 2024 14:49:51 +0200 +Subject: netfilter: xtables: fix typo causing some targets not to load on IPv6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Pablo Neira Ayuso + +[ Upstream commit 306ed1728e8438caed30332e1ab46b28c25fe3d8 ] + +- There is no NFPROTO_IPV6 family for mark and NFLOG. +- TRACE is also missing module autoload with NFPROTO_IPV6. + +This results in ip6tables failing to restore a ruleset. This issue has been +reported by several users providing incomplete patches. + +Very similar to Ilya Katsnelson's patch including a missing chunk in the +TRACE extension. 
+ +Fixes: 0bfcb7b71e73 ("netfilter: xtables: avoid NFPROTO_UNSPEC where needed") +Reported-by: Ignat Korchagin +Reported-by: Ilya Katsnelson +Reported-by: Krzysztof Olędzki +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/xt_NFLOG.c | 2 +- + net/netfilter/xt_TRACE.c | 1 + + net/netfilter/xt_mark.c | 2 +- + 3 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c +index d80abd6ccaf8f..6dcf4bc7e30b2 100644 +--- a/net/netfilter/xt_NFLOG.c ++++ b/net/netfilter/xt_NFLOG.c +@@ -79,7 +79,7 @@ static struct xt_target nflog_tg_reg[] __read_mostly = { + { + .name = "NFLOG", + .revision = 0, +- .family = NFPROTO_IPV4, ++ .family = NFPROTO_IPV6, + .checkentry = nflog_tg_check, + .destroy = nflog_tg_destroy, + .target = nflog_tg, +diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c +index f3fa4f11348cd..a642ff09fc8e8 100644 +--- a/net/netfilter/xt_TRACE.c ++++ b/net/netfilter/xt_TRACE.c +@@ -49,6 +49,7 @@ static struct xt_target trace_tg_reg[] __read_mostly = { + .target = trace_tg, + .checkentry = trace_tg_check, + .destroy = trace_tg_destroy, ++ .me = THIS_MODULE, + }, + #endif + }; +diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c +index f76fe04fc9a4e..65b965ca40ea7 100644 +--- a/net/netfilter/xt_mark.c ++++ b/net/netfilter/xt_mark.c +@@ -62,7 +62,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = { + { + .name = "MARK", + .revision = 2, +- .family = NFPROTO_IPV4, ++ .family = NFPROTO_IPV6, + .target = mark_tg, + .targetsize = sizeof(struct xt_mark_tginfo2), + .me = THIS_MODULE, +-- +2.43.0 + diff --git a/queue-6.1/octeon_ep-add-skb-allocation-failures-handling-in-__.patch b/queue-6.1/octeon_ep-add-skb-allocation-failures-handling-in-__.patch new file mode 100644 index 00000000000..a9f617f9cda --- /dev/null +++ b/queue-6.1/octeon_ep-add-skb-allocation-failures-handling-in-__.patch @@ -0,0 +1,84 @@ +From 6ff9546d7d7dc85d1acc1345050ad7bbcb64aa5c Mon 
Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 13:06:51 +0300 +Subject: octeon_ep: Add SKB allocation failures handling in + __octep_oq_process_rx() + +From: Aleksandr Mishin + +[ Upstream commit eb592008f79be52ccef88cd9a5249b3fc0367278 ] + +build_skb() returns NULL in case of a memory allocation failure so handle +it inside __octep_oq_process_rx() to avoid NULL pointer dereference. + +__octep_oq_process_rx() is called during NAPI polling by the driver. If +skb allocation fails, keep on pulling packets out of the Rx DMA queue: we +shouldn't break the polling immediately and thus falsely indicate to the +octep_napi_poll() that the Rx pressure is going down. As there is no +associated skb in this case, don't process the packets and don't push them +up the network stack - they are skipped. + +Helper function is implemented to unmmap/flush all the fragment buffers +used by the dropped packet. 'alloc_failures' counter is incremented to +mark the skb allocation error in driver statistics. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") +Suggested-by: Paolo Abeni +Signed-off-by: Aleksandr Mishin +Reviewed-by: Jacob Keller +Signed-off-by: Andrew Lunn +Signed-off-by: Sasha Levin +--- + .../net/ethernet/marvell/octeon_ep/octep_rx.c | 27 +++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +index 03d46c28a009c..4f3c1187a6e82 100644 +--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c ++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +@@ -360,6 +360,27 @@ static void octep_oq_next_pkt(struct octep_oq *oq, + *read_idx = 0; + } + ++/** ++ * octep_oq_drop_rx() - Free the resources associated with a packet. ++ * ++ * @oq: Octeon Rx queue data structure. ++ * @buff_info: Current packet buffer info. 
++ * @read_idx: Current packet index in the ring. ++ * @desc_used: Current packet descriptor number. ++ * ++ */ ++static void octep_oq_drop_rx(struct octep_oq *oq, ++ struct octep_rx_buffer *buff_info, ++ u32 *read_idx, u32 *desc_used) ++{ ++ int data_len = buff_info->len - oq->max_single_buffer_size; ++ ++ while (data_len > 0) { ++ octep_oq_next_pkt(oq, buff_info, read_idx, desc_used); ++ data_len -= oq->buffer_size; ++ }; ++} ++ + /** + * __octep_oq_process_rx() - Process hardware Rx queue and push to stack. + * +@@ -415,6 +436,12 @@ static int __octep_oq_process_rx(struct octep_device *oct, + octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used); + + skb = build_skb((void *)resp_hw, PAGE_SIZE); ++ if (!skb) { ++ octep_oq_drop_rx(oq, buff_info, ++ &read_idx, &desc_used); ++ oq->stats.alloc_failures++; ++ continue; ++ } + skb_reserve(skb, data_offset); + + rx_bytes += buff_info->len; +-- +2.43.0 + diff --git a/queue-6.1/octeon_ep-implement-helper-for-iterating-packets-in-.patch b/queue-6.1/octeon_ep-implement-helper-for-iterating-packets-in-.patch new file mode 100644 index 00000000000..5b4faba015b --- /dev/null +++ b/queue-6.1/octeon_ep-implement-helper-for-iterating-packets-in-.patch @@ -0,0 +1,132 @@ +From c82adea79e872034aedfb8b0950e7999ca542841 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Oct 2024 13:06:50 +0300 +Subject: octeon_ep: Implement helper for iterating packets in Rx queue + +From: Aleksandr Mishin + +[ Upstream commit bd28df26197b2bd0913bf1b36770836481975143 ] + +The common code with some packet and index manipulations is extracted and +moved to newly implemented helper to make the code more readable and avoid +duplication. This is a preparation for skb allocation failure handling. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. 
+ +Suggested-by: Simon Horman +Suggested-by: Paolo Abeni +Signed-off-by: Aleksandr Mishin +Reviewed-by: Jacob Keller +Signed-off-by: Andrew Lunn +Stable-dep-of: eb592008f79b ("octeon_ep: Add SKB allocation failures handling in __octep_oq_process_rx()") +Signed-off-by: Sasha Levin +--- + .../net/ethernet/marvell/octeon_ep/octep_rx.c | 55 +++++++++++-------- + 1 file changed, 32 insertions(+), 23 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +index 392d9b0da0d7a..03d46c28a009c 100644 +--- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c ++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +@@ -336,6 +336,30 @@ static int octep_oq_check_hw_for_pkts(struct octep_device *oct, + return new_pkts; + } + ++/** ++ * octep_oq_next_pkt() - Move to the next packet in Rx queue. ++ * ++ * @oq: Octeon Rx queue data structure. ++ * @buff_info: Current packet buffer info. ++ * @read_idx: Current packet index in the ring. ++ * @desc_used: Current packet descriptor number. ++ * ++ * Free the resources associated with a packet. ++ * Increment packet index in the ring and packet descriptor number. ++ */ ++static void octep_oq_next_pkt(struct octep_oq *oq, ++ struct octep_rx_buffer *buff_info, ++ u32 *read_idx, u32 *desc_used) ++{ ++ dma_unmap_page(oq->dev, oq->desc_ring[*read_idx].buffer_ptr, ++ PAGE_SIZE, DMA_FROM_DEVICE); ++ buff_info->page = NULL; ++ (*read_idx)++; ++ (*desc_used)++; ++ if (*read_idx == oq->max_count) ++ *read_idx = 0; ++} ++ + /** + * __octep_oq_process_rx() - Process hardware Rx queue and push to stack. 
+ * +@@ -365,10 +389,7 @@ static int __octep_oq_process_rx(struct octep_device *oct, + desc_used = 0; + for (pkt = 0; pkt < pkts_to_process; pkt++) { + buff_info = (struct octep_rx_buffer *)&oq->buff_info[read_idx]; +- dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, +- PAGE_SIZE, DMA_FROM_DEVICE); + resp_hw = page_address(buff_info->page); +- buff_info->page = NULL; + + /* Swap the length field that is in Big-Endian to CPU */ + buff_info->len = be64_to_cpu(resp_hw->length); +@@ -390,36 +411,27 @@ static int __octep_oq_process_rx(struct octep_device *oct, + */ + data_offset = OCTEP_OQ_RESP_HW_SIZE; + } ++ ++ octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used); ++ ++ skb = build_skb((void *)resp_hw, PAGE_SIZE); ++ skb_reserve(skb, data_offset); ++ + rx_bytes += buff_info->len; + + if (buff_info->len <= oq->max_single_buffer_size) { +- skb = build_skb((void *)resp_hw, PAGE_SIZE); +- skb_reserve(skb, data_offset); + skb_put(skb, buff_info->len); +- read_idx++; +- desc_used++; +- if (read_idx == oq->max_count) +- read_idx = 0; + } else { + struct skb_shared_info *shinfo; + u16 data_len; + +- skb = build_skb((void *)resp_hw, PAGE_SIZE); +- skb_reserve(skb, data_offset); + /* Head fragment includes response header(s); + * subsequent fragments contains only data. 
+ */ + skb_put(skb, oq->max_single_buffer_size); +- read_idx++; +- desc_used++; +- if (read_idx == oq->max_count) +- read_idx = 0; +- + shinfo = skb_shinfo(skb); + data_len = buff_info->len - oq->max_single_buffer_size; + while (data_len) { +- dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, +- PAGE_SIZE, DMA_FROM_DEVICE); + buff_info = (struct octep_rx_buffer *) + &oq->buff_info[read_idx]; + if (data_len < oq->buffer_size) { +@@ -434,11 +446,8 @@ static int __octep_oq_process_rx(struct octep_device *oct, + buff_info->page, 0, + buff_info->len, + buff_info->len); +- buff_info->page = NULL; +- read_idx++; +- desc_used++; +- if (read_idx == oq->max_count) +- read_idx = 0; ++ ++ octep_oq_next_pkt(oq, buff_info, &read_idx, &desc_used); + } + } + +-- +2.43.0 + diff --git a/queue-6.1/posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch b/queue-6.1/posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch new file mode 100644 index 00000000000..88a17b00821 --- /dev/null +++ b/queue-6.1/posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch @@ -0,0 +1,58 @@ +From 1dc5aaec946de46cb09d0e42028ebd846acbdcdd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Oct 2024 18:07:48 +0800 +Subject: posix-clock: posix-clock: Fix unbalanced locking in + pc_clock_settime() + +From: Jinjie Ruan + +[ Upstream commit 6e62807c7fbb3c758d233018caf94dfea9c65dbd ] + +If get_clock_desc() succeeds, it calls fget() for the clockid's fd, +and get the clk->rwsem read lock, so the error path should release +the lock to make the lock balance and fput the clockid's fd to make +the refcount balance and release the fd related resource. + +However the below commit left the error path locked behind resulting in +unbalanced locking. Check timespec64_valid_strict() before +get_clock_desc() to fix it, because the "ts" is not changed +after that. 
+ +Fixes: d8794ac20a29 ("posix-clock: Fix missing timespec64 check in pc_clock_settime()") +Acked-by: Richard Cochran +Signed-off-by: Jinjie Ruan +Acked-by: Anna-Maria Behnsen +[pabeni@redhat.com: fixed commit message typo] +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + kernel/time/posix-clock.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c +index 8127673bfc45e..05e73d209aa87 100644 +--- a/kernel/time/posix-clock.c ++++ b/kernel/time/posix-clock.c +@@ -290,6 +290,9 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) + struct posix_clock_desc cd; + int err; + ++ if (!timespec64_valid_strict(ts)) ++ return -EINVAL; ++ + err = get_clock_desc(id, &cd); + if (err) + return err; +@@ -299,9 +302,6 @@ static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) + goto out; + } + +- if (!timespec64_valid_strict(ts)) +- return -EINVAL; +- + if (cd.clk->ops.clock_settime) + err = cd.clk->ops.clock_settime(cd.clk, ts); + else +-- +2.43.0 + diff --git a/queue-6.1/r8169-avoid-unsolicited-interrupts.patch b/queue-6.1/r8169-avoid-unsolicited-interrupts.patch new file mode 100644 index 00000000000..e50fa03a20b --- /dev/null +++ b/queue-6.1/r8169-avoid-unsolicited-interrupts.patch @@ -0,0 +1,49 @@ +From 2f3b4296152a7e33fa9d504d485634352d5ea64e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Oct 2024 11:08:16 +0200 +Subject: r8169: avoid unsolicited interrupts + +From: Heiner Kallweit + +[ Upstream commit 10ce0db787004875f4dba068ea952207d1d8abeb ] + +It was reported that after resume from suspend a PCI error is logged +and connectivity is broken. Error message is: +PCI error (cmd = 0x0407, status_errs = 0x0000) +The message seems to be a red herring as none of the error bits is set, +and the PCI command register value also is normal. Exception handling +for a PCI error includes a chip reset what apparently brakes connectivity +here. 
The interrupt status bit triggering the PCI error handling isn't +actually used on PCIe chip versions, so it's not clear why this bit is +set by the chip. Fix this by ignoring this bit on PCIe chip versions. + +Fixes: 0e4851502f84 ("r8169: merge with version 8.001.00 of Realtek's r8168 driver") +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219388 +Tested-by: Atlas Yu +Signed-off-by: Heiner Kallweit +Reviewed-by: Simon Horman +Link: https://patch.msgid.link/78e2f535-438f-4212-ad94-a77637ac6c9c@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/realtek/r8169_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c +index 8b35e14fba3a8..a74e33bf0302e 100644 +--- a/drivers/net/ethernet/realtek/r8169_main.c ++++ b/drivers/net/ethernet/realtek/r8169_main.c +@@ -4617,7 +4617,9 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) + if ((status & 0xffff) == 0xffff || !(status & tp->irq_mask)) + return IRQ_NONE; + +- if (unlikely(status & SYSErr)) { ++ /* At least RTL8168fp may unexpectedly set the SYSErr bit */ ++ if (unlikely(status & SYSErr && ++ tp->mac_version <= RTL_GIGA_MAC_VER_06)) { + rtl8169_pcierr_interrupt(tp->dev); + goto out; + } +-- +2.43.0 + diff --git a/queue-6.1/series b/queue-6.1/series index 6c432452763..038c15ba915 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -83,3 +83,25 @@ loongarch-add-support-to-clone-a-time-namespace.patch loongarch-don-t-crash-in-stack_top-for-tasks-without.patch jfs-fix-sanity-check-in-dbmount.patch tracing-consider-the-null-character-when-validating-.patch +xfrm-extract-dst-lookup-parameters-into-a-struct.patch +xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch +net-sun3_82586-fix-potential-memory-leak-in-sun3_825.patch +be2net-fix-potential-memory-leak-in-be_xmit.patch +net-plip-fix-break-causing-plip-to-never-transmit.patch 
+octeon_ep-implement-helper-for-iterating-packets-in-.patch +octeon_ep-add-skb-allocation-failures-handling-in-__.patch +net-dsa-mv88e6xxx-fix-error-when-setting-port-policy.patch +netfilter-xtables-fix-typo-causing-some-targets-not-.patch +net-wwan-fix-global-oob-in-wwan_rtnl_policy.patch +docs-net-reformat-driver.rst-from-a-list-to-sections.patch +net-provide-macros-for-commonly-copied-lockless-queu.patch +net-sched-adjust-device-watchdog-timer-to-detect-sto.patch +net-fix-races-in-netdev_tx_sent_queue-dev_watchdog.patch +net-usb-usbnet-fix-name-regression.patch +net-sched-act_api-deny-mismatched-skip_sw-skip_hw-fl.patch +net-sched-fix-use-after-free-in-taprio_change.patch +r8169-avoid-unsolicited-interrupts.patch +posix-clock-posix-clock-fix-unbalanced-locking-in-pc.patch +bluetooth-sco-fix-uaf-on-sco_sock_timeout.patch +bluetooth-iso-fix-uaf-on-iso_sock_timeout.patch +bpf-perf-fix-perf_event_detach_bpf_prog-error-handli.patch diff --git a/queue-6.1/xfrm-extract-dst-lookup-parameters-into-a-struct.patch b/queue-6.1/xfrm-extract-dst-lookup-parameters-into-a-struct.patch new file mode 100644 index 00000000000..a816db81cfd --- /dev/null +++ b/queue-6.1/xfrm-extract-dst-lookup-parameters-into-a-struct.patch @@ -0,0 +1,325 @@ +From 81c7b3a968324203d18fe538b4d936ac29352ab9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Sep 2024 17:07:09 -0700 +Subject: xfrm: extract dst lookup parameters into a struct + +From: Eyal Birger + +[ Upstream commit e509996b16728e37d5a909a5c63c1bd64f23b306 ] + +Preparation for adding more fields to dst lookup functions without +changing their signatures. 
+ +Signed-off-by: Eyal Birger +Signed-off-by: Steffen Klassert +Stable-dep-of: b84697210343 ("xfrm: respect ip protocols rules criteria when performing dst lookups") +Signed-off-by: Sasha Levin +--- + include/net/xfrm.h | 26 +++++++++++++------------- + net/ipv4/xfrm4_policy.c | 38 ++++++++++++++++---------------------- + net/ipv6/xfrm6_policy.c | 28 +++++++++++++--------------- + net/xfrm/xfrm_device.c | 11 ++++++++--- + net/xfrm/xfrm_policy.c | 35 +++++++++++++++++++++++------------ + 5 files changed, 73 insertions(+), 65 deletions(-) + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 5b9c2c535702c..55ea15ccd5327 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -326,20 +326,23 @@ struct xfrm_if_cb { + void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); + void xfrm_if_unregister_cb(void); + ++struct xfrm_dst_lookup_params { ++ struct net *net; ++ int tos; ++ int oif; ++ xfrm_address_t *saddr; ++ xfrm_address_t *daddr; ++ u32 mark; ++}; ++ + struct net_device; + struct xfrm_type; + struct xfrm_dst; + struct xfrm_policy_afinfo { + struct dst_ops *dst_ops; +- struct dst_entry *(*dst_lookup)(struct net *net, +- int tos, int oif, +- const xfrm_address_t *saddr, +- const xfrm_address_t *daddr, +- u32 mark); +- int (*get_saddr)(struct net *net, int oif, +- xfrm_address_t *saddr, +- xfrm_address_t *daddr, +- u32 mark); ++ struct dst_entry *(*dst_lookup)(const struct xfrm_dst_lookup_params *params); ++ int (*get_saddr)(xfrm_address_t *saddr, ++ const struct xfrm_dst_lookup_params *params); + int (*fill_dst)(struct xfrm_dst *xdst, + struct net_device *dev, + const struct flowi *fl); +@@ -1659,10 +1662,7 @@ static inline int xfrm_user_policy(struct sock *sk, int optname, + } + #endif + +-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, +- const xfrm_address_t *saddr, +- const xfrm_address_t *daddr, +- int family, u32 mark); ++struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params); + + 
struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); + +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 3d0dfa6cf9f96..9ac9ed9738068 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -17,47 +17,41 @@ + #include + #include + +-static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, +- int tos, int oif, +- const xfrm_address_t *saddr, +- const xfrm_address_t *daddr, +- u32 mark) ++static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, ++ const struct xfrm_dst_lookup_params *params) + { + struct rtable *rt; + + memset(fl4, 0, sizeof(*fl4)); +- fl4->daddr = daddr->a4; +- fl4->flowi4_tos = tos; +- fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); +- fl4->flowi4_mark = mark; +- if (saddr) +- fl4->saddr = saddr->a4; +- +- rt = __ip_route_output_key(net, fl4); ++ fl4->daddr = params->daddr->a4; ++ fl4->flowi4_tos = params->tos; ++ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net, ++ params->oif); ++ fl4->flowi4_mark = params->mark; ++ if (params->saddr) ++ fl4->saddr = params->saddr->a4; ++ ++ rt = __ip_route_output_key(params->net, fl4); + if (!IS_ERR(rt)) + return &rt->dst; + + return ERR_CAST(rt); + } + +-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, +- const xfrm_address_t *saddr, +- const xfrm_address_t *daddr, +- u32 mark) ++static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params) + { + struct flowi4 fl4; + +- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); ++ return __xfrm4_dst_lookup(&fl4, params); + } + +-static int xfrm4_get_saddr(struct net *net, int oif, +- xfrm_address_t *saddr, xfrm_address_t *daddr, +- u32 mark) ++static int xfrm4_get_saddr(xfrm_address_t *saddr, ++ const struct xfrm_dst_lookup_params *params) + { + struct dst_entry *dst; + struct flowi4 fl4; + +- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); ++ dst = __xfrm4_dst_lookup(&fl4, 
params); + if (IS_ERR(dst)) + return -EHOSTUNREACH; + +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index b7b5dbf5d037b..6e3e0f1bd81c9 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -23,23 +23,21 @@ + #include + #include + +-static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, +- const xfrm_address_t *saddr, +- const xfrm_address_t *daddr, +- u32 mark) ++static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params) + { + struct flowi6 fl6; + struct dst_entry *dst; + int err; + + memset(&fl6, 0, sizeof(fl6)); +- fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); +- fl6.flowi6_mark = mark; +- memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); +- if (saddr) +- memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); ++ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net, ++ params->oif); ++ fl6.flowi6_mark = params->mark; ++ memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr)); ++ if (params->saddr) ++ memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); + +- dst = ip6_route_output(net, NULL, &fl6); ++ dst = ip6_route_output(params->net, NULL, &fl6); + + err = dst->error; + if (dst->error) { +@@ -50,15 +48,14 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, + return dst; + } + +-static int xfrm6_get_saddr(struct net *net, int oif, +- xfrm_address_t *saddr, xfrm_address_t *daddr, +- u32 mark) ++static int xfrm6_get_saddr(xfrm_address_t *saddr, ++ const struct xfrm_dst_lookup_params *params) + { + struct dst_entry *dst; + struct net_device *dev; + struct inet6_dev *idev; + +- dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); ++ dst = xfrm6_dst_lookup(params); + if (IS_ERR(dst)) + return -EHOSTUNREACH; + +@@ -68,7 +65,8 @@ static int xfrm6_get_saddr(struct net *net, int oif, + return -EHOSTUNREACH; + } + dev = idev->dev; +- ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); ++ ipv6_dev_get_saddr(dev_net(dev), dev, 
&params->daddr->in6, 0, ++ &saddr->in6); + dst_release(dst); + return 0; + } +diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c +index 21269e8f2db4b..2535ee034a5c8 100644 +--- a/net/xfrm/xfrm_device.c ++++ b/net/xfrm/xfrm_device.c +@@ -248,6 +248,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + + dev = dev_get_by_index(net, xuo->ifindex); + if (!dev) { ++ struct xfrm_dst_lookup_params params; ++ + if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { + saddr = &x->props.saddr; + daddr = &x->id.daddr; +@@ -256,9 +258,12 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + daddr = &x->props.saddr; + } + +- dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, +- x->props.family, +- xfrm_smark_get(0, x)); ++ memset(&params, 0, sizeof(params)); ++ params.net = net; ++ params.saddr = saddr; ++ params.daddr = daddr; ++ params.mark = xfrm_smark_get(0, x); ++ dst = __xfrm_dst_lookup(x->props.family, &params); + if (IS_ERR(dst)) + return 0; + +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index 5fddde2d5bc48..adb12f428be30 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -251,10 +251,8 @@ static const struct xfrm_if_cb *xfrm_if_get_cb(void) + return rcu_dereference(xfrm_if_cb); + } + +-struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, +- const xfrm_address_t *saddr, +- const xfrm_address_t *daddr, +- int family, u32 mark) ++struct dst_entry *__xfrm_dst_lookup(int family, ++ const struct xfrm_dst_lookup_params *params) + { + const struct xfrm_policy_afinfo *afinfo; + struct dst_entry *dst; +@@ -263,7 +261,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + if (unlikely(afinfo == NULL)) + return ERR_PTR(-EAFNOSUPPORT); + +- dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark); ++ dst = afinfo->dst_lookup(params); + + rcu_read_unlock(); + +@@ -277,6 +275,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, + xfrm_address_t *prev_daddr, + int
family, u32 mark) + { ++ struct xfrm_dst_lookup_params params; + struct net *net = xs_net(x); + xfrm_address_t *saddr = &x->props.saddr; + xfrm_address_t *daddr = &x->id.daddr; +@@ -291,7 +290,14 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, + daddr = x->coaddr; + } + +- dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark); ++ params.net = net; ++ params.saddr = saddr; ++ params.daddr = daddr; ++ params.tos = tos; ++ params.oif = oif; ++ params.mark = mark; ++ ++ dst = __xfrm_dst_lookup(family, &params); + + if (!IS_ERR(dst)) { + if (prev_saddr != saddr) +@@ -2346,15 +2352,15 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) + } + + static int +-xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, +- xfrm_address_t *remote, unsigned short family, u32 mark) ++xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr, ++ const struct xfrm_dst_lookup_params *params) + { + int err; + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; +- err = afinfo->get_saddr(net, oif, local, remote, mark); ++ err = afinfo->get_saddr(saddr, params); + rcu_read_unlock(); + return err; + } +@@ -2383,9 +2389,14 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + if (xfrm_addr_any(local, tmpl->encap_family)) { +- error = xfrm_get_saddr(net, fl->flowi_oif, +- &tmp, remote, +- tmpl->encap_family, 0); ++ struct xfrm_dst_lookup_params params; ++ ++ memset(&params, 0, sizeof(params)); ++ params.net = net; ++ params.oif = fl->flowi_oif; ++ params.daddr = remote; ++ error = xfrm_get_saddr(tmpl->encap_family, &tmp, ++ &params); + if (error) + goto fail; + local = &tmp; +-- +2.43.0 + diff --git a/queue-6.1/xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch b/queue-6.1/xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch new file mode 100644 index 00000000000..4f36e36b5fd
--- /dev/null +++ b/queue-6.1/xfrm-respect-ip-protocols-rules-criteria-when-perfor.patch @@ -0,0 +1,99 @@ +From 7dcff75260b6327c2f4c7f2b4b92b89a0e956dde Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Sep 2024 17:07:10 -0700 +Subject: xfrm: respect ip protocols rules criteria when performing dst lookups + +From: Eyal Birger + +[ Upstream commit b8469721034300bbb6dec5b4bf32492c95e16a0c ] + +The series in the "fixes" tag added the ability to consider L4 attributes +in routing rules. + +The dst lookup on the outer packet of encapsulated traffic in the xfrm +code was not adapted to this change, thus routing behavior that relies +on L4 information is not respected. + +Pass the ip protocol information when performing dst lookups. + +Fixes: a25724b05af0 ("Merge branch 'fib_rules-support-sport-dport-and-proto-match'") +Signed-off-by: Eyal Birger +Tested-by: Antony Antony +Signed-off-by: Steffen Klassert +Signed-off-by: Sasha Levin +--- + include/net/xfrm.h | 2 ++ + net/ipv4/xfrm4_policy.c | 2 ++ + net/ipv6/xfrm6_policy.c | 3 +++ + net/xfrm/xfrm_policy.c | 15 +++++++++++++++ + 4 files changed, 22 insertions(+) + +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index 55ea15ccd5327..bf670929622dc 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -333,6 +333,8 @@ struct xfrm_dst_lookup_params { + xfrm_address_t *saddr; + xfrm_address_t *daddr; + u32 mark; ++ __u8 ipproto; ++ union flowi_uli uli; + }; + + struct net_device; +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 9ac9ed9738068..76f3f38b110f2 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -30,6 +30,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, + fl4->flowi4_mark = params->mark; + if (params->saddr) + fl4->saddr = params->saddr->a4; ++ fl4->flowi4_proto = params->ipproto; ++ fl4->uli = params->uli; + + rt = __ip_route_output_key(params->net, fl4); + if (!IS_ERR(rt)) +diff --git a/net/ipv6/xfrm6_policy.c 
b/net/ipv6/xfrm6_policy.c +index 6e3e0f1bd81c9..8c1092ff3fe2e 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -37,6 +37,9 @@ static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *p + if (params->saddr) + memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr)); + ++ fl6.flowi4_proto = params->ipproto; ++ fl6.uli = params->uli; ++ + dst = ip6_route_output(params->net, NULL, &fl6); + + err = dst->error; +diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c +index adb12f428be30..a022f49846879 100644 +--- a/net/xfrm/xfrm_policy.c ++++ b/net/xfrm/xfrm_policy.c +@@ -296,6 +296,21 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, + params.tos = tos; + params.oif = oif; + params.mark = mark; ++ params.ipproto = x->id.proto; ++ if (x->encap) { ++ switch (x->encap->encap_type) { ++ case UDP_ENCAP_ESPINUDP: ++ params.ipproto = IPPROTO_UDP; ++ params.uli.ports.sport = x->encap->encap_sport; ++ params.uli.ports.dport = x->encap->encap_dport; ++ break; ++ case TCP_ENCAP_ESPINTCP: ++ params.ipproto = IPPROTO_TCP; ++ params.uli.ports.sport = x->encap->encap_sport; ++ params.uli.ports.dport = x->encap->encap_dport; ++ break; ++ } ++ } + + dst = __xfrm_dst_lookup(family, &params); + +-- +2.43.0 +