From 0c2b7ff980743142d71f876a21b17ffe766aa694 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 29 Jun 2024 07:50:00 -0400 Subject: [PATCH] Fixes for 5.15 Signed-off-by: Sasha Levin --- ...c-card-set-priv-pdev-before-using-it.patch | 54 ++++ ...for-duplicate-reqsk-on-identical-syn.patch | 195 +++++++++++ ...-outstanding-tx-skbs-during-scrq-res.patch | 60 ++++ ...ochip-fix-initial-port-flush-problem.patch | 49 +++ ...dd-microchip-ksz-9477-to-the-device-.patch | 36 +++ ...les-fully-validate-nft_data_value-on.patch | 92 ++++++ ...orrect-compat-recv-recvfrom-syscalls.patch | 48 +++ queue-5.15/series | 13 + ...rc-fix-compat-recv-recvfrom-syscalls.patch | 279 ++++++++++++++++ .../sparc-fix-old-compat_sys_select.patch | 39 +++ ...fastopen_synack-to-enter-tcp_ca_loss.patch | 168 ++++++++++ ...-bpf-timeout-setting-for-syn-ack-rto.patch | 160 +++++++++ ...d-null-pointer-dereference-in-perf_t.patch | 306 ++++++++++++++++++ ...remove-warn-from-__xdp_reg_mem_model.patch | 76 +++++ 14 files changed, 1575 insertions(+) create mode 100644 queue-5.15/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch create mode 100644 queue-5.15/fix-race-for-duplicate-reqsk-on-identical-syn.patch create mode 100644 queue-5.15/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch create mode 100644 queue-5.15/net-dsa-microchip-fix-initial-port-flush-problem.patch create mode 100644 queue-5.15/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch create mode 100644 queue-5.15/netfilter-nf_tables-fully-validate-nft_data_value-on.patch create mode 100644 queue-5.15/parisc-use-correct-compat-recv-recvfrom-syscalls.patch create mode 100644 queue-5.15/sparc-fix-compat-recv-recvfrom-syscalls.patch create mode 100644 queue-5.15/sparc-fix-old-compat_sys_select.patch create mode 100644 queue-5.15/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch create mode 100644 queue-5.15/tcp-use-bpf-timeout-setting-for-syn-ack-rto.patch create mode 100644 
queue-5.15/tracing-net_sched-null-pointer-dereference-in-perf_t.patch create mode 100644 queue-5.15/xdp-remove-warn-from-__xdp_reg_mem_model.patch diff --git a/queue-5.15/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch b/queue-5.15/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch new file mode 100644 index 00000000000..e01ddfaae91 --- /dev/null +++ b/queue-5.15/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch @@ -0,0 +1,54 @@ +From aeec77fea82b8c85544f48b36d4ffe869d1c58a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jun 2024 15:25:03 +0200 +Subject: ASoC: fsl-asoc-card: set priv->pdev before using it + +From: Elinor Montmasson + +[ Upstream commit 90f3feb24172185f1832636264943e8b5e289245 ] + +priv->pdev pointer was set after being used in +fsl_asoc_card_audmux_init(). +Move this assignment at the start of the probe function, so +sub-functions can correctly use pdev through priv. + +fsl_asoc_card_audmux_init() dereferences priv->pdev to get access to the +dev struct, used with dev_err macros. +As priv is zero-initialised, there would be a NULL pointer dereference. +Note that if priv->dev is dereferenced before assignment but never used, +for example if there is no error to be printed, the driver won't crash +probably due to compiler optimisations. 
+ +Fixes: 708b4351f08c ("ASoC: fsl: Add Freescale Generic ASoC Sound Card with ASRC support") +Signed-off-by: Elinor Montmasson +Link: https://patch.msgid.link/20240620132511.4291-2-elinor.montmasson@savoirfairelinux.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/fsl/fsl-asoc-card.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c +index 5000d779aade2..98a157e46637a 100644 +--- a/sound/soc/fsl/fsl-asoc-card.c ++++ b/sound/soc/fsl/fsl-asoc-card.c +@@ -548,6 +548,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) + if (!priv) + return -ENOMEM; + ++ priv->pdev = pdev; ++ + cpu_np = of_parse_phandle(np, "audio-cpu", 0); + /* Give a chance to old DT binding */ + if (!cpu_np) +@@ -742,7 +744,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) + } + + /* Initialize sound card */ +- priv->pdev = pdev; + priv->card.dev = &pdev->dev; + priv->card.owner = THIS_MODULE; + ret = snd_soc_of_parse_card_name(&priv->card, "model"); +-- +2.43.0 + diff --git a/queue-5.15/fix-race-for-duplicate-reqsk-on-identical-syn.patch b/queue-5.15/fix-race-for-duplicate-reqsk-on-identical-syn.patch new file mode 100644 index 00000000000..d1084281289 --- /dev/null +++ b/queue-5.15/fix-race-for-duplicate-reqsk-on-identical-syn.patch @@ -0,0 +1,195 @@ +From a3098f03db196fb7fafaa6a33c34553c82e206f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 21 Jun 2024 09:39:29 +0800 +Subject: Fix race for duplicate reqsk on identical SYN + +From: luoxuanqiang + +[ Upstream commit ff46e3b4421923937b7f6e44ffcd3549a074f321 ] + +When bonding is configured in BOND_MODE_BROADCAST mode, if two identical +SYN packets are received at the same time and processed on different CPUs, +it can potentially create the same sk (sock) but two different reqsk +(request_sock) in tcp_conn_request(). 
+ +These two different reqsk will respond with two SYNACK packets, and since +the generation of the seq (ISN) incorporates a timestamp, the final two +SYNACK packets will have different seq values. + +The consequence is that when the Client receives and replies with an ACK +to the earlier SYNACK packet, we will reset(RST) it. + +======================================================================== + +This behavior is consistently reproducible in my local setup, +which comprises: + + | NETA1 ------ NETB1 | +PC_A --- bond --- | | --- bond --- PC_B + | NETA2 ------ NETB2 | + +- PC_A is the Server and has two network cards, NETA1 and NETA2. I have + bonded these two cards using BOND_MODE_BROADCAST mode and configured + them to be handled by different CPU. + +- PC_B is the Client, also equipped with two network cards, NETB1 and + NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode. + +If the client attempts a TCP connection to the server, it might encounter +a failure. Capturing packets from the server side reveals: + +10.10.10.10.45182 > localhost: Flags [S], seq 320236027, +10.10.10.10.45182 > localhost: Flags [S], seq 320236027, +localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116, +localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <== +10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, +10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, +localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <== +localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, + +Two SYNACKs with different seq numbers are sent by localhost, +resulting in an anomaly. + +======================================================================== + +The attempted solution is as follows: +Add a return value to inet_csk_reqsk_queue_hash_add() to confirm if the +ehash insertion is successful (Up to now, the reason for unsuccessful +insertion is that a reqsk for the same connection has already been +inserted). 
If the insertion fails, release the reqsk. + +Due to the refcnt, Kuniyuki suggests also adding a return value check +for the DCCP module; if ehash insertion fails, indicating a successful +insertion of the same connection, simply release the reqsk as well. + +Simultaneously, In the reqsk_queue_hash_req(), the start of the +req->rsk_timer is adjusted to be after successful insertion. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: luoxuanqiang +Reviewed-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240621013929.1386815-1-luoxuanqiang@kylinos.cn +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/net/inet_connection_sock.h | 2 +- + net/dccp/ipv4.c | 7 +++++-- + net/dccp/ipv6.c | 7 +++++-- + net/ipv4/inet_connection_sock.c | 17 +++++++++++++---- + net/ipv4/tcp_input.c | 7 ++++++- + 5 files changed, 30 insertions(+), 10 deletions(-) + +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index 7794cf2b5ef50..53ec06703fe4e 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -260,7 +260,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, + struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child); +-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, ++bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + unsigned long timeout); + struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, + struct request_sock *req, +diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c +index cab82344de9be..aaef9557d9425 100644 +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -649,8 +649,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) + if (dccp_v4_send_response(sk, req)) + goto drop_and_free; + +- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); +- reqsk_put(req); ++ if 
(unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) ++ reqsk_free(req); ++ else ++ reqsk_put(req); ++ + return 0; + + drop_and_free: +diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c +index 6f05e9d0d4287..f7c88b860d5d5 100644 +--- a/net/dccp/ipv6.c ++++ b/net/dccp/ipv6.c +@@ -397,8 +397,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) + if (dccp_v6_send_response(sk, req)) + goto drop_and_free; + +- inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); +- reqsk_put(req); ++ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) ++ reqsk_free(req); ++ else ++ reqsk_put(req); ++ + return 0; + + drop_and_free: +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 43e370f45b81d..75c2f7ffe5be4 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -927,25 +927,34 @@ static void reqsk_timer_handler(struct timer_list *t) + inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); + } + +-static void reqsk_queue_hash_req(struct request_sock *req, ++static bool reqsk_queue_hash_req(struct request_sock *req, + unsigned long timeout) + { ++ bool found_dup_sk = false; ++ ++ if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk)) ++ return false; ++ ++ /* The timer needs to be setup after a successful insertion. */ + timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); + mod_timer(&req->rsk_timer, jiffies + timeout); + +- inet_ehash_insert(req_to_sk(req), NULL, NULL); + /* before letting lookups find us, make sure all req fields + * are committed to memory and refcnt initialized. 
+ */ + smp_wmb(); + refcount_set(&req->rsk_refcnt, 2 + 1); ++ return true; + } + +-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, ++bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + unsigned long timeout) + { +- reqsk_queue_hash_req(req, timeout); ++ if (!reqsk_queue_hash_req(req, timeout)) ++ return false; ++ + inet_csk_reqsk_queue_added(sk); ++ return true; + } + EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index ca72dbaa27b46..d6330c26e0e10 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -6994,7 +6994,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, + tcp_rsk(req)->tfo_listener = false; + if (!want_cookie) { + req->timeout = tcp_timeout_init((struct sock *)req); +- inet_csk_reqsk_queue_hash_add(sk, req, req->timeout); ++ if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, ++ req->timeout))) { ++ reqsk_free(req); ++ return 0; ++ } ++ + } + af_ops->send_synack(sk, dst, &fl, req, &foc, + !want_cookie ? TCP_SYNACK_NORMAL : +-- +2.43.0 + diff --git a/queue-5.15/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch b/queue-5.15/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch new file mode 100644 index 00000000000..e651502b6bb --- /dev/null +++ b/queue-5.15/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch @@ -0,0 +1,60 @@ +From 896614fe1dcd9aa75c7ce53c3c75ecf82f96a47c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jun 2024 10:23:12 -0500 +Subject: ibmvnic: Free any outstanding tx skbs during scrq reset + +From: Nick Child + +[ Upstream commit 49bbeb5719c2f56907d3a9623b47c6c15c2c431d ] + +There are 2 types of outstanding tx skb's: +Type 1: Packets that are sitting in the drivers ind_buff that are +waiting to be batch sent to the NIC. 
During a device reset, these are +freed with a call to ibmvnic_tx_scrq_clean_buffer() +Type 2: Packets that have been sent to the NIC and are awaiting a TX +completion IRQ. These are free'd during a reset with a call to +clean_tx_pools() + +During any reset which requires us to free the tx irq, ensure that the +Type 2 skb references are freed. Since the irq is released, it is +impossible for the NIC to inform of any completions. + +Furthermore, later in the reset process is a call to init_tx_pools() +which marks every entry in the tx pool as free (ie not outstanding). +So if the driver is to make a call to init_tx_pools(), it must first +be sure that the tx pool is empty of skb references. + +This issue was discovered by observing the following in the logs during +EEH testing: + TX free map points to untracked skb (tso_pool 0 idx=4) + TX free map points to untracked skb (tso_pool 0 idx=5) + TX free map points to untracked skb (tso_pool 1 idx=36) + +Fixes: 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset") +Signed-off-by: Nick Child +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c +index 890e27b986e2a..7f4539a2e5517 100644 +--- a/drivers/net/ethernet/ibm/ibmvnic.c ++++ b/drivers/net/ethernet/ibm/ibmvnic.c +@@ -3409,6 +3409,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free) + adapter->num_active_tx_scrqs = 0; + } + ++ /* Clean any remaining outstanding SKBs ++ * we freed the irq so we won't be hearing ++ * from them ++ */ ++ clean_tx_pools(adapter); ++ + if (adapter->rx_scrq) { + for (i = 0; i < adapter->num_active_rx_scrqs; i++) { + if (!adapter->rx_scrq[i]) +-- +2.43.0 + diff --git a/queue-5.15/net-dsa-microchip-fix-initial-port-flush-problem.patch b/queue-5.15/net-dsa-microchip-fix-initial-port-flush-problem.patch new file mode 100644 index 00000000000..b95442f2268 --- /dev/null +++ b/queue-5.15/net-dsa-microchip-fix-initial-port-flush-problem.patch @@ -0,0 +1,49 @@ +From 58f1fb0ba7535de997c460083487afc05f8765e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Jun 2024 17:16:42 -0700 +Subject: net: dsa: microchip: fix initial port flush problem + +From: Tristram Ha + +[ Upstream commit ad53f5f54f351e967128edbc431f0f26427172cf ] + +The very first flush in any port will flush all learned addresses in all +ports. This can be observed by unplugging the cable from one port while +additional ports are connected and dumping the fdb entries. + +This problem is caused by the initially wrong value programmed to the +REG_SW_LUE_CTRL_1 register. Setting SW_FLUSH_STP_TABLE and +SW_FLUSH_MSTP_TABLE bits does not have an immediate effect. It is when +ksz9477_flush_dyn_mac_table() is called then the SW_FLUSH_STP_TABLE bit +takes effect and flushes all learned entries. After that call both bits +are reset and so the next port flush will not cause such problem again. 
+ +Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") +Signed-off-by: Tristram Ha +Link: https://patch.msgid.link/1718756202-2731-1-git-send-email-Tristram.Ha@microchip.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz9477.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c +index bf788e17f408f..293c327db2eef 100644 +--- a/drivers/net/dsa/microchip/ksz9477.c ++++ b/drivers/net/dsa/microchip/ksz9477.c +@@ -206,10 +206,8 @@ static int ksz9477_reset_switch(struct ksz_device *dev) + SPI_AUTO_EDGE_DETECTION, 0); + + /* default configuration */ +- ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8); +- data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING | +- SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE; +- ksz_write8(dev, REG_SW_LUE_CTRL_1, data8); ++ ksz_write8(dev, REG_SW_LUE_CTRL_1, ++ SW_AGING_ENABLE | SW_LINK_AUTO_AGING | SW_SRC_ADDR_FILTER); + + /* disable interrupts */ + ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK); +-- +2.43.0 + diff --git a/queue-5.15/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch b/queue-5.15/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch new file mode 100644 index 00000000000..0d70e6fa849 --- /dev/null +++ b/queue-5.15/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch @@ -0,0 +1,36 @@ +From fe70174ee1c821e38ad029dca8d85db0d6796ca3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 21 Jun 2024 16:43:20 +0200 +Subject: net: phy: micrel: add Microchip KSZ 9477 to the device table + +From: Enguerrand de Ribaucourt + +[ Upstream commit 54a4e5c16382e871c01dd82b47e930fdce30406b ] + +PHY_ID_KSZ9477 was supported but not added to the device table passed to +MODULE_DEVICE_TABLE. 
+ +Fixes: fc3973a1fa09 ("phy: micrel: add Microchip KSZ 9477 Switch PHY support") +Signed-off-by: Enguerrand de Ribaucourt +Reviewed-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/micrel.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index 59d05a1672ece..f1a6cc7ccf1a3 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -1798,6 +1798,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = { + { PHY_ID_KSZ8081, MICREL_PHY_ID_MASK }, + { PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK }, + { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK }, ++ { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK }, + { PHY_ID_LAN8814, MICREL_PHY_ID_MASK }, + { } + }; +-- +2.43.0 + diff --git a/queue-5.15/netfilter-nf_tables-fully-validate-nft_data_value-on.patch b/queue-5.15/netfilter-nf_tables-fully-validate-nft_data_value-on.patch new file mode 100644 index 00000000000..f21e8861fa8 --- /dev/null +++ b/queue-5.15/netfilter-nf_tables-fully-validate-nft_data_value-on.patch @@ -0,0 +1,92 @@ +From 0ab322408fcb3d57e026598086da97d82fd7471c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jun 2024 23:15:38 +0200 +Subject: netfilter: nf_tables: fully validate NFT_DATA_VALUE on store to data + registers + +From: Pablo Neira Ayuso + +[ Upstream commit 7931d32955e09d0a11b1fe0b6aac1bfa061c005c ] + +register store validation for NFT_DATA_VALUE is conditional, however, +the datatype is always either NFT_DATA_VALUE or NFT_DATA_VERDICT. This +only requires a new helper function to infer the register type from the +set datatype so this conditional check can be removed. Otherwise, +pointer to chain object can be leaked through the registers. 
+ +Fixes: 96518518cc41 ("netfilter: add nftables") +Reported-by: Linus Torvalds +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 5 +++++ + net/netfilter/nf_tables_api.c | 8 ++++---- + net/netfilter/nft_lookup.c | 3 ++- + 3 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index 1e6e4af4df0ae..3ff6b3362800b 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -567,6 +567,11 @@ static inline void *nft_set_priv(const struct nft_set *set) + return (void *)set->data; + } + ++static inline enum nft_data_types nft_set_datatype(const struct nft_set *set) ++{ ++ return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; ++} ++ + static inline bool nft_set_gc_is_pending(const struct nft_set *s) + { + return refcount_read(&s->refs) != 1; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 3999b89793fce..506dc5c4cdccb 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -5328,8 +5328,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, + + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), +- set->dtype == NFT_DATA_VERDICT ? 
NFT_DATA_VERDICT : NFT_DATA_VALUE, +- set->dlen) < 0) ++ nft_set_datatype(set), set->dlen) < 0) + goto nla_put_failure; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && +@@ -10249,6 +10248,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, + + return 0; + default: ++ if (type != NFT_DATA_VALUE) ++ return -EINVAL; ++ + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) + return -EINVAL; + if (len == 0) +@@ -10257,8 +10259,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, + sizeof_field(struct nft_regs, data)) + return -ERANGE; + +- if (data != NULL && type != NFT_DATA_VALUE) +- return -EINVAL; + return 0; + } + } +diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c +index 9d18c5428d53c..b9df27c2718be 100644 +--- a/net/netfilter/nft_lookup.c ++++ b/net/netfilter/nft_lookup.c +@@ -136,7 +136,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, + return -EINVAL; + + err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], +- &priv->dreg, NULL, set->dtype, ++ &priv->dreg, NULL, ++ nft_set_datatype(set), + set->dlen); + if (err < 0) + return err; +-- +2.43.0 + diff --git a/queue-5.15/parisc-use-correct-compat-recv-recvfrom-syscalls.patch b/queue-5.15/parisc-use-correct-compat-recv-recvfrom-syscalls.patch new file mode 100644 index 00000000000..13a8944e043 --- /dev/null +++ b/queue-5.15/parisc-use-correct-compat-recv-recvfrom-syscalls.patch @@ -0,0 +1,48 @@ +From 55593a199a235b76ca99c4ddf793b9e8ca7e2e60 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jun 2024 14:27:55 +0200 +Subject: parisc: use correct compat recv/recvfrom syscalls + +From: Arnd Bergmann + +[ Upstream commit 20a50787349fadf66ac5c48f62e58d753878d2bb ] + +Johannes missed parisc back when he introduced the compat version +of these syscalls, so receiving cmsg messages that require a compat +conversion is still broken. + +Use the correct calls like the other architectures do. 
+ +Fixes: 1dacc76d0014 ("net/compat/wext: send different messages to compat tasks") +Acked-by: Helge Deller +Signed-off-by: Arnd Bergmann +Signed-off-by: Sasha Levin +--- + arch/parisc/kernel/syscalls/syscall.tbl | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl +index 50c759f11c25d..3e7ded09e00a5 100644 +--- a/arch/parisc/kernel/syscalls/syscall.tbl ++++ b/arch/parisc/kernel/syscalls/syscall.tbl +@@ -108,7 +108,7 @@ + 95 common fchown sys_fchown + 96 common getpriority sys_getpriority + 97 common setpriority sys_setpriority +-98 common recv sys_recv ++98 common recv sys_recv compat_sys_recv + 99 common statfs sys_statfs compat_sys_statfs + 100 common fstatfs sys_fstatfs compat_sys_fstatfs + 101 common stat64 sys_stat64 +@@ -135,7 +135,7 @@ + 120 common clone sys_clone_wrapper + 121 common setdomainname sys_setdomainname + 122 common sendfile sys_sendfile compat_sys_sendfile +-123 common recvfrom sys_recvfrom ++123 common recvfrom sys_recvfrom compat_sys_recvfrom + 124 32 adjtimex sys_adjtimex_time32 + 124 64 adjtimex sys_adjtimex + 125 common mprotect sys_mprotect +-- +2.43.0 + diff --git a/queue-5.15/series b/queue-5.15/series index 31fc100565e..13119c3193d 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -300,3 +300,16 @@ cifs-fix-typo-in-module-parameter-enable_gcm_256.patch drm-amdgpu-fix-ubsan-warning-in-kv_dpm.c.patch net-mdio-add-helpers-to-extract-clause-45-regad-and-.patch net-stmmac-assign-configured-channel-value-to-extts-.patch +asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch +net-dsa-microchip-fix-initial-port-flush-problem.patch +ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch +net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch +xdp-remove-warn-from-__xdp_reg_mem_model.patch +tcp-use-bpf-timeout-setting-for-syn-ack-rto.patch +fix-race-for-duplicate-reqsk-on-identical-syn.patch 
+sparc-fix-old-compat_sys_select.patch +sparc-fix-compat-recv-recvfrom-syscalls.patch +parisc-use-correct-compat-recv-recvfrom-syscalls.patch +tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch +netfilter-nf_tables-fully-validate-nft_data_value-on.patch +tracing-net_sched-null-pointer-dereference-in-perf_t.patch diff --git a/queue-5.15/sparc-fix-compat-recv-recvfrom-syscalls.patch b/queue-5.15/sparc-fix-compat-recv-recvfrom-syscalls.patch new file mode 100644 index 00000000000..af9bef8ae7b --- /dev/null +++ b/queue-5.15/sparc-fix-compat-recv-recvfrom-syscalls.patch @@ -0,0 +1,279 @@ +From fcf3afa7529d73f665c0b5253b517369936a2a08 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jun 2024 12:49:39 +0200 +Subject: sparc: fix compat recv/recvfrom syscalls + +From: Arnd Bergmann + +[ Upstream commit d6fbd26fb872ec518d25433a12e8ce8163e20909 ] + +sparc has the wrong compat version of recv() and recvfrom() for both the +direct syscalls and socketcall(). + +The direct syscalls just need to use the compat version. For socketcall, +the same thing could be done, but it seems better to completely remove +the custom assembler code for it and just use the same implementation that +everyone else has. 
+ +Fixes: 1dacc76d0014 ("net/compat/wext: send different messages to compat tasks") +Signed-off-by: Arnd Bergmann +Signed-off-by: Sasha Levin +--- + arch/sparc/kernel/sys32.S | 221 ------------------------- + arch/sparc/kernel/syscalls/syscall.tbl | 4 +- + 2 files changed, 2 insertions(+), 223 deletions(-) + +diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S +index a45f0f31fe51a..a3d308f2043e5 100644 +--- a/arch/sparc/kernel/sys32.S ++++ b/arch/sparc/kernel/sys32.S +@@ -18,224 +18,3 @@ sys32_mmap2: + sethi %hi(sys_mmap), %g1 + jmpl %g1 + %lo(sys_mmap), %g0 + sllx %o5, 12, %o5 +- +- .align 32 +- .globl sys32_socketcall +-sys32_socketcall: /* %o0=call, %o1=args */ +- cmp %o0, 1 +- bl,pn %xcc, do_einval +- cmp %o0, 18 +- bg,pn %xcc, do_einval +- sub %o0, 1, %o0 +- sllx %o0, 5, %o0 +- sethi %hi(__socketcall_table_begin), %g2 +- or %g2, %lo(__socketcall_table_begin), %g2 +- jmpl %g2 + %o0, %g0 +- nop +-do_einval: +- retl +- mov -EINVAL, %o0 +- +- .align 32 +-__socketcall_table_begin: +- +- /* Each entry is exactly 32 bytes. 
*/ +-do_sys_socket: /* sys_socket(int, int, int) */ +-1: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_socket), %g1 +-2: ldswa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(sys_socket), %g0 +-3: ldswa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */ +-4: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_bind), %g1 +-5: ldswa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(sys_bind), %g0 +-6: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */ +-7: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_connect), %g1 +-8: ldswa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(sys_connect), %g0 +-9: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_listen: /* sys_listen(int, int) */ +-10: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_listen), %g1 +- jmpl %g1 + %lo(sys_listen), %g0 +-11: ldswa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +- nop +-do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */ +-12: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_accept), %g1 +-13: lduwa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(sys_accept), %g0 +-14: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */ +-15: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_getsockname), %g1 +-16: lduwa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(sys_getsockname), %g0 +-17: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */ +-18: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_getpeername), %g1 +-19: lduwa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(sys_getpeername), %g0 +-20: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */ +-21: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_socketpair), %g1 +-22: ldswa [%o1 + 0x8] %asi, %o2 +-23: lduwa [%o1 + 0xc] %asi, %o3 +- jmpl %g1 + %lo(sys_socketpair), %g0 +-24: ldswa [%o1 + 0x4] 
%asi, %o1 +- nop +- nop +-do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */ +-25: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_send), %g1 +-26: lduwa [%o1 + 0x8] %asi, %o2 +-27: lduwa [%o1 + 0xc] %asi, %o3 +- jmpl %g1 + %lo(sys_send), %g0 +-28: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +-do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */ +-29: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_recv), %g1 +-30: lduwa [%o1 + 0x8] %asi, %o2 +-31: lduwa [%o1 + 0xc] %asi, %o3 +- jmpl %g1 + %lo(sys_recv), %g0 +-32: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +-do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */ +-33: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_sendto), %g1 +-34: lduwa [%o1 + 0x8] %asi, %o2 +-35: lduwa [%o1 + 0xc] %asi, %o3 +-36: lduwa [%o1 + 0x10] %asi, %o4 +-37: ldswa [%o1 + 0x14] %asi, %o5 +- jmpl %g1 + %lo(sys_sendto), %g0 +-38: lduwa [%o1 + 0x4] %asi, %o1 +-do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */ +-39: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_recvfrom), %g1 +-40: lduwa [%o1 + 0x8] %asi, %o2 +-41: lduwa [%o1 + 0xc] %asi, %o3 +-42: lduwa [%o1 + 0x10] %asi, %o4 +-43: lduwa [%o1 + 0x14] %asi, %o5 +- jmpl %g1 + %lo(sys_recvfrom), %g0 +-44: lduwa [%o1 + 0x4] %asi, %o1 +-do_sys_shutdown: /* sys_shutdown(int, int) */ +-45: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_shutdown), %g1 +- jmpl %g1 + %lo(sys_shutdown), %g0 +-46: ldswa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +- nop +-do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */ +-47: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_setsockopt), %g1 +-48: ldswa [%o1 + 0x8] %asi, %o2 +-49: lduwa [%o1 + 0xc] %asi, %o3 +-50: ldswa [%o1 + 0x10] %asi, %o4 +- jmpl %g1 + %lo(sys_setsockopt), %g0 +-51: ldswa [%o1 + 0x4] %asi, %o1 +- nop +-do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */ +-52: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_getsockopt), %g1 +-53: ldswa [%o1 + 0x8] %asi, %o2 
+-54: lduwa [%o1 + 0xc] %asi, %o3 +-55: lduwa [%o1 + 0x10] %asi, %o4 +- jmpl %g1 + %lo(sys_getsockopt), %g0 +-56: ldswa [%o1 + 0x4] %asi, %o1 +- nop +-do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ +-57: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(compat_sys_sendmsg), %g1 +-58: lduwa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(compat_sys_sendmsg), %g0 +-59: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */ +-60: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(compat_sys_recvmsg), %g1 +-61: lduwa [%o1 + 0x8] %asi, %o2 +- jmpl %g1 + %lo(compat_sys_recvmsg), %g0 +-62: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- nop +-do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ +-63: ldswa [%o1 + 0x0] %asi, %o0 +- sethi %hi(sys_accept4), %g1 +-64: lduwa [%o1 + 0x8] %asi, %o2 +-65: ldswa [%o1 + 0xc] %asi, %o3 +- jmpl %g1 + %lo(sys_accept4), %g0 +-66: lduwa [%o1 + 0x4] %asi, %o1 +- nop +- nop +- +- .section __ex_table,"a" +- .align 4 +- .word 1b, __retl_efault, 2b, __retl_efault +- .word 3b, __retl_efault, 4b, __retl_efault +- .word 5b, __retl_efault, 6b, __retl_efault +- .word 7b, __retl_efault, 8b, __retl_efault +- .word 9b, __retl_efault, 10b, __retl_efault +- .word 11b, __retl_efault, 12b, __retl_efault +- .word 13b, __retl_efault, 14b, __retl_efault +- .word 15b, __retl_efault, 16b, __retl_efault +- .word 17b, __retl_efault, 18b, __retl_efault +- .word 19b, __retl_efault, 20b, __retl_efault +- .word 21b, __retl_efault, 22b, __retl_efault +- .word 23b, __retl_efault, 24b, __retl_efault +- .word 25b, __retl_efault, 26b, __retl_efault +- .word 27b, __retl_efault, 28b, __retl_efault +- .word 29b, __retl_efault, 30b, __retl_efault +- .word 31b, __retl_efault, 32b, __retl_efault +- .word 33b, __retl_efault, 34b, __retl_efault +- .word 35b, __retl_efault, 36b, __retl_efault +- .word 37b, __retl_efault, 38b, __retl_efault +- .word 39b, __retl_efault, 40b, 
__retl_efault +- .word 41b, __retl_efault, 42b, __retl_efault +- .word 43b, __retl_efault, 44b, __retl_efault +- .word 45b, __retl_efault, 46b, __retl_efault +- .word 47b, __retl_efault, 48b, __retl_efault +- .word 49b, __retl_efault, 50b, __retl_efault +- .word 51b, __retl_efault, 52b, __retl_efault +- .word 53b, __retl_efault, 54b, __retl_efault +- .word 55b, __retl_efault, 56b, __retl_efault +- .word 57b, __retl_efault, 58b, __retl_efault +- .word 59b, __retl_efault, 60b, __retl_efault +- .word 61b, __retl_efault, 62b, __retl_efault +- .word 63b, __retl_efault, 64b, __retl_efault +- .word 65b, __retl_efault, 66b, __retl_efault +- .previous +diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl +index 7925e762e7b7e..4c3ee869ea46d 100644 +--- a/arch/sparc/kernel/syscalls/syscall.tbl ++++ b/arch/sparc/kernel/syscalls/syscall.tbl +@@ -155,7 +155,7 @@ + 123 32 fchown sys_fchown16 + 123 64 fchown sys_fchown + 124 common fchmod sys_fchmod +-125 common recvfrom sys_recvfrom ++125 common recvfrom sys_recvfrom compat_sys_recvfrom + 126 32 setreuid sys_setreuid16 + 126 64 setreuid sys_setreuid + 127 32 setregid sys_setregid16 +@@ -247,7 +247,7 @@ + 204 32 readdir sys_old_readdir compat_sys_old_readdir + 204 64 readdir sys_nis_syscall + 205 common readahead sys_readahead compat_sys_readahead +-206 common socketcall sys_socketcall sys32_socketcall ++206 common socketcall sys_socketcall compat_sys_socketcall + 207 common syslog sys_syslog + 208 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie + 209 common fadvise64 sys_fadvise64 compat_sys_fadvise64 +-- +2.43.0 + diff --git a/queue-5.15/sparc-fix-old-compat_sys_select.patch b/queue-5.15/sparc-fix-old-compat_sys_select.patch new file mode 100644 index 00000000000..c208c93d5f6 --- /dev/null +++ b/queue-5.15/sparc-fix-old-compat_sys_select.patch @@ -0,0 +1,39 @@ +From c752ffc24514e96a0c268aacb9f9dc23c2982939 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 19 Jun 
2024 14:07:30 +0200 +Subject: sparc: fix old compat_sys_select() + +From: Arnd Bergmann + +[ Upstream commit bae6428a9fffb2023191b0723e276cf1377a7c9f ] + +sparc has two identical select syscalls at numbers 93 and 230, respectively. +During the conversion to the modern syscall.tbl format, the older one of the +two broke in compat mode, and now refers to the native 64-bit syscall. + +Restore the correct behavior. This has very little effect, as glibc has +been using the newer number anyway. + +Fixes: 6ff645dd683a ("sparc: add system call table generation support") +Signed-off-by: Arnd Bergmann +Signed-off-by: Sasha Levin +--- + arch/sparc/kernel/syscalls/syscall.tbl | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl +index c37764dc764d1..7925e762e7b7e 100644 +--- a/arch/sparc/kernel/syscalls/syscall.tbl ++++ b/arch/sparc/kernel/syscalls/syscall.tbl +@@ -117,7 +117,7 @@ + 90 common dup2 sys_dup2 + 91 32 setfsuid32 sys_setfsuid + 92 common fcntl sys_fcntl compat_sys_fcntl +-93 common select sys_select ++93 common select sys_select compat_sys_select + 94 32 setfsgid32 sys_setfsgid + 95 common fsync sys_fsync + 96 common setpriority sys_setpriority +-- +2.43.0 + diff --git a/queue-5.15/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch b/queue-5.15/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch new file mode 100644 index 00000000000..32854cb2340 --- /dev/null +++ b/queue-5.15/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch @@ -0,0 +1,168 @@ +From ddd2a3040c0a3aa030bda82e66af9a07f1e3c851 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Jun 2024 14:43:23 +0000 +Subject: tcp: fix tcp_rcv_fastopen_synack() to enter TCP_CA_Loss for failed + TFO + +From: Neal Cardwell + +[ Upstream commit 5dfe9d273932c647bdc9d664f939af9a5a398cbc ] + +Testing determined that the recent commit 9e046bb111f1 ("tcp: clear +tp->retrans_stamp in 
tcp_rcv_fastopen_synack()") has a race, and does +not always ensure retrans_stamp is 0 after a TFO payload retransmit. + +If transmit completion for the SYN+data skb happens after the client +TCP stack receives the SYNACK (which sometimes happens), then +retrans_stamp can erroneously remain non-zero for the lifetime of the +connection, causing a premature ETIMEDOUT later. + +Testing and tracing showed that the buggy scenario is the following +somewhat tricky sequence: + ++ Client attempts a TFO handshake. tcp_send_syn_data() sends SYN + TFO + cookie + data in a single packet in the syn_data skb. It hands the + syn_data skb to tcp_transmit_skb(), which makes a clone. Crucially, + it then reuses the same original (non-clone) syn_data skb, + transforming it by advancing the seq by one byte and removing the + FIN bit, and enqueues the resulting payload-only skb in the + sk->tcp_rtx_queue. + ++ Client sets retrans_stamp to the start time of the three-way + handshake. + ++ Cookie mismatches or server has TFO disabled, and server only ACKs + SYN. + ++ tcp_ack() sees SYN is acked, tcp_clean_rtx_queue() clears + retrans_stamp. + ++ Since the client SYN was acked but not the payload, the TFO failure + code path in tcp_rcv_fastopen_synack() tries to retransmit the + payload skb. However, in some cases the transmit completion for the + clone of the syn_data (which had SYN + TFO cookie + data) hasn't + happened. In those cases, skb_still_in_host_queue() returns true + for the retransmitted TFO payload, because the clone of the syn_data + skb has not had its tx completion. + ++ Because skb_still_in_host_queue() finds skb_fclone_busy() is true, + it sets the TSQ_THROTTLED bit and the retransmit does not happen in + the tcp_rcv_fastopen_synack() call chain. + ++ The tcp_rcv_fastopen_synack() code next implicitly assumes the + retransmit process is finished, and sets retrans_stamp to 0 to clear + it, but this is later overwritten (see below). 
+ ++ Later, upon tx completion, tcp_tsq_write() calls + tcp_xmit_retransmit_queue(), which puts the retransmit in flight and + sets retrans_stamp to a non-zero value. + ++ The client receives an ACK for the retransmitted TFO payload data. + ++ Since we're in CA_Open and there are no dupacks/SACKs/DSACKs/ECN to + make tcp_ack_is_dubious() true and make us call + tcp_fastretrans_alert() and reach a code path that clears + retrans_stamp, retrans_stamp stays nonzero. + ++ Later, if there is a TLP, RTO, RTO sequence, then the connection + will suffer an early ETIMEDOUT due to the erroneously ancient + retrans_stamp. + +The fix: this commit refactors the code to have +tcp_rcv_fastopen_synack() retransmit by reusing the relevant parts of +tcp_simple_retransmit() that enter CA_Loss (without changing cwnd) and +call tcp_xmit_retransmit_queue(). We have tcp_simple_retransmit() and +tcp_rcv_fastopen_synack() share code in this way because in both cases +we get a packet indicating non-congestion loss (MTU reduction or TFO +failure) and thus in both cases we want to retransmit as many packets +as cwnd allows, without reducing cwnd. And given that retransmits will +set retrans_stamp to a non-zero value (and may do so in a later +calling context due to TSQ), we also want to enter CA_Loss so that we +track when all retransmitted packets are ACked and clear retrans_stamp +when that happens (to ensure later recurring RTOs are using the +correct retrans_stamp and don't declare ETIMEDOUT prematurely). 
+ +Fixes: 9e046bb111f1 ("tcp: clear tp->retrans_stamp in tcp_rcv_fastopen_synack()") +Fixes: a7abf3cd76e1 ("tcp: consider using standard rtx logic in tcp_rcv_fastopen_synack()") +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Link: https://patch.msgid.link/20240624144323.2371403-1-ncardwell.sw@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 38 +++++++++++++++++++++++++++----------- + 1 file changed, 27 insertions(+), 11 deletions(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index d6330c26e0e10..eaa66f51c6a84 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2747,13 +2747,37 @@ static void tcp_mtup_probe_success(struct sock *sk) + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS); + } + ++/* Sometimes we deduce that packets have been dropped due to reasons other than ++ * congestion, like path MTU reductions or failed client TFO attempts. In these ++ * cases we call this function to retransmit as many packets as cwnd allows, ++ * without reducing cwnd. Given that retransmits will set retrans_stamp to a ++ * non-zero value (and may do so in a later calling context due to TSQ), we ++ * also enter CA_Loss so that we track when all retransmitted packets are ACKed ++ * and clear retrans_stamp when that happens (to ensure later recurring RTOs ++ * are using the correct retrans_stamp and don't declare ETIMEDOUT ++ * prematurely). ++ */ ++static void tcp_non_congestion_loss_retransmit(struct sock *sk) ++{ ++ const struct inet_connection_sock *icsk = inet_csk(sk); ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ if (icsk->icsk_ca_state != TCP_CA_Loss) { ++ tp->high_seq = tp->snd_nxt; ++ tp->snd_ssthresh = tcp_current_ssthresh(sk); ++ tp->prior_ssthresh = 0; ++ tp->undo_marker = 0; ++ tcp_set_ca_state(sk, TCP_CA_Loss); ++ } ++ tcp_xmit_retransmit_queue(sk); ++} ++ + /* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. 
This is used for path mtu discovery. + * The socket is already locked here. + */ + void tcp_simple_retransmit(struct sock *sk) + { +- const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int mss; +@@ -2793,14 +2817,7 @@ void tcp_simple_retransmit(struct sock *sk) + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ +- if (icsk->icsk_ca_state != TCP_CA_Loss) { +- tp->high_seq = tp->snd_nxt; +- tp->snd_ssthresh = tcp_current_ssthresh(sk); +- tp->prior_ssthresh = 0; +- tp->undo_marker = 0; +- tcp_set_ca_state(sk, TCP_CA_Loss); +- } +- tcp_xmit_retransmit_queue(sk); ++ tcp_non_congestion_loss_retransmit(sk); + } + EXPORT_SYMBOL(tcp_simple_retransmit); + +@@ -6103,8 +6120,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, + tp->fastopen_client_fail = TFO_DATA_NOT_ACKED; + skb_rbtree_walk_from(data) + tcp_mark_skb_lost(sk, data); +- tcp_xmit_retransmit_queue(sk); +- tp->retrans_stamp = 0; ++ tcp_non_congestion_loss_retransmit(sk); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + return true; +-- +2.43.0 + diff --git a/queue-5.15/tcp-use-bpf-timeout-setting-for-syn-ack-rto.patch b/queue-5.15/tcp-use-bpf-timeout-setting-for-syn-ack-rto.patch new file mode 100644 index 00000000000..55ff8700b95 --- /dev/null +++ b/queue-5.15/tcp-use-bpf-timeout-setting-for-syn-ack-rto.patch @@ -0,0 +1,160 @@ +From 51aa83486c6ba6445c1fd90207ab09714f7e1b99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jan 2022 22:26:21 +0300 +Subject: tcp: Use BPF timeout setting for SYN ACK RTO + +From: Akhmat Karakotov + +[ Upstream commit 5903123f662ed18483f05cac3f9e800a074c29ff ] + +When setting RTO through BPF program, some SYN ACK packets were unaffected +and continued to use TCP_TIMEOUT_INIT constant. This patch adds timeout +option to struct request_sock. 
Option is initialized with TCP_TIMEOUT_INIT +and is reassigned through BPF using tcp_timeout_init call. SYN ACK +retransmits now use newly added timeout option. + +Signed-off-by: Akhmat Karakotov +Acked-by: Martin KaFai Lau + +v2: + - Add timeout option to struct request_sock. Do not call + tcp_timeout_init on every syn ack retransmit. + +v3: + - Use unsigned long for min. Bound tcp_timeout_init to TCP_RTO_MAX. + +v4: - Refactor duplicate code by adding reqsk_timeout function. +Signed-off-by: David S. Miller +Stable-dep-of: ff46e3b44219 ("Fix race for duplicate reqsk on identical SYN") +Signed-off-by: Sasha Levin +--- + include/net/inet_connection_sock.h | 8 ++++++++ + include/net/request_sock.h | 2 ++ + include/net/tcp.h | 2 +- + net/ipv4/inet_connection_sock.c | 5 +---- + net/ipv4/tcp_input.c | 8 +++++--- + net/ipv4/tcp_minisocks.c | 5 ++--- + 6 files changed, 19 insertions(+), 11 deletions(-) + +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index b6b7e210f9d7a..7794cf2b5ef50 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -284,6 +284,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) + bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); + void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); + ++static inline unsigned long ++reqsk_timeout(struct request_sock *req, unsigned long max_timeout) ++{ ++ u64 timeout = (u64)req->timeout << req->num_timeout; ++ ++ return (unsigned long)min_t(u64, timeout, max_timeout); ++} ++ + static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) + { + /* The below has to be done to allow calling inet_csk_destroy_sock */ +diff --git a/include/net/request_sock.h b/include/net/request_sock.h +index 29e41ff3ec933..144c39db9898a 100644 +--- a/include/net/request_sock.h ++++ b/include/net/request_sock.h +@@ -70,6 +70,7 @@ struct request_sock { + struct saved_syn 
*saved_syn; + u32 secid; + u32 peer_secid; ++ u32 timeout; + }; + + static inline struct request_sock *inet_reqsk(const struct sock *sk) +@@ -104,6 +105,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; ++ req->timeout = 0; + req->num_timeout = 0; + req->num_retrans = 0; + req->sk = NULL; +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 08923ed4278f0..30f8111f750b5 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -2362,7 +2362,7 @@ static inline u32 tcp_timeout_init(struct sock *sk) + + if (timeout <= 0) + timeout = TCP_TIMEOUT_INIT; +- return timeout; ++ return min_t(int, timeout, TCP_RTO_MAX); + } + + static inline u32 tcp_rwnd_init_bpf(struct sock *sk) +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index 27975a44d1f9d..43e370f45b81d 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -889,12 +889,9 @@ static void reqsk_timer_handler(struct timer_list *t) + (!resend || + !inet_rtx_syn_ack(sk_listener, req) || + inet_rsk(req)->acked)) { +- unsigned long timeo; +- + if (req->num_timeout++ == 0) + atomic_dec(&queue->young); +- timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); +- mod_timer(&req->rsk_timer, jiffies + timeo); ++ mod_timer(&req->rsk_timer, jiffies + reqsk_timeout(req, TCP_RTO_MAX)); + + if (!nreq) + return; +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 5ad7e6965a645..ca72dbaa27b46 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -6769,6 +6769,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); + ireq->ireq_family = sk_listener->sk_family; ++ req->timeout = TCP_TIMEOUT_INIT; + } + + return req; +@@ -6991,9 +6992,10 @@ int tcp_conn_request(struct request_sock_ops 
*rsk_ops, + sock_put(fastopen_sk); + } else { + tcp_rsk(req)->tfo_listener = false; +- if (!want_cookie) +- inet_csk_reqsk_queue_hash_add(sk, req, +- tcp_timeout_init((struct sock *)req)); ++ if (!want_cookie) { ++ req->timeout = tcp_timeout_init((struct sock *)req); ++ inet_csk_reqsk_queue_hash_add(sk, req, req->timeout); ++ } + af_ops->send_synack(sk, dst, &fl, req, &foc, + !want_cookie ? TCP_SYNACK_NORMAL : + TCP_SYNACK_COOKIE, +diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c +index 2606a5571116a..d84b71f70766b 100644 +--- a/net/ipv4/tcp_minisocks.c ++++ b/net/ipv4/tcp_minisocks.c +@@ -593,7 +593,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + * it can be estimated (approximately) + * from another data. + */ +- tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<num_timeout); ++ tmp_opt.ts_recent_stamp = ktime_get_seconds() - reqsk_timeout(req, TCP_RTO_MAX) / HZ; + paws_reject = tcp_paws_reject(&tmp_opt, th->rst); + } + } +@@ -632,8 +632,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + !inet_rtx_syn_ack(sk, req)) { + unsigned long expires = jiffies; + +- expires += min(TCP_TIMEOUT_INIT << req->num_timeout, +- TCP_RTO_MAX); ++ expires += reqsk_timeout(req, TCP_RTO_MAX); + if (!fastopen) + mod_timer_pending(&req->rsk_timer, expires); + else +-- +2.43.0 + diff --git a/queue-5.15/tracing-net_sched-null-pointer-dereference-in-perf_t.patch b/queue-5.15/tracing-net_sched-null-pointer-dereference-in-perf_t.patch new file mode 100644 index 00000000000..469eb817be4 --- /dev/null +++ b/queue-5.15/tracing-net_sched-null-pointer-dereference-in-perf_t.patch @@ -0,0 +1,306 @@ +From b71a8290784ee1b486421dc1ed8bd1e14b321533 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jun 2024 02:33:23 +0900 +Subject: tracing/net_sched: NULL pointer dereference in + perf_trace_qdisc_reset() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Yunseong 
Kim + +[ Upstream commit bab4923132feb3e439ae45962979c5d9d5c7c1f1 ] + +In the TRACE_EVENT(qdisc_reset) NULL dereference occurred from + + qdisc->dev_queue->dev ->name + +This situation simulated from bunch of veths and Bluetooth disconnection +and reconnection. + +During qdisc initialization, qdisc was being set to noop_queue. +In veth_init_queue, the initial tx_num was reduced back to one, +causing the qdisc reset to be called with noop, which led to the kernel +panic. + +I've attached the GitHub gist link that C converted syz-execprogram +source code and 3 log of reproduced vmcore-dmesg. + + https://gist.github.com/yskelg/cc64562873ce249cdd0d5a358b77d740 + +Yeoreum and I use two fuzzing tool simultaneously. + +One process with syz-executor : https://github.com/google/syzkaller + + $ ./syz-execprog -executor=./syz-executor -repeat=1 -sandbox=setuid \ + -enable=none -collide=false log1 + +The other process with perf fuzzer: + https://github.com/deater/perf_event_tests/tree/master/fuzzer + + $ perf_event_tests/fuzzer/perf_fuzzer + +I think this will happen on the kernel version. + + Linux kernel version +v6.7.10, +v6.8, +v6.9 and it could happen in v6.10. + +This occurred from 51270d573a8d. I think this patch is absolutely +necessary. Previously, It was showing not intended string value of name. + +I've reproduced 3 time from my fedora 40 Debug Kernel with any other module +or patched. 
+ + version: 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug + +[ 5287.164555] veth0_vlan: left promiscuous mode +[ 5287.164929] veth1_macvtap: left promiscuous mode +[ 5287.164950] veth0_macvtap: left promiscuous mode +[ 5287.164983] veth1_vlan: left promiscuous mode +[ 5287.165008] veth0_vlan: left promiscuous mode +[ 5287.165450] veth1_macvtap: left promiscuous mode +[ 5287.165472] veth0_macvtap: left promiscuous mode +[ 5287.165502] veth1_vlan: left promiscuous mode +… +[ 5297.598240] bridge0: port 2(bridge_slave_1) entered blocking state +[ 5297.598262] bridge0: port 2(bridge_slave_1) entered forwarding state +[ 5297.598296] bridge0: port 1(bridge_slave_0) entered blocking state +[ 5297.598313] bridge0: port 1(bridge_slave_0) entered forwarding state +[ 5297.616090] 8021q: adding VLAN 0 to HW filter on device bond0 +[ 5297.620405] bridge0: port 1(bridge_slave_0) entered disabled state +[ 5297.620730] bridge0: port 2(bridge_slave_1) entered disabled state +[ 5297.627247] 8021q: adding VLAN 0 to HW filter on device team0 +[ 5297.629636] bridge0: port 1(bridge_slave_0) entered blocking state +… +[ 5298.002798] bridge_slave_0: left promiscuous mode +[ 5298.002869] bridge0: port 1(bridge_slave_0) entered disabled state +[ 5298.309444] bond0 (unregistering): (slave bond_slave_0): Releasing backup interface +[ 5298.315206] bond0 (unregistering): (slave bond_slave_1): Releasing backup interface +[ 5298.320207] bond0 (unregistering): Released all slaves +[ 5298.354296] hsr_slave_0: left promiscuous mode +[ 5298.360750] hsr_slave_1: left promiscuous mode +[ 5298.374889] veth1_macvtap: left promiscuous mode +[ 5298.374931] veth0_macvtap: left promiscuous mode +[ 5298.374988] veth1_vlan: left promiscuous mode +[ 5298.375024] veth0_vlan: left promiscuous mode +[ 5299.109741] team0 (unregistering): Port device team_slave_1 removed +[ 5299.185870] team0 (unregistering): Port device team_slave_0 removed +… +[ 5300.155443] Bluetooth: hci3: unexpected cc 0x0c03 
length: 249 > 1 +[ 5300.155724] Bluetooth: hci3: unexpected cc 0x1003 length: 249 > 9 +[ 5300.155988] Bluetooth: hci3: unexpected cc 0x1001 length: 249 > 9 +…. +[ 5301.075531] team0: Port device team_slave_1 added +[ 5301.085515] bridge0: port 1(bridge_slave_0) entered blocking state +[ 5301.085531] bridge0: port 1(bridge_slave_0) entered disabled state +[ 5301.085588] bridge_slave_0: entered allmulticast mode +[ 5301.085800] bridge_slave_0: entered promiscuous mode +[ 5301.095617] bridge0: port 1(bridge_slave_0) entered blocking state +[ 5301.095633] bridge0: port 1(bridge_slave_0) entered disabled state +… +[ 5301.149734] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link +[ 5301.173234] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link +[ 5301.180517] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link +[ 5301.193481] hsr_slave_0: entered promiscuous mode +[ 5301.204425] hsr_slave_1: entered promiscuous mode +[ 5301.210172] debugfs: Directory 'hsr0' with parent 'hsr' already present! +[ 5301.210185] Cannot create hsr debugfs directory +[ 5301.224061] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link +[ 5301.246901] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link +[ 5301.255934] team0: Port device team_slave_0 added +[ 5301.256480] team0: Port device team_slave_1 added +[ 5301.256948] team0: Port device team_slave_0 added +… +[ 5301.435928] hsr_slave_0: entered promiscuous mode +[ 5301.446029] hsr_slave_1: entered promiscuous mode +[ 5301.455872] debugfs: Directory 'hsr0' with parent 'hsr' already present! +[ 5301.455884] Cannot create hsr debugfs directory +[ 5301.502664] hsr_slave_0: entered promiscuous mode +[ 5301.513675] hsr_slave_1: entered promiscuous mode +[ 5301.526155] debugfs: Directory 'hsr0' with parent 'hsr' already present! 
+[ 5301.526164] Cannot create hsr debugfs directory +[ 5301.563662] hsr_slave_0: entered promiscuous mode +[ 5301.576129] hsr_slave_1: entered promiscuous mode +[ 5301.580259] debugfs: Directory 'hsr0' with parent 'hsr' already present! +[ 5301.580270] Cannot create hsr debugfs directory +[ 5301.590269] 8021q: adding VLAN 0 to HW filter on device bond0 + +[ 5301.595872] KASAN: null-ptr-deref in range [0x0000000000000130-0x0000000000000137] +[ 5301.595877] Mem abort info: +[ 5301.595881] ESR = 0x0000000096000006 +[ 5301.595885] EC = 0x25: DABT (current EL), IL = 32 bits +[ 5301.595889] SET = 0, FnV = 0 +[ 5301.595893] EA = 0, S1PTW = 0 +[ 5301.595896] FSC = 0x06: level 2 translation fault +[ 5301.595900] Data abort info: +[ 5301.595903] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 +[ 5301.595907] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 +[ 5301.595911] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 +[ 5301.595915] [dfff800000000026] address between user and kernel address ranges +[ 5301.595971] Internal error: Oops: 0000000096000006 [#1] SMP +… +[ 5301.596076] CPU: 2 PID: 102769 Comm: +syz-executor.3 Kdump: loaded Tainted: + G W ------- --- 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug #1 +[ 5301.596080] Hardware name: VMware, Inc. 
VMware20,1/VBSA, + BIOS VMW201.00V.21805430.BA64.2305221830 05/22/2023 +[ 5301.596082] pstate: 01400005 (nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) +[ 5301.596085] pc : strnlen+0x40/0x88 +[ 5301.596114] lr : trace_event_get_offsets_qdisc_reset+0x6c/0x2b0 +[ 5301.596124] sp : ffff8000beef6b40 +[ 5301.596126] x29: ffff8000beef6b40 x28: dfff800000000000 x27: 0000000000000001 +[ 5301.596131] x26: 6de1800082c62bd0 x25: 1ffff000110aa9e0 x24: ffff800088554f00 +[ 5301.596136] x23: ffff800088554ec0 x22: 0000000000000130 x21: 0000000000000140 +[ 5301.596140] x20: dfff800000000000 x19: ffff8000beef6c60 x18: ffff7000115106d8 +[ 5301.596143] x17: ffff800121bad000 x16: ffff800080020000 x15: 0000000000000006 +[ 5301.596147] x14: 0000000000000002 x13: ffff0001f3ed8d14 x12: ffff700017ddeda5 +[ 5301.596151] x11: 1ffff00017ddeda4 x10: ffff700017ddeda4 x9 : ffff800082cc5eec +[ 5301.596155] x8 : 0000000000000004 x7 : 00000000f1f1f1f1 x6 : 00000000f2f2f200 +[ 5301.596158] x5 : 00000000f3f3f3f3 x4 : ffff700017dded80 x3 : 00000000f204f1f1 +[ 5301.596162] x2 : 0000000000000026 x1 : 0000000000000000 x0 : 0000000000000130 +[ 5301.596166] Call trace: +[ 5301.596175] strnlen+0x40/0x88 +[ 5301.596179] trace_event_get_offsets_qdisc_reset+0x6c/0x2b0 +[ 5301.596182] perf_trace_qdisc_reset+0xb0/0x538 +[ 5301.596184] __traceiter_qdisc_reset+0x68/0xc0 +[ 5301.596188] qdisc_reset+0x43c/0x5e8 +[ 5301.596190] netif_set_real_num_tx_queues+0x288/0x770 +[ 5301.596194] veth_init_queues+0xfc/0x130 [veth] +[ 5301.596198] veth_newlink+0x45c/0x850 [veth] +[ 5301.596202] rtnl_newlink_create+0x2c8/0x798 +[ 5301.596205] __rtnl_newlink+0x92c/0xb60 +[ 5301.596208] rtnl_newlink+0xd8/0x130 +[ 5301.596211] rtnetlink_rcv_msg+0x2e0/0x890 +[ 5301.596214] netlink_rcv_skb+0x1c4/0x380 +[ 5301.596225] rtnetlink_rcv+0x20/0x38 +[ 5301.596227] netlink_unicast+0x3c8/0x640 +[ 5301.596231] netlink_sendmsg+0x658/0xa60 +[ 5301.596234] __sock_sendmsg+0xd0/0x180 +[ 5301.596243] __sys_sendto+0x1c0/0x280 +[ 5301.596246] 
__arm64_sys_sendto+0xc8/0x150 +[ 5301.596249] invoke_syscall+0xdc/0x268 +[ 5301.596256] el0_svc_common.constprop.0+0x16c/0x240 +[ 5301.596259] do_el0_svc+0x48/0x68 +[ 5301.596261] el0_svc+0x50/0x188 +[ 5301.596265] el0t_64_sync_handler+0x120/0x130 +[ 5301.596268] el0t_64_sync+0x194/0x198 +[ 5301.596272] Code: eb15001f 54000120 d343fc02 12000801 (38f46842) +[ 5301.596285] SMP: stopping secondary CPUs +[ 5301.597053] Starting crashdump kernel... +[ 5301.597057] Bye! + +After applying our patch, I didn't find any kernel panic errors. + +We've found a simple reproducer + + # echo 1 > /sys/kernel/debug/tracing/events/qdisc/qdisc_reset/enable + + # ip link add veth0 type veth peer name veth1 + + Error: Unknown device type. + +However, without our patch applied, I tested upstream 6.10.0-rc3 kernel +using the qdisc_reset event and the ip command on my qemu virtual machine. + +This 2 commands makes always kernel panic. + +Linux version: 6.10.0-rc3 + +[ 0.000000] Linux version 6.10.0-rc3-00164-g44ef20baed8e-dirty +(paran@fedora) (gcc (GCC) 14.1.1 20240522 (Red Hat 14.1.1-4), GNU ld +version 2.41-34.fc40) #20 SMP PREEMPT Sat Jun 15 16:51:25 KST 2024 + +Kernel panic message: + +[ 615.236484] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP +[ 615.237250] Dumping ftrace buffer: +[ 615.237679] (ftrace buffer empty) +[ 615.238097] Modules linked in: veth crct10dif_ce virtio_gpu +virtio_dma_buf drm_shmem_helper drm_kms_helper zynqmp_fpga xilinx_can +xilinx_spi xilinx_selectmap xilinx_core xilinx_pr_decoupler versal_fpga +uvcvideo uvc videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videodev +videobuf2_common mc usbnet deflate zstd ubifs ubi rcar_canfd rcar_can +omap_mailbox ntb_msi_test ntb_hw_epf lattice_sysconfig_spi +lattice_sysconfig ice40_spi gpio_xilinx dwmac_altr_socfpga mdio_regmap +stmmac_platform stmmac pcs_xpcs dfl_fme_region dfl_fme_mgr dfl_fme_br +dfl_afu dfl fpga_region fpga_bridge can can_dev br_netfilter bridge stp +llc atl1c ath11k_pci mhi ath11k_ahb 
ath11k qmi_helpers ath10k_sdio +ath10k_pci ath10k_core ath mac80211 libarc4 cfg80211 drm fuse backlight ipv6 +Jun 22 02:36:5[3 6k152.62-4sm98k4-0k]v kCePUr:n e1l :P IUDn:a b4le6 +8t oC ohmma: nidpl eN oketr nteali nptaedg i6n.g1 0re.0q-urecs3t- 0at0 +1v6i4r-tgu4a4le fa2d0dbraeeds0se-dir tyd f#f2f08 + 615.252376] Hardware name: linux,dummy-virt (DT) +[ 615.253220] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS +BTYPE=--) +[ 615.254433] pc : strnlen+0x6c/0xe0 +[ 615.255096] lr : trace_event_get_offsets_qdisc_reset+0x94/0x3d0 +[ 615.256088] sp : ffff800080b269a0 +[ 615.256615] x29: ffff800080b269a0 x28: ffffc070f3f98500 x27: +0000000000000001 +[ 615.257831] x26: 0000000000000010 x25: ffffc070f3f98540 x24: +ffffc070f619cf60 +[ 615.259020] x23: 0000000000000128 x22: 0000000000000138 x21: +dfff800000000000 +[ 615.260241] x20: ffffc070f631ad00 x19: 0000000000000128 x18: +ffffc070f448b800 +[ 615.261454] x17: 0000000000000000 x16: 0000000000000001 x15: +ffffc070f4ba2a90 +[ 615.262635] x14: ffff700010164d73 x13: 1ffff80e1e8d5eb3 x12: +1ffff00010164d72 +[ 615.263877] x11: ffff700010164d72 x10: dfff800000000000 x9 : +ffffc070e85d6184 +[ 615.265047] x8 : ffffc070e4402070 x7 : 000000000000f1f1 x6 : +000000001504a6d3 +[ 615.266336] x5 : ffff28ca21122140 x4 : ffffc070f5043ea8 x3 : +0000000000000000 +[ 615.267528] x2 : 0000000000000025 x1 : 0000000000000000 x0 : +0000000000000000 +[ 615.268747] Call trace: +[ 615.269180] strnlen+0x6c/0xe0 +[ 615.269767] trace_event_get_offsets_qdisc_reset+0x94/0x3d0 +[ 615.270716] trace_event_raw_event_qdisc_reset+0xe8/0x4e8 +[ 615.271667] __traceiter_qdisc_reset+0xa0/0x140 +[ 615.272499] qdisc_reset+0x554/0x848 +[ 615.273134] netif_set_real_num_tx_queues+0x360/0x9a8 +[ 615.274050] veth_init_queues+0x110/0x220 [veth] +[ 615.275110] veth_newlink+0x538/0xa50 [veth] +[ 615.276172] __rtnl_newlink+0x11e4/0x1bc8 +[ 615.276944] rtnl_newlink+0xac/0x120 +[ 615.277657] rtnetlink_rcv_msg+0x4e4/0x1370 +[ 615.278409] netlink_rcv_skb+0x25c/0x4f0 +[ 
615.279122] rtnetlink_rcv+0x48/0x70 +[ 615.279769] netlink_unicast+0x5a8/0x7b8 +[ 615.280462] netlink_sendmsg+0xa70/0x1190 + +Yeoreum and I don't know if the patch we wrote will fix the underlying +cause, but we think that priority is to prevent kernel panic happening. +So, we're sending this patch. + +Fixes: 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string") +Link: https://lore.kernel.org/lkml/20240229143432.273b4871@gandalf.local.home/t/ +Cc: netdev@vger.kernel.org +Tested-by: Yunseong Kim +Signed-off-by: Yunseong Kim +Signed-off-by: Yeoreum Yun +Link: https://lore.kernel.org/r/20240624173320.24945-4-yskelg@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + include/trace/events/qdisc.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h +index 5180da19d837f..fc77362386a5b 100644 +--- a/include/trace/events/qdisc.h ++++ b/include/trace/events/qdisc.h +@@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset, + TP_ARGS(q), + + TP_STRUCT__entry( +- __string( dev, qdisc_dev(q)->name ) ++ __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) +-- +2.43.0 + diff --git a/queue-5.15/xdp-remove-warn-from-__xdp_reg_mem_model.patch b/queue-5.15/xdp-remove-warn-from-__xdp_reg_mem_model.patch new file mode 100644 index 00000000000..54338f8230a --- /dev/null +++ b/queue-5.15/xdp-remove-warn-from-__xdp_reg_mem_model.patch @@ -0,0 +1,76 @@ +From e1028e8e933a5ad53594331ee7302f4c6b11a069 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Jun 2024 11:07:47 +0300 +Subject: xdp: Remove WARN() from __xdp_reg_mem_model() + +From: Daniil Dulov + +[ Upstream commit 7e9f79428372c6eab92271390851be34ab26bfb4 ] + +syzkaller reports a warning in __xdp_reg_mem_model(). + +The warning occurs only if __mem_id_init_hash_table() returns an error. 
It +returns the error in two cases: + + 1. memory allocation fails; + 2. rhashtable_init() fails when some fields of rhashtable_params + struct are not initialized properly. + +The second case cannot happen since there is a static const rhashtable_params +struct with valid fields. So, warning is only triggered when there is a +problem with memory allocation. + +Thus, there is no sense in using WARN() to handle this error and it can be +safely removed. + +WARNING: CPU: 0 PID: 5065 at net/core/xdp.c:299 __xdp_reg_mem_model+0x2d9/0x650 net/core/xdp.c:299 + +CPU: 0 PID: 5065 Comm: syz-executor883 Not tainted 6.8.0-syzkaller-05271-gf99c5f563c17 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024 +RIP: 0010:__xdp_reg_mem_model+0x2d9/0x650 net/core/xdp.c:299 + +Call Trace: + xdp_reg_mem_model+0x22/0x40 net/core/xdp.c:344 + xdp_test_run_setup net/bpf/test_run.c:188 [inline] + bpf_test_run_xdp_live+0x365/0x1e90 net/bpf/test_run.c:377 + bpf_prog_test_run_xdp+0x813/0x11b0 net/bpf/test_run.c:1267 + bpf_prog_test_run+0x33a/0x3b0 kernel/bpf/syscall.c:4240 + __sys_bpf+0x48d/0x810 kernel/bpf/syscall.c:5649 + __do_sys_bpf kernel/bpf/syscall.c:5738 [inline] + __se_sys_bpf kernel/bpf/syscall.c:5736 [inline] + __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5736 + do_syscall_64+0xfb/0x240 + entry_SYSCALL_64_after_hwframe+0x6d/0x75 + +Found by Linux Verification Center (linuxtesting.org) with syzkaller. 
+ +Fixes: 8d5d88527587 ("xdp: rhashtable with allocator ID to pointer mapping") +Signed-off-by: Daniil Dulov +Signed-off-by: Daniel Borkmann +Acked-by: Jesper Dangaard Brouer +Link: https://lore.kernel.org/all/20240617162708.492159-1-d.dulov@aladdin.ru +Link: https://lore.kernel.org/bpf/20240624080747.36858-1-d.dulov@aladdin.ru +Signed-off-by: Sasha Levin +--- + net/core/xdp.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/net/core/xdp.c b/net/core/xdp.c +index a3e3d2538a3a8..e9a9694c4fdcc 100644 +--- a/net/core/xdp.c ++++ b/net/core/xdp.c +@@ -292,10 +292,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, + mutex_lock(&mem_id_lock); + ret = __mem_id_init_hash_table(); + mutex_unlock(&mem_id_lock); +- if (ret < 0) { +- WARN_ON(1); ++ if (ret < 0) + return ERR_PTR(ret); +- } + } + + xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); +-- +2.43.0 + -- 2.47.3