From: Sasha Levin Date: Sat, 10 Sep 2022 23:50:47 +0000 (-0400) Subject: Fixes for 4.19 X-Git-Tag: v5.19.9~41 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=18e852b64d71c6189dfaae982980f0cb4c343fe6;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 4.19 Signed-off-by: Sasha Levin --- diff --git a/queue-4.19/i40e-fix-kernel-crash-during-module-removal.patch b/queue-4.19/i40e-fix-kernel-crash-during-module-removal.patch new file mode 100644 index 00000000000..78d62bea81b --- /dev/null +++ b/queue-4.19/i40e-fix-kernel-crash-during-module-removal.patch @@ -0,0 +1,106 @@ +From dc12d95f9aac09d621b0fc37e94fc57874faf587 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 16 Aug 2022 18:22:30 +0200 +Subject: i40e: Fix kernel crash during module removal + +From: Ivan Vecera + +[ Upstream commit fb8396aeda5872369a8ed6d2301e2c86e303c520 ] + +The driver incorrectly frees client instance and subsequent +i40e module removal leads to kernel crash. + +Reproducer: +1. Do ethtool offline test followed immediately by another one +host# ethtool -t eth0 offline; ethtool -t eth0 offline +2. 
Remove recursively irdma module that also removes i40e module +host# modprobe -r irdma + +Result: +[ 8675.035651] i40e 0000:3d:00.0 eno1: offline testing starting +[ 8675.193774] i40e 0000:3d:00.0 eno1: testing finished +[ 8675.201316] i40e 0000:3d:00.0 eno1: offline testing starting +[ 8675.358921] i40e 0000:3d:00.0 eno1: testing finished +[ 8675.496921] i40e 0000:3d:00.0: IRDMA hardware initialization FAILED init_state=2 status=-110 +[ 8686.188955] i40e 0000:3d:00.1: i40e_ptp_stop: removed PHC on eno2 +[ 8686.943890] i40e 0000:3d:00.1: Deleted LAN device PF1 bus=0x3d dev=0x00 func=0x01 +[ 8686.952669] i40e 0000:3d:00.0: i40e_ptp_stop: removed PHC on eno1 +[ 8687.761787] BUG: kernel NULL pointer dereference, address: 0000000000000030 +[ 8687.768755] #PF: supervisor read access in kernel mode +[ 8687.773895] #PF: error_code(0x0000) - not-present page +[ 8687.779034] PGD 0 P4D 0 +[ 8687.781575] Oops: 0000 [#1] PREEMPT SMP NOPTI +[ 8687.785935] CPU: 51 PID: 172891 Comm: rmmod Kdump: loaded Tainted: G W I 5.19.0+ #2 +[ 8687.794800] Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.0X.02.0001.051420190324 05/14/2019 +[ 8687.805222] RIP: 0010:i40e_lan_del_device+0x13/0xb0 [i40e] +[ 8687.810719] Code: d4 84 c0 0f 84 b8 25 01 00 e9 9c 25 01 00 41 bc f4 ff ff ff eb 91 90 0f 1f 44 00 00 41 54 55 53 48 8b 87 58 08 00 00 48 89 fb <48> 8b 68 30 48 89 ef e8 21 8a 0f d5 48 89 ef e8 a9 78 0f d5 48 8b +[ 8687.829462] RSP: 0018:ffffa604072efce0 EFLAGS: 00010202 +[ 8687.834689] RAX: 0000000000000000 RBX: ffff8f43833b2000 RCX: 0000000000000000 +[ 8687.841821] RDX: 0000000000000000 RSI: ffff8f4b0545b298 RDI: ffff8f43833b2000 +[ 8687.848955] RBP: ffff8f43833b2000 R08: 0000000000000001 R09: 0000000000000000 +[ 8687.856086] R10: 0000000000000000 R11: 000ffffffffff000 R12: ffff8f43833b2ef0 +[ 8687.863218] R13: ffff8f43833b2ef0 R14: ffff915103966000 R15: ffff8f43833b2008 +[ 8687.870342] FS: 00007f79501c3740(0000) GS:ffff8f4adffc0000(0000) knlGS:0000000000000000 +[ 
8687.878427] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 8687.884174] CR2: 0000000000000030 CR3: 000000014276e004 CR4: 00000000007706e0 +[ 8687.891306] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 8687.898441] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 8687.905572] PKRU: 55555554 +[ 8687.908286] Call Trace: +[ 8687.910737] +[ 8687.912843] i40e_remove+0x2c0/0x330 [i40e] +[ 8687.917040] pci_device_remove+0x33/0xa0 +[ 8687.920962] device_release_driver_internal+0x1aa/0x230 +[ 8687.926188] driver_detach+0x44/0x90 +[ 8687.929770] bus_remove_driver+0x55/0xe0 +[ 8687.933693] pci_unregister_driver+0x2a/0xb0 +[ 8687.937967] i40e_exit_module+0xc/0xf48 [i40e] + +Two offline tests cause IRDMA driver failure (ETIMEDOUT) and this +failure is indicated back to i40e_client_subtask() that calls +i40e_client_del_instance() to free client instance referenced +by pf->cinst and sets this pointer to NULL. During the module +removal i40e_remove() calls i40e_lan_del_device() that dereferences +pf->cinst that is NULL -> crash. +Do not remove client instance when client open callbacks fails and +just clear __I40E_CLIENT_INSTANCE_OPENED bit. The driver also needs +to take care about this situation (when netdev is up and client +is NOT opened) in i40e_notify_client_of_netdev_close() and +calls client close callback only when __I40E_CLIENT_INSTANCE_OPENED +is set. 
+ +Fixes: 0ef2d5afb12d ("i40e: KISS the client interface") +Signed-off-by: Ivan Vecera +Tested-by: Helena Anna Dubel +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_client.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c +index c1832a8487140..2fa4becdaee92 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_client.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_client.c +@@ -178,6 +178,10 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset) + "Cannot locate client instance close routine\n"); + return; + } ++ if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { ++ dev_dbg(&pf->pdev->dev, "Client is not open, abort close\n"); ++ return; ++ } + cdev->client->ops->close(&cdev->lan_info, cdev->client, reset); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); + i40e_client_release_qvlist(&cdev->lan_info); +@@ -376,7 +380,6 @@ void i40e_client_subtask(struct i40e_pf *pf) + /* Remove failed client instance */ + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state); +- i40e_client_del_instance(pf); + return; + } + } +-- +2.35.1 + diff --git a/queue-4.19/ipv6-sr-fix-out-of-bounds-read-when-setting-hmac-dat.patch b/queue-4.19/ipv6-sr-fix-out-of-bounds-read-when-setting-hmac-dat.patch new file mode 100644 index 00000000000..842b3d50849 --- /dev/null +++ b/queue-4.19/ipv6-sr-fix-out-of-bounds-read-when-setting-hmac-dat.patch @@ -0,0 +1,78 @@ +From 5c9c8d6265f7c957f3e6afb5d057d7bf85ab84e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Sep 2022 10:45:06 +0100 +Subject: ipv6: sr: fix out-of-bounds read when setting HMAC data. + +From: David Lebrun + +[ Upstream commit 84a53580c5d2138c7361c7c3eea5b31827e63b35 ] + +The SRv6 layer allows defining HMAC data that can later be used to sign IPv6 +Segment Routing Headers. 
This configuration is realised via netlink through +four attributes: SEG6_ATTR_HMACKEYID, SEG6_ATTR_SECRET, SEG6_ATTR_SECRETLEN and +SEG6_ATTR_ALGID. Because the SECRETLEN attribute is decoupled from the actual +length of the SECRET attribute, it is possible to provide invalid combinations +(e.g., secret = "", secretlen = 64). This case is not checked in the code and +with an appropriately crafted netlink message, an out-of-bounds read of up +to 64 bytes (max secret length) can occur past the skb end pointer and into +skb_shared_info: + +Breakpoint 1, seg6_genl_sethmac (skb=, info=) at net/ipv6/seg6.c:208 +208 memcpy(hinfo->secret, secret, slen); +(gdb) bt + #0 seg6_genl_sethmac (skb=, info=) at net/ipv6/seg6.c:208 + #1 0xffffffff81e012e9 in genl_family_rcv_msg_doit (skb=skb@entry=0xffff88800b1f9f00, nlh=nlh@entry=0xffff88800b1b7600, + extack=extack@entry=0xffffc90000ba7af0, ops=ops@entry=0xffffc90000ba7a80, hdrlen=4, net=0xffffffff84237580 , family=, + family=) at net/netlink/genetlink.c:731 + #2 0xffffffff81e01435 in genl_family_rcv_msg (extack=0xffffc90000ba7af0, nlh=0xffff88800b1b7600, skb=0xffff88800b1f9f00, + family=0xffffffff82fef6c0 ) at net/netlink/genetlink.c:775 + #3 genl_rcv_msg (skb=0xffff88800b1f9f00, nlh=0xffff88800b1b7600, extack=0xffffc90000ba7af0) at net/netlink/genetlink.c:792 + #4 0xffffffff81dfffc3 in netlink_rcv_skb (skb=skb@entry=0xffff88800b1f9f00, cb=cb@entry=0xffffffff81e01350 ) + at net/netlink/af_netlink.c:2501 + #5 0xffffffff81e00919 in genl_rcv (skb=0xffff88800b1f9f00) at net/netlink/genetlink.c:803 + #6 0xffffffff81dff6ae in netlink_unicast_kernel (ssk=0xffff888010eec800, skb=0xffff88800b1f9f00, sk=0xffff888004aed000) + at net/netlink/af_netlink.c:1319 + #7 netlink_unicast (ssk=ssk@entry=0xffff888010eec800, skb=skb@entry=0xffff88800b1f9f00, portid=portid@entry=0, nonblock=) + at net/netlink/af_netlink.c:1345 + #8 0xffffffff81dff9a4 in netlink_sendmsg (sock=, msg=0xffffc90000ba7e48, len=) at net/netlink/af_netlink.c:1921 +... 
+(gdb) p/x ((struct sk_buff *)0xffff88800b1f9f00)->head + ((struct sk_buff *)0xffff88800b1f9f00)->end +$1 = 0xffff88800b1b76c0 +(gdb) p/x secret +$2 = 0xffff88800b1b76c0 +(gdb) p slen +$3 = 64 '@' + +The OOB data can then be read back from userspace by dumping HMAC state. This +commit fixes this by ensuring SECRETLEN cannot exceed the actual length of +SECRET. + +Reported-by: Lucas Leong +Tested: verified that EINVAL is correctly returned when secretlen > len(secret) +Fixes: 4f4853dc1c9c1 ("ipv6: sr: implement API to control SR HMAC structure") +Signed-off-by: David Lebrun +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/seg6.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c +index 9b2f272ca1649..89d55770ac74b 100644 +--- a/net/ipv6/seg6.c ++++ b/net/ipv6/seg6.c +@@ -130,6 +130,11 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) + goto out_unlock; + } + ++ if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) { ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ + if (hinfo) { + err = seg6_hmac_info_del(net, hmackeyid); + if (err) +-- +2.35.1 + diff --git a/queue-4.19/netfilter-br_netfilter-drop-dst-references-before-se.patch b/queue-4.19/netfilter-br_netfilter-drop-dst-references-before-se.patch new file mode 100644 index 00000000000..21c21eb1a7a --- /dev/null +++ b/queue-4.19/netfilter-br_netfilter-drop-dst-references-before-se.patch @@ -0,0 +1,96 @@ +From 2c379a897802f08ba42717905317c738a88d5b25 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Aug 2022 22:36:03 -0700 +Subject: netfilter: br_netfilter: Drop dst references before setting. + +From: Harsh Modi + +[ Upstream commit d047283a7034140ea5da759a494fd2274affdd46 ] + +The IPv6 path already drops dst in the daddr changed case, but the IPv4 +path does not. This change makes the two code paths consistent. 
+ +Further, it is possible that there is already a metadata_dst allocated from +ingress that might already be attached to skbuff->dst while following +the bridge path. If it is not released before setting a new +metadata_dst, it will be leaked. This is similar to what is done in +bpf_set_tunnel_key() or ip6_route_input(). + +It is important to note that the memory being leaked is not the dst +being set in the bridge code, but rather memory allocated from some +other code path that is not being freed correctly before the skb dst is +overwritten. + +An example of the leakage fixed by this commit found using kmemleak: + +unreferenced object 0xffff888010112b00 (size 256): + comm "softirq", pid 0, jiffies 4294762496 (age 32.012s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 80 16 f1 83 ff ff ff ff ................ + e1 4e f6 82 ff ff ff ff 00 00 00 00 00 00 00 00 .N.............. + backtrace: + [<00000000d79567ea>] metadata_dst_alloc+0x1b/0xe0 + [<00000000be113e13>] udp_tun_rx_dst+0x174/0x1f0 + [<00000000a36848f4>] geneve_udp_encap_recv+0x350/0x7b0 + [<00000000d4afb476>] udp_queue_rcv_one_skb+0x380/0x560 + [<00000000ac064aea>] udp_unicast_rcv_skb+0x75/0x90 + [<000000009a8ee8c5>] ip_protocol_deliver_rcu+0xd8/0x230 + [<00000000ef4980bb>] ip_local_deliver_finish+0x7a/0xa0 + [<00000000d7533c8c>] __netif_receive_skb_one_core+0x89/0xa0 + [<00000000a879497d>] process_backlog+0x93/0x190 + [<00000000e41ade9f>] __napi_poll+0x28/0x170 + [<00000000b4c0906b>] net_rx_action+0x14f/0x2a0 + [<00000000b20dd5d4>] __do_softirq+0xf4/0x305 + [<000000003a7d7e15>] __irq_exit_rcu+0xc3/0x140 + [<00000000968d39a2>] sysvec_apic_timer_interrupt+0x9e/0xc0 + [<000000009e920794>] asm_sysvec_apic_timer_interrupt+0x16/0x20 + [<000000008942add0>] native_safe_halt+0x13/0x20 + +Florian Westphal says: "Original code was likely fine because nothing +ever did set a skb->dst entry earlier than bridge in those days." 
+ +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Harsh Modi +Acked-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/bridge/br_netfilter_hooks.c | 2 ++ + net/bridge/br_netfilter_ipv6.c | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index 55c7cdf5e7b83..35642dc96852a 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -385,6 +385,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. */ + if (rt->dst.dev == dev) { ++ skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + goto bridged_dnat; + } +@@ -414,6 +415,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ + kfree_skb(skb); + return 0; + } ++ skb_dst_drop(skb); + skb_dst_set_noref(skb, &rt->dst); + } + +diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c +index 09d5e0c7b3ba4..995d86777e7cb 100644 +--- a/net/bridge/br_netfilter_ipv6.c ++++ b/net/bridge/br_netfilter_ipv6.c +@@ -201,6 +201,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc + kfree_skb(skb); + return 0; + } ++ skb_dst_drop(skb); + skb_dst_set_noref(skb, &rt->dst); + } + +-- +2.35.1 + diff --git a/queue-4.19/netfilter-nf_conntrack_irc-fix-forged-ip-logic.patch b/queue-4.19/netfilter-nf_conntrack_irc-fix-forged-ip-logic.patch new file mode 100644 index 00000000000..96a3cd3b0fa --- /dev/null +++ b/queue-4.19/netfilter-nf_conntrack_irc-fix-forged-ip-logic.patch @@ -0,0 +1,43 @@ +From e1437bfc02daf0ab41c60a70b6668c62ea62ab89 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Aug 2022 14:56:58 +1000 +Subject: netfilter: nf_conntrack_irc: Fix forged IP logic + +From: David Leadbeater + +[ Upstream commit 0efe125cfb99e6773a7434f3463f7c2fa28f3a43 ] + +Ensure the match happens in the right 
direction, previously the +destination used was the server, not the NAT host, as the comment +shows the code intended. + +Additionally nf_nat_irc uses port 0 as a signal and there's no valid way +it can appear in a DCC message, so consider port 0 also forged. + +Fixes: 869f37d8e48f ("[NETFILTER]: nf_conntrack/nf_nat: add IRC helper port") +Signed-off-by: David Leadbeater +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conntrack_irc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c +index 4099f4d79bae7..b7436935b57d0 100644 +--- a/net/netfilter/nf_conntrack_irc.c ++++ b/net/netfilter/nf_conntrack_irc.c +@@ -187,8 +187,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, + + /* dcc_ip can be the internal OR external (NAT'ed) IP */ + tuple = &ct->tuplehash[dir].tuple; +- if (tuple->src.u3.ip != dcc_ip && +- tuple->dst.u3.ip != dcc_ip) { ++ if ((tuple->src.u3.ip != dcc_ip && ++ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) || ++ dcc_port == 0) { + net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n", + &tuple->src.u3.ip, + &dcc_ip, dcc_port); +-- +2.35.1 + diff --git a/queue-4.19/rdma-mlx5-set-local-port-to-one-when-accessing-count.patch b/queue-4.19/rdma-mlx5-set-local-port-to-one-when-accessing-count.patch new file mode 100644 index 00000000000..de77e22a12c --- /dev/null +++ b/queue-4.19/rdma-mlx5-set-local-port-to-one-when-accessing-count.patch @@ -0,0 +1,53 @@ +From cf905036affa52e55311b671ff622b5e4a60571b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Aug 2022 12:02:28 +0300 +Subject: RDMA/mlx5: Set local port to one when accessing counters + +From: Chris Mi + +[ Upstream commit 74b30b3ad5cec95d2647e796d10137438a098bc1 ] + +When accessing Ports Performance Counters Register (PPCNT), +local port must be one if it is Function-Per-Port HCA that +HCA_CAP.num_ports is 1. 
+ +The offending patch can change the local port to other values +when accessing PPCNT after enabling switchdev mode. The following +syndrome will be printed: + + # cat /sys/class/infiniband/rdmap4s0f0/ports/2/counters/* + # dmesg + mlx5_core 0000:04:00.0: mlx5_cmd_check:756:(pid 12450): ACCESS_REG(0x805) op_mod(0x1) failed, status bad parameter(0x3), syndrome (0x1e5585) + +Fix it by setting local port to one for Function-Per-Port HCA. + +Fixes: 210b1f78076f ("IB/mlx5: When not in dual port RoCE mode, use provided port as native") +Reviewed-by: Mark Bloch +Signed-off-by: Chris Mi +Link: https://lore.kernel.org/r/6c5086c295c76211169e58dbd610fb0402360bab.1661763459.git.leonro@nvidia.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/mad.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c +index cdf6e26ebc87d..fb6dcd12db254 100644 +--- a/drivers/infiniband/hw/mlx5/mad.c ++++ b/drivers/infiniband/hw/mlx5/mad.c +@@ -216,6 +216,12 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num, + mdev = dev->mdev; + mdev_port_num = 1; + } ++ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { ++ /* set local port to one for Function-Per-Port HCA. 
*/ ++ mdev = dev->mdev; ++ mdev_port_num = 1; ++ } ++ + /* Declaring support of extended counters */ + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { + struct ib_class_port_info cpi = {}; +-- +2.35.1 + diff --git a/queue-4.19/sch_sfb-also-store-skb-len-before-calling-child-enqu.patch b/queue-4.19/sch_sfb-also-store-skb-len-before-calling-child-enqu.patch new file mode 100644 index 00000000000..beef45337ec --- /dev/null +++ b/queue-4.19/sch_sfb-also-store-skb-len-before-calling-child-enqu.patch @@ -0,0 +1,52 @@ +From bc9ef7655bc7425f5e1bbfe04cf5928b35f19688 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Sep 2022 21:21:36 +0200 +Subject: sch_sfb: Also store skb len before calling child enqueue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Toke Høiland-Jørgensen + +[ Upstream commit 2f09707d0c972120bf794cfe0f0c67e2c2ddb252 ] + +Cong Wang noticed that the previous fix for sch_sfb accessing the queued +skb after enqueueing it to a child qdisc was incomplete: the SFB enqueue +function was also calling qdisc_qstats_backlog_inc() after enqueue, which +reads the pkt len from the skb cb field. Fix this by also storing the skb +len, and using the stored value to increment the backlog after enqueueing. 
+ +Fixes: 9efd23297cca ("sch_sfb: Don't assume the skb is still around after enqueueing to child") +Signed-off-by: Toke Høiland-Jørgensen +Acked-by: Cong Wang +Link: https://lore.kernel.org/r/20220905192137.965549-1-toke@toke.dk +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/sched/sch_sfb.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c +index 38cf065156951..a8ef8efa62fc0 100644 +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -285,6 +285,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + { + + struct sfb_sched_data *q = qdisc_priv(sch); ++ unsigned int len = qdisc_pkt_len(skb); + struct Qdisc *child = q->qdisc; + struct tcf_proto *fl; + struct sfb_skb_cb cb; +@@ -407,7 +408,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); + ret = qdisc_enqueue(skb, child, to_free); + if (likely(ret == NET_XMIT_SUCCESS)) { +- qdisc_qstats_backlog_inc(sch, skb); ++ sch->qstats.backlog += len; + sch->q.qlen++; + increment_qlen(&cb, q); + } else if (net_xmit_drop_count(ret)) { +-- +2.35.1 + diff --git a/queue-4.19/sch_sfb-don-t-assume-the-skb-is-still-around-after-e.patch b/queue-4.19/sch_sfb-don-t-assume-the-skb-is-still-around-after-e.patch new file mode 100644 index 00000000000..81f8e6473a8 --- /dev/null +++ b/queue-4.19/sch_sfb-don-t-assume-the-skb-is-still-around-after-e.patch @@ -0,0 +1,82 @@ +From 84547c3d08860435ac7b8239398845497778d823 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Aug 2022 23:52:18 +0200 +Subject: sch_sfb: Don't assume the skb is still around after enqueueing to + child +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Toke Høiland-Jørgensen + +[ Upstream commit 9efd23297cca530bb35e1848665805d3fcdd7889 ] + +The sch_sfb enqueue() routine assumes the skb is still alive after it has +been enqueued into a child qdisc, using 
the data in the skb cb field in the +increment_qlen() routine after enqueue. However, the skb may in fact have +been freed, causing a use-after-free in this case. In particular, this +happens if sch_cake is used as a child of sfb, and the GSO splitting mode +of CAKE is enabled (in which case the skb will be split into segments and +the original skb freed). + +Fix this by copying the sfb cb data to the stack before enqueueing the skb, +and using this stack copy in increment_qlen() instead of the skb pointer +itself. + +Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-18231 +Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_sfb.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c +index 81d205acb1b6a..38cf065156951 100644 +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -139,15 +139,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) + } + } + +-static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) ++static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) + { + u32 sfbhash; + +- sfbhash = sfb_hash(skb, 0); ++ sfbhash = cb->hashes[0]; + if (sfbhash) + increment_one_qlen(sfbhash, 0, q); + +- sfbhash = sfb_hash(skb, 1); ++ sfbhash = cb->hashes[1]; + if (sfbhash) + increment_one_qlen(sfbhash, 1, q); + } +@@ -287,6 +287,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + struct tcf_proto *fl; ++ struct sfb_skb_cb cb; + int i; + u32 p_min = ~0; + u32 minqlen = ~0; +@@ -403,11 +404,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + } + + enqueue: ++ memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); + ret = qdisc_enqueue(skb, child, to_free); + if (likely(ret == 
NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); + sch->q.qlen++; +- increment_qlen(skb, q); ++ increment_qlen(&cb, q); + } else if (net_xmit_drop_count(ret)) { + q->stats.childdrop++; + qdisc_qstats_drop(sch); +-- +2.35.1 + diff --git a/queue-4.19/series b/queue-4.19/series index 23ad50205dc..11bec1cc293 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -61,3 +61,13 @@ alsa-usb-audio-fix-an-out-of-bounds-bug-in-__snd_usb_parse_audio_interface.patch kprobes-prohibit-probes-in-gate-area.patch debugfs-add-debugfs_lookup_and_remove.patch scsi-mpt3sas-fix-use-after-free-warning.patch +soc-brcmstb-pm-arm-fix-refcount-leak-and-__iomem-lea.patch +netfilter-br_netfilter-drop-dst-references-before-se.patch +netfilter-nf_conntrack_irc-fix-forged-ip-logic.patch +sch_sfb-don-t-assume-the-skb-is-still-around-after-e.patch +tipc-fix-shift-wrapping-bug-in-map_get.patch +i40e-fix-kernel-crash-during-module-removal.patch +ipv6-sr-fix-out-of-bounds-read-when-setting-hmac-dat.patch +rdma-mlx5-set-local-port-to-one-when-accessing-count.patch +tcp-fix-early-etimedout-after-spurious-non-sack-rto.patch +sch_sfb-also-store-skb-len-before-calling-child-enqu.patch diff --git a/queue-4.19/soc-brcmstb-pm-arm-fix-refcount-leak-and-__iomem-lea.patch b/queue-4.19/soc-brcmstb-pm-arm-fix-refcount-leak-and-__iomem-lea.patch new file mode 100644 index 00000000000..f247dcb67e6 --- /dev/null +++ b/queue-4.19/soc-brcmstb-pm-arm-fix-refcount-leak-and-__iomem-lea.patch @@ -0,0 +1,163 @@ +From 0bb9057e7cf8c5a10f2d7873f4f9b27d8df9ac3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 7 Jul 2022 09:56:20 +0800 +Subject: soc: brcmstb: pm-arm: Fix refcount leak and __iomem leak bugs + +From: Liang He + +[ Upstream commit 1085f5080647f0c9f357c270a537869191f7f2a1 ] + +In brcmstb_pm_probe(), there are two kinds of leak bugs: + +(1) we need to add of_node_put() when for_each__matching_node() breaks +(2) we need to add iounmap() for each iomap in fail path + +Fixes: 0b741b8234c8 ("soc: bcm: 
brcmstb: Add support for S2/S3/S5 suspend states (ARM)") +Signed-off-by: Liang He +Link: https://lore.kernel.org/r/20220707015620.306468-1-windhl@126.com +Signed-off-by: Florian Fainelli +Signed-off-by: Sasha Levin +--- + drivers/soc/bcm/brcmstb/pm/pm-arm.c | 50 ++++++++++++++++++++++------- + 1 file changed, 39 insertions(+), 11 deletions(-) + +diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c +index f4ad45a1efabe..fd124e0850207 100644 +--- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c ++++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c +@@ -689,13 +689,14 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + const struct of_device_id *of_id = NULL; + struct device_node *dn; + void __iomem *base; +- int ret, i; ++ int ret, i, s; + + /* AON ctrl registers */ + base = brcmstb_ioremap_match(aon_ctrl_dt_ids, 0, NULL); + if (IS_ERR(base)) { + pr_err("error mapping AON_CTRL\n"); +- return PTR_ERR(base); ++ ret = PTR_ERR(base); ++ goto aon_err; + } + ctrl.aon_ctrl_base = base; + +@@ -705,8 +706,10 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + /* Assume standard offset */ + ctrl.aon_sram = ctrl.aon_ctrl_base + + AON_CTRL_SYSTEM_DATA_RAM_OFS; ++ s = 0; + } else { + ctrl.aon_sram = base; ++ s = 1; + } + + writel_relaxed(0, ctrl.aon_sram + AON_REG_PANIC); +@@ -716,7 +719,8 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + (const void **)&ddr_phy_data); + if (IS_ERR(base)) { + pr_err("error mapping DDR PHY\n"); +- return PTR_ERR(base); ++ ret = PTR_ERR(base); ++ goto ddr_phy_err; + } + ctrl.support_warm_boot = ddr_phy_data->supports_warm_boot; + ctrl.pll_status_offset = ddr_phy_data->pll_status_offset; +@@ -736,17 +740,20 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + for_each_matching_node(dn, ddr_shimphy_dt_ids) { + i = ctrl.num_memc; + if (i >= MAX_NUM_MEMC) { ++ of_node_put(dn); + pr_warn("too many MEMCs (max %d)\n", MAX_NUM_MEMC); + break; + } + + base = of_io_request_and_map(dn, 0, 
dn->full_name); + if (IS_ERR(base)) { ++ of_node_put(dn); + if (!ctrl.support_warm_boot) + break; + + pr_err("error mapping DDR SHIMPHY %d\n", i); +- return PTR_ERR(base); ++ ret = PTR_ERR(base); ++ goto ddr_shimphy_err; + } + ctrl.memcs[i].ddr_shimphy_base = base; + ctrl.num_memc++; +@@ -757,14 +764,18 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + for_each_matching_node(dn, brcmstb_memc_of_match) { + base = of_iomap(dn, 0); + if (!base) { ++ of_node_put(dn); + pr_err("error mapping DDR Sequencer %d\n", i); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto brcmstb_memc_err; + } + + of_id = of_match_node(brcmstb_memc_of_match, dn); + if (!of_id) { + iounmap(base); +- return -EINVAL; ++ of_node_put(dn); ++ ret = -EINVAL; ++ goto brcmstb_memc_err; + } + + ddr_seq_data = of_id->data; +@@ -784,21 +795,24 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + dn = of_find_matching_node(NULL, sram_dt_ids); + if (!dn) { + pr_err("SRAM not found\n"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto brcmstb_memc_err; + } + + ret = brcmstb_init_sram(dn); + of_node_put(dn); + if (ret) { + pr_err("error setting up SRAM for PM\n"); +- return ret; ++ goto brcmstb_memc_err; + } + + ctrl.pdev = pdev; + + ctrl.s3_params = kmalloc(sizeof(*ctrl.s3_params), GFP_KERNEL); +- if (!ctrl.s3_params) +- return -ENOMEM; ++ if (!ctrl.s3_params) { ++ ret = -ENOMEM; ++ goto s3_params_err; ++ } + ctrl.s3_params_pa = dma_map_single(&pdev->dev, ctrl.s3_params, + sizeof(*ctrl.s3_params), + DMA_TO_DEVICE); +@@ -818,7 +832,21 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + + out: + kfree(ctrl.s3_params); +- ++s3_params_err: ++ iounmap(ctrl.boot_sram); ++brcmstb_memc_err: ++ for (i--; i >= 0; i--) ++ iounmap(ctrl.memcs[i].ddr_ctrl); ++ddr_shimphy_err: ++ for (i = 0; i < ctrl.num_memc; i++) ++ iounmap(ctrl.memcs[i].ddr_shimphy_base); ++ ++ iounmap(ctrl.memcs[0].ddr_phy_base); ++ddr_phy_err: ++ iounmap(ctrl.aon_ctrl_base); ++ if (s) ++ iounmap(ctrl.aon_sram); ++aon_err: + 
pr_warn("PM: initialization failed with code %d\n", ret); + + return ret; +-- +2.35.1 + diff --git a/queue-4.19/tcp-fix-early-etimedout-after-spurious-non-sack-rto.patch b/queue-4.19/tcp-fix-early-etimedout-after-spurious-non-sack-rto.patch new file mode 100644 index 00000000000..366a571b337 --- /dev/null +++ b/queue-4.19/tcp-fix-early-etimedout-after-spurious-non-sack-rto.patch @@ -0,0 +1,129 @@ +From 9ba30b863196f559220cfdb6791597b887d219a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 3 Sep 2022 08:10:23 -0400 +Subject: tcp: fix early ETIMEDOUT after spurious non-SACK RTO + +From: Neal Cardwell + +[ Upstream commit 686dc2db2a0fdc1d34b424ec2c0a735becd8d62b ] + +Fix a bug reported and analyzed by Nagaraj Arankal, where the handling +of a spurious non-SACK RTO could cause a connection to fail to clear +retrans_stamp, causing a later RTO to very prematurely time out the +connection with ETIMEDOUT. + +Here is the buggy scenario, expanding upon Nagaraj Arankal's excellent +report: + +(*1) Send one data packet on a non-SACK connection + +(*2) Because no ACK packet is received, the packet is retransmitted + and we enter CA_Loss; but this retransmission is spurious. + +(*3) The ACK for the original data is received. The transmitted packet + is acknowledged. The TCP timestamp is before the retrans_stamp, + so tcp_may_undo() returns true, and tcp_try_undo_loss() returns + true without changing state to Open (because tcp_is_sack() is + false), and tcp_process_loss() returns without calling + tcp_try_undo_recovery(). Normally after undoing a CA_Loss + episode, tcp_fastretrans_alert() would see that the connection + has returned to CA_Open and fall through and call + tcp_try_to_open(), which would set retrans_stamp to 0. However, + for non-SACK connections we hold the connection in CA_Loss, so do + not fall through to call tcp_try_to_open() and do not set + retrans_stamp to 0. So retrans_stamp is (erroneously) still + non-zero. 
+ + At this point the first "retransmission event" has passed and + been recovered from. Any future retransmission is a completely + new "event". However, retrans_stamp is erroneously still + set. (And we are still in CA_Loss, which is correct.) + +(*4) After 16 minutes (to correspond with tcp_retries2=15), a new data + packet is sent. Note: No data is transmitted between (*3) and + (*4) and we disabled keep alives. + + The socket's timeout SHOULD be calculated from this point in + time, but instead it's calculated from the prior "event" 16 + minutes ago (step (*2)). + +(*5) Because no ACK packet is received, the packet is retransmitted. + +(*6) At the time of the 2nd retransmission, the socket returns + ETIMEDOUT, prematurely, because retrans_stamp is (erroneously) + too far in the past (set at the time of (*2)). + +This commit fixes this bug by ensuring that we reuse in +tcp_try_undo_loss() the same careful logic for non-SACK connections +that we have in tcp_try_undo_recovery(). To avoid duplicating logic, +we factor out that logic into a new +tcp_is_non_sack_preventing_reopen() helper and call that helper from +both undo functions. 
+ +Fixes: da34ac7626b5 ("tcp: only undo on partial ACKs in CA_Loss") +Reported-by: Nagaraj Arankal +Link: https://lore.kernel.org/all/SJ0PR84MB1847BE6C24D274C46A1B9B0EB27A9@SJ0PR84MB1847.NAMPRD84.PROD.OUTLOOK.COM/ +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20220903121023.866900-1-ncardwell.kernel@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 25 ++++++++++++++++++------- + 1 file changed, 18 insertions(+), 7 deletions(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 0be9d5d3c032f..aac5d5b739268 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2372,6 +2372,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp) + return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); + } + ++static bool tcp_is_non_sack_preventing_reopen(struct sock *sk) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { ++ /* Hold old state until something *above* high_seq ++ * is ACKed. For Reno it is MUST to prevent false ++ * fast retransmits (RFC2582). SACK TCP is safe. */ ++ if (!tcp_any_retrans_done(sk)) ++ tp->retrans_stamp = 0; ++ return true; ++ } ++ return false; ++} ++ + /* People celebrate: "We love our President!" */ + static bool tcp_try_undo_recovery(struct sock *sk) + { +@@ -2394,14 +2409,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) + } else if (tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_persist--; + } +- if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { +- /* Hold old state until something *above* high_seq +- * is ACKed. For Reno it is MUST to prevent false +- * fast retransmits (RFC2582). SACK TCP is safe. 
*/ +- if (!tcp_any_retrans_done(sk)) +- tp->retrans_stamp = 0; ++ if (tcp_is_non_sack_preventing_reopen(sk)) + return true; +- } + tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + return false; +@@ -2437,6 +2446,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); + inet_csk(sk)->icsk_retransmits = 0; ++ if (tcp_is_non_sack_preventing_reopen(sk)) ++ return true; + if (frto_undo || tcp_is_sack(tp)) { + tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; +-- +2.35.1 + diff --git a/queue-4.19/tipc-fix-shift-wrapping-bug-in-map_get.patch b/queue-4.19/tipc-fix-shift-wrapping-bug-in-map_get.patch new file mode 100644 index 00000000000..0a62983caa2 --- /dev/null +++ b/queue-4.19/tipc-fix-shift-wrapping-bug-in-map_get.patch @@ -0,0 +1,36 @@ +From 52daf468f6630f0d82c805fec836a250a8a02538 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Aug 2022 17:47:56 +0300 +Subject: tipc: fix shift wrapping bug in map_get() + +From: Dan Carpenter + +[ Upstream commit e2b224abd9bf45dcb55750479fc35970725a430b ] + +There is a shift wrapping bug in this code so anything above +31 will return false. + +Fixes: 35c55c9877f8 ("tipc: add neighbor monitoring framework") +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/tipc/monitor.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c +index 7b6c1c5c30dc8..0268857a3cfed 100644 +--- a/net/tipc/monitor.c ++++ b/net/tipc/monitor.c +@@ -130,7 +130,7 @@ static void map_set(u64 *up_map, int i, unsigned int v) + + static int map_get(u64 up_map, int i) + { +- return (up_map & (1 << i)) >> i; ++ return (up_map & (1ULL << i)) >> i; + } + + static struct tipc_peer *peer_prev(struct tipc_peer *peer) +-- +2.35.1 +