From: Greg Kroah-Hartman Date: Fri, 10 Dec 2021 16:46:39 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.4.295~48 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5a7081a63e191e8be54fffed2116aadc1096bf6a;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch iavf-fix-reporting-when-setting-descriptor-count.patch iavf-restore-msi-state-on-reset.patch ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch net-sched-fq_pie-prevent-dismantle-issue.patch netfilter-conntrack-annotate-data-races-around-ct-timeout.patch seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch udp-using-datalen-to-cap-max-gso-segments.patch --- diff --git a/queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch b/queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch new file mode 100644 index 00000000000..5f5b3f33bb1 --- /dev/null +++ b/queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch @@ -0,0 +1,63 @@ +From 4dbb0dad8e63fcd0b5a117c2861d2abe7ff5f186 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sun, 5 Dec 2021 11:28:22 -0800 +Subject: devlink: fix netns refcount leak in devlink_nl_cmd_reload() + +From: Eric Dumazet + +commit 4dbb0dad8e63fcd0b5a117c2861d2abe7ff5f186 upstream. + +While preparing my patch series adding netns refcount tracking, +I spotted bugs in devlink_nl_cmd_reload() + +Some error paths forgot to release a refcount on a netns. + +To fix this, we can reduce the scope of get_net()/put_net() +section around the call to devlink_reload(). + +Fixes: ccdf07219da6 ("devlink: Add reload action option to devlink reload command") +Fixes: dc64cc7c6310 ("devlink: Add devlink reload limit option") +Signed-off-by: Eric Dumazet +Cc: Moshe Shemesh +Cc: Jacob Keller +Cc: Jiri Pirko +Reviewed-by: Leon Romanovsky +Link: https://lore.kernel.org/r/20211205192822.1741045-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/core/devlink.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/net/core/devlink.c ++++ b/net/core/devlink.c +@@ -3265,14 +3265,6 @@ static int devlink_nl_cmd_reload(struct + return err; + } + +- if (info->attrs[DEVLINK_ATTR_NETNS_PID] || +- info->attrs[DEVLINK_ATTR_NETNS_FD] || +- info->attrs[DEVLINK_ATTR_NETNS_ID]) { +- dest_net = devlink_netns_get(skb, info); +- if (IS_ERR(dest_net)) +- return PTR_ERR(dest_net); +- } +- + if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION]) + action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]); + else +@@ -3315,6 +3307,14 @@ static int devlink_nl_cmd_reload(struct + return -EINVAL; + } + } ++ if (info->attrs[DEVLINK_ATTR_NETNS_PID] || ++ info->attrs[DEVLINK_ATTR_NETNS_FD] || ++ info->attrs[DEVLINK_ATTR_NETNS_ID]) { ++ dest_net = devlink_netns_get(skb, info); ++ if (IS_ERR(dest_net)) ++ return PTR_ERR(dest_net); ++ } ++ + err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); + + if (dest_net) diff --git a/queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch b/queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch new file mode 100644 index 00000000000..b51c5c793ea --- /dev/null +++ b/queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch @@ -0,0 +1,93 @@ +From 1a1aa356ddf3f16539f5962c01c5f702686dfc15 Mon Sep 17 00:00:00 2001 +From: Michal Maloszewski +Date: Tue, 26 Oct 2021 12:59:09 +0000 +Subject: iavf: Fix reporting when setting descriptor count + +From: Michal Maloszewski + +commit 1a1aa356ddf3f16539f5962c01c5f702686dfc15 upstream. + +iavf_set_ringparams doesn't communicate to the user that + +1. The user requested descriptor count is out of range. Instead it + just quietly sets descriptors to the "clamped" value and calls it + done. This makes it look an invalid value was successfully set as + the descriptor count when this isn't actually true. + +2. The user provided descriptor count needs to be inflated for alignment + reasons. + +This behavior is confusing. The ice driver has already addressed this +by rejecting invalid values for descriptor count and +messaging for alignment adjustments. +Do the same thing here by adding the error and info messages. + +Fixes: fbb7ddfef253 ("i40evf: core ethtool functionality") +Signed-off-by: Anirudh Venkataramanan +Signed-off-by: Michal Maloszewski +Tested-by: Konrad Jankowski +Signed-off-by: Tony Nguyen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 45 ++++++++++++++++++------- + 1 file changed, 33 insertions(+), 12 deletions(-) + +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -612,23 +612,44 @@ static int iavf_set_ringparam(struct net + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + +- new_tx_count = clamp_t(u32, ring->tx_pending, +- IAVF_MIN_TXD, +- IAVF_MAX_TXD); +- new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); +- +- new_rx_count = clamp_t(u32, ring->rx_pending, +- IAVF_MIN_RXD, +- IAVF_MAX_RXD); +- new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); ++ if (ring->tx_pending > IAVF_MAX_TXD || ++ ring->tx_pending < IAVF_MIN_TXD || ++ ring->rx_pending > IAVF_MAX_RXD || ++ ring->rx_pending < IAVF_MIN_RXD) { ++ netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n", ++ ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD, ++ IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE); ++ return -EINVAL; ++ } ++ ++ new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); ++ if (new_tx_count != ring->tx_pending) ++ netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", ++ new_tx_count); ++ ++ new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); ++ if (new_rx_count != ring->rx_pending) ++ netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", ++ new_rx_count); + + /* if nothing to do return success */ + if ((new_tx_count == adapter->tx_desc_count) && +- (new_rx_count == adapter->rx_desc_count)) ++ (new_rx_count == adapter->rx_desc_count)) { ++ netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); + return 0; ++ } ++ ++ if (new_tx_count != adapter->tx_desc_count) { ++ netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n", ++ adapter->tx_desc_count, new_tx_count); ++ adapter->tx_desc_count = new_tx_count; ++ } + +- adapter->tx_desc_count = new_tx_count; +- adapter->rx_desc_count = new_rx_count; ++ if (new_rx_count != adapter->rx_desc_count) { ++ netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n", ++ adapter->rx_desc_count, new_rx_count); ++ adapter->rx_desc_count = new_rx_count; ++ } + + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; diff --git a/queue-5.10/iavf-restore-msi-state-on-reset.patch b/queue-5.10/iavf-restore-msi-state-on-reset.patch new file mode 100644 index 00000000000..ca1fa733b13 --- /dev/null +++ b/queue-5.10/iavf-restore-msi-state-on-reset.patch @@ -0,0 +1,37 @@ +From 7e4dcc13965c57869684d57a1dc6dd7be589488c Mon Sep 17 00:00:00 2001 +From: Mitch Williams +Date: Fri, 4 Jun 2021 09:53:28 -0700 +Subject: iavf: restore MSI state on reset + +From: Mitch Williams + +commit 7e4dcc13965c57869684d57a1dc6dd7be589488c upstream. + +If the PF experiences an FLR, the VF's MSI and MSI-X configuration will +be conveniently and silently removed in the process. When this happens, +reset recovery will appear to complete normally but no traffic will +pass. The netdev watchdog will helpfully notify everyone of this issue. + +To prevent such public embarrassment, restore MSI configuration at every +reset. For normal resets, this will do no harm, but for VF resets +resulting from a PF FLR, this will keep the VF working. + +Fixes: 5eae00c57f5e ("i40evf: main driver core") +Signed-off-by: Mitch Williams +Tested-by: George Kuruvinakunnel +Signed-off-by: Tony Nguyen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -2139,6 +2139,7 @@ static void iavf_reset_task(struct work_ + } + + pci_set_master(adapter->pdev); ++ pci_restore_msi_state(adapter->pdev); + + if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { + dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", diff --git a/queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch b/queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch new file mode 100644 index 00000000000..cdedc929ecb --- /dev/null +++ b/queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch @@ -0,0 +1,35 @@ +From 9292f8f9a2ac42eb320bced7153aa2e63d8cc13a Mon Sep 17 00:00:00 2001 +From: Mike Marciniszyn +Date: Mon, 29 Nov 2021 14:19:52 -0500 +Subject: IB/hfi1: Correct guard on eager buffer deallocation + +From: Mike Marciniszyn + +commit 9292f8f9a2ac42eb320bced7153aa2e63d8cc13a upstream. + +The code tests the dma address which legitimately can be 0. + +The code should test the kernel logical address to avoid leaking eager +buffer allocations that happen to map to a dma address of 0. + +Fixes: 60368186fd85 ("IB/hfi1: Fix user-space buffers mapping with IOMMU enabled") +Link: https://lore.kernel.org/r/20211129191952.101968.17137.stgit@awfm-01.cornelisnetworks.com +Signed-off-by: Mike Marciniszyn +Signed-off-by: Dennis Dalessandro +Signed-off-by: Jason Gunthorpe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/hfi1/init.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/infiniband/hw/hfi1/init.c ++++ b/drivers/infiniband/hw/hfi1/init.c +@@ -1148,7 +1148,7 @@ void hfi1_free_ctxtdata(struct hfi1_devd + rcd->egrbufs.rcvtids = NULL; + + for (e = 0; e < rcd->egrbufs.alloced; e++) { +- if (rcd->egrbufs.buffers[e].dma) ++ if (rcd->egrbufs.buffers[e].addr) + dma_free_coherent(&dd->pcidev->dev, + rcd->egrbufs.buffers[e].len, + rcd->egrbufs.buffers[e].addr, diff --git a/queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch b/queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch new file mode 100644 index 00000000000..7e0883e2de7 --- /dev/null +++ b/queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch @@ -0,0 +1,96 @@ +From 61c2402665f1e10c5742033fce18392e369931d7 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 9 Dec 2021 00:49:37 -0800 +Subject: net/sched: fq_pie: prevent dismantle issue + +From: Eric Dumazet + +commit 61c2402665f1e10c5742033fce18392e369931d7 upstream. + +For some reason, fq_pie_destroy() did not copy +working code from pie_destroy() and other qdiscs, +thus causing elusive bug. + +Before calling del_timer_sync(&q->adapt_timer), +we need to ensure timer will not rearm itself. + +rcu: INFO: rcu_preempt self-detected stall on CPU +rcu: 0-....: (4416 ticks this GP) idle=60d/1/0x4000000000000000 softirq=10433/10434 fqs=2579 + (t=10501 jiffies g=13085 q=3989) +NMI backtrace for cpu 0 +CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 5.16.0-rc4-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111 + nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62 + trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] + rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343 + print_cpu_stall kernel/rcu/tree_stall.h:627 [inline] + check_cpu_stall kernel/rcu/tree_stall.h:711 [inline] + rcu_pending kernel/rcu/tree.c:3878 [inline] + rcu_sched_clock_irq.cold+0x9d/0x746 kernel/rcu/tree.c:2597 + update_process_times+0x16d/0x200 kernel/time/timer.c:1785 + tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226 + tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428 + __run_hrtimer kernel/time/hrtimer.c:1685 [inline] + __hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749 + hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811 + local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline] + __sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103 + sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097 + + + asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 +RIP: 0010:write_comp_data kernel/kcov.c:221 [inline] +RIP: 0010:__sanitizer_cov_trace_const_cmp1+0x1d/0x80 kernel/kcov.c:273 +Code: 54 c8 20 48 89 10 c3 66 0f 1f 44 00 00 53 41 89 fb 41 89 f1 bf 03 00 00 00 65 48 8b 0c 25 40 70 02 00 48 89 ce 4c 8b 54 24 08 4e f7 ff ff 84 c0 74 51 48 8b 81 88 15 00 00 44 8b 81 84 15 00 +RSP: 0018:ffffc90000d27b28 EFLAGS: 00000246 +RAX: 0000000000000000 RBX: ffff888064bf1bf0 RCX: ffff888011928000 +RDX: ffff888011928000 RSI: ffff888011928000 RDI: 0000000000000003 +RBP: ffff888064bf1c28 R08: 0000000000000000 R09: 0000000000000000 +R10: ffffffff875d8295 R11: 0000000000000000 R12: 0000000000000000 +R13: ffff8880783dd300 R14: 0000000000000000 R15: 0000000000000000 + pie_calculate_probability+0x405/0x7c0 net/sched/sch_pie.c:418 + fq_pie_timer+0x170/0x2a0 net/sched/sch_fq_pie.c:383 + call_timer_fn+0x1a5/0x6b0 kernel/time/timer.c:1421 + expire_timers kernel/time/timer.c:1466 [inline] + __run_timers.part.0+0x675/0xa20 kernel/time/timer.c:1734 + __run_timers kernel/time/timer.c:1715 [inline] + run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1747 + __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 + run_ksoftirqd kernel/softirq.c:921 [inline] + run_ksoftirqd+0x2d/0x60 kernel/softirq.c:913 + smpboot_thread_fn+0x645/0x9c0 kernel/smpboot.c:164 + kthread+0x405/0x4f0 kernel/kthread.c:327 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 + + +Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Mohit P. Tahiliani +Cc: Sachin D. Patil +Cc: V. Saicharan +Cc: Mohit Bhasi +Cc: Leslie Monis +Cc: Gautam Ramakrishnan +Link: https://lore.kernel.org/r/20211209084937.3500020-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_fq_pie.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sched/sch_fq_pie.c ++++ b/net/sched/sch_fq_pie.c +@@ -531,6 +531,7 @@ static void fq_pie_destroy(struct Qdisc + struct fq_pie_sched_data *q = qdisc_priv(sch); + + tcf_block_put(q->block); ++ q->p_params.tupdate = 0; + del_timer_sync(&q->adapt_timer); + kvfree(q->flows); + } diff --git a/queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch b/queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch new file mode 100644 index 00000000000..b7501e668c0 --- /dev/null +++ b/queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch @@ -0,0 +1,187 @@ +From 802a7dc5cf1bef06f7b290ce76d478138408d6b1 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 7 Dec 2021 10:03:23 -0800 +Subject: netfilter: conntrack: annotate data-races around ct->timeout + +From: Eric Dumazet + +commit 802a7dc5cf1bef06f7b290ce76d478138408d6b1 upstream. + +(struct nf_conn)->timeout can be read/written locklessly, +add READ_ONCE()/WRITE_ONCE() to prevent load/store tearing. + +BUG: KCSAN: data-race in __nf_conntrack_alloc / __nf_conntrack_find_get + +write to 0xffff888132e78c08 of 4 bytes by task 6029 on cpu 0: + __nf_conntrack_alloc+0x158/0x280 net/netfilter/nf_conntrack_core.c:1563 + init_conntrack+0x1da/0xb30 net/netfilter/nf_conntrack_core.c:1635 + resolve_normal_ct+0x502/0x610 net/netfilter/nf_conntrack_core.c:1746 + nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 + ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 + nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] + nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 + nf_hook include/linux/netfilter.h:262 [inline] + NF_HOOK include/linux/netfilter.h:305 [inline] + ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 + inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 + __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 + tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline] + tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680 + __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864 + tcp_push_pending_frames include/net/tcp.h:1897 [inline] + tcp_data_snd_check+0x62/0x2e0 net/ipv4/tcp_input.c:5452 + tcp_rcv_established+0x880/0x10e0 net/ipv4/tcp_input.c:5947 + tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 + sk_backlog_rcv include/net/sock.h:1030 [inline] + __release_sock+0xf2/0x270 net/core/sock.c:2768 + release_sock+0x40/0x110 net/core/sock.c:3300 + sk_stream_wait_memory+0x435/0x700 net/core/stream.c:145 + tcp_sendmsg_locked+0xb85/0x25a0 net/ipv4/tcp.c:1402 + tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1440 + inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:644 + sock_sendmsg_nosec net/socket.c:704 [inline] + sock_sendmsg net/socket.c:724 [inline] + __sys_sendto+0x21e/0x2c0 net/socket.c:2036 + __do_sys_sendto net/socket.c:2048 [inline] + __se_sys_sendto net/socket.c:2044 [inline] + __x64_sys_sendto+0x74/0x90 net/socket.c:2044 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +read to 0xffff888132e78c08 of 4 bytes by task 17446 on cpu 1: + nf_ct_is_expired include/net/netfilter/nf_conntrack.h:286 [inline] + ____nf_conntrack_find net/netfilter/nf_conntrack_core.c:776 [inline] + __nf_conntrack_find_get+0x1c7/0xac0 net/netfilter/nf_conntrack_core.c:807 + resolve_normal_ct+0x273/0x610 net/netfilter/nf_conntrack_core.c:1734 + nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901 + ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414 + nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline] + nf_hook_slow+0x72/0x170 net/netfilter/core.c:619 + nf_hook include/linux/netfilter.h:262 [inline] + NF_HOOK include/linux/netfilter.h:305 [inline] + ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324 + inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135 + __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402 + __tcp_send_ack+0x1fd/0x300 net/ipv4/tcp_output.c:3956 + tcp_send_ack+0x23/0x30 net/ipv4/tcp_output.c:3962 + __tcp_ack_snd_check+0x2d8/0x510 net/ipv4/tcp_input.c:5478 + tcp_ack_snd_check net/ipv4/tcp_input.c:5523 [inline] + tcp_rcv_established+0x8c2/0x10e0 net/ipv4/tcp_input.c:5948 + tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521 + sk_backlog_rcv include/net/sock.h:1030 [inline] + __release_sock+0xf2/0x270 net/core/sock.c:2768 + release_sock+0x40/0x110 net/core/sock.c:3300 + tcp_sendpage+0x94/0xb0 net/ipv4/tcp.c:1114 + inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833 + rds_tcp_xmit+0x376/0x5f0 net/rds/tcp_send.c:118 + rds_send_xmit+0xbed/0x1500 net/rds/send.c:367 + rds_send_worker+0x43/0x200 net/rds/threads.c:200 + process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 + worker_thread+0x616/0xa70 kernel/workqueue.c:2445 + kthread+0x2c7/0x2e0 kernel/kthread.c:327 + ret_from_fork+0x1f/0x30 + +value changed: 0x00027cc2 -> 0x00000000 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 1 PID: 17446 Comm: kworker/u4:5 Tainted: G W 5.16.0-rc4-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: krdsd rds_send_worker + +Note: I chose an arbitrary commit for the Fixes: tag, +because I do not think we need to backport this fix to very old kernels. + +Fixes: e37542ba111f ("netfilter: conntrack: avoid possible false sharing") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_conntrack.h | 6 +++--- + net/netfilter/nf_conntrack_core.c | 6 +++--- + net/netfilter/nf_conntrack_netlink.c | 2 +- + net/netfilter/nf_flow_table_core.c | 4 ++-- + 4 files changed, 9 insertions(+), 9 deletions(-) + +--- a/include/net/netfilter/nf_conntrack.h ++++ b/include/net/netfilter/nf_conntrack.h +@@ -262,14 +262,14 @@ static inline bool nf_is_loopback_packet + /* jiffies until ct expires, 0 if already expired */ + static inline unsigned long nf_ct_expires(const struct nf_conn *ct) + { +- s32 timeout = ct->timeout - nfct_time_stamp; ++ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; + + return timeout > 0 ? timeout : 0; + } + + static inline bool nf_ct_is_expired(const struct nf_conn *ct) + { +- return (__s32)(ct->timeout - nfct_time_stamp) <= 0; ++ return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; + } + + /* use after obtaining a reference count */ +@@ -288,7 +288,7 @@ static inline bool nf_ct_should_gc(const + static inline void nf_ct_offload_timeout(struct nf_conn *ct) + { + if (nf_ct_expires(ct) < NF_CT_DAY / 2) +- ct->timeout = nfct_time_stamp + NF_CT_DAY; ++ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); + } + + struct kernel_param; +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -660,7 +660,7 @@ bool nf_ct_delete(struct nf_conn *ct, u3 + + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) { +- s32 timeout = ct->timeout - nfct_time_stamp; ++ s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; + + tstamp->stop = ktime_get_real_ns(); + if (timeout < 0) +@@ -980,7 +980,7 @@ static int nf_ct_resolve_clash_harder(st + } + + /* We want the clashing entry to go away real soon: 1 second timeout. */ +- loser_ct->timeout = nfct_time_stamp + HZ; ++ WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ); + + /* IPS_NAT_CLASH removes the entry automatically on the first + * reply. Also prevents UDP tracker from moving the entry to +@@ -1487,7 +1487,7 @@ __nf_conntrack_alloc(struct net *net, + /* save hash for reusing when confirming */ + *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; + ct->status = 0; +- ct->timeout = 0; ++ WRITE_ONCE(ct->timeout, 0); + write_pnet(&ct->ct_net, net); + memset(&ct->__nfct_init_offset, 0, + offsetof(struct nf_conn, proto) - +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -1971,7 +1971,7 @@ static int ctnetlink_change_timeout(stru + + if (timeout > INT_MAX) + timeout = INT_MAX; +- ct->timeout = nfct_time_stamp + (u32)timeout; ++ WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); + + if (test_bit(IPS_DYING_BIT, &ct->status)) + return -ETIME; +--- a/net/netfilter/nf_flow_table_core.c ++++ b/net/netfilter/nf_flow_table_core.c +@@ -151,8 +151,8 @@ static void flow_offload_fixup_ct_timeou + else + return; + +- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) +- ct->timeout = nfct_time_stamp + timeout; ++ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) ++ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); + } + + static void flow_offload_fixup_ct_state(struct nf_conn *ct) diff --git a/queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch b/queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch new file mode 100644 index 00000000000..ee1f706dc6c --- /dev/null +++ b/queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch @@ -0,0 +1,63 @@ +From ae68d93354e5bf5191ee673982251864ea24dd5c Mon Sep 17 00:00:00 2001 +From: Andrea Mayer +Date: Wed, 8 Dec 2021 20:54:09 +0100 +Subject: seg6: fix the iif in the IPv6 socket control block + +From: Andrea Mayer + +commit ae68d93354e5bf5191ee673982251864ea24dd5c upstream. + +When an IPv4 packet is received, the ip_rcv_core(...) sets the receiving +interface index into the IPv4 socket control block (v5.16-rc4, +net/ipv4/ip_input.c line 510): + + IPCB(skb)->iif = skb->skb_iif; + +If that IPv4 packet is meant to be encapsulated in an outer IPv6+SRH +header, the seg6_do_srh_encap(...) performs the required encapsulation. +In this case, the seg6_do_srh_encap function clears the IPv6 socket control +block (v5.16-rc4 net/ipv6/seg6_iptunnel.c line 163): + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + +The memset(...) was introduced in commit ef489749aae5 ("ipv6: sr: clear +IP6CB(skb) on SRH ip4ip6 encapsulation") a long time ago (2019-01-29). + +Since the IPv6 socket control block and the IPv4 socket control block share +the same memory area (skb->cb), the receiving interface index info is lost +(IP6CB(skb)->iif is set to zero). + +As a side effect, that condition triggers a NULL pointer dereference if +commit 0857d6f8c759 ("ipv6: When forwarding count rx stats on the orig +netdev") is applied. + +To fix that issue, we set the IP6CB(skb)->iif with the index of the +receiving interface once again. + +Fixes: ef489749aae5 ("ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation") +Signed-off-by: Andrea Mayer +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20211208195409.12169-1-andrea.mayer@uniroma2.it +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_iptunnel.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/ipv6/seg6_iptunnel.c ++++ b/net/ipv6/seg6_iptunnel.c +@@ -160,6 +160,14 @@ int seg6_do_srh_encap(struct sk_buff *sk + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ++ ++ /* the control block has been erased, so we have to set the ++ * iif once again. ++ * We read the receiving interface index directly from the ++ * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: ++ * ip_rcv_core(...)). ++ */ ++ IP6CB(skb)->iif = skb->skb_iif; + } + + hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/queue-5.10/series b/queue-5.10/series index 8f39142175c..a0ef09f53bf 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -32,3 +32,11 @@ bpf-fix-the-off-by-two-error-in-range-markings.patch ice-ignore-dropped-packets-during-init.patch bonding-make-tx_rebalance_counter-an-atomic.patch nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch +seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch +udp-using-datalen-to-cap-max-gso-segments.patch +netfilter-conntrack-annotate-data-races-around-ct-timeout.patch +iavf-restore-msi-state-on-reset.patch +iavf-fix-reporting-when-setting-descriptor-count.patch +ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch +devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch +net-sched-fq_pie-prevent-dismantle-issue.patch diff --git a/queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch b/queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch new file mode 100644 index 00000000000..7168069b177 --- /dev/null +++ b/queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch @@ -0,0 +1,41 @@ +From 158390e45612ef0fde160af0826f1740c36daf21 Mon Sep 17 00:00:00 2001 +From: Jianguo Wu +Date: Wed, 8 Dec 2021 18:03:33 +0800 +Subject: udp: using datalen to cap max gso segments + +From: Jianguo Wu + +commit 158390e45612ef0fde160af0826f1740c36daf21 upstream. + +The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, +this is checked in udp_send_skb(): + + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); + return -EINVAL; + } + +skb->len contains network and transport header len here, we should use +only data len instead. + +Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") +Signed-off-by: Jianguo Wu +Reviewed-by: Willem de Bruijn +Link: https://lore.kernel.org/r/900742e5-81fb-30dc-6e0b-375c6cdd7982@163.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -899,7 +899,7 @@ static int udp_send_skb(struct sk_buff * + kfree_skb(skb); + return -EINVAL; + } +- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { ++ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); + return -EINVAL; + }