5.10-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 10 Dec 2021 16:46:39 +0000 (17:46 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 10 Dec 2021 16:46:39 +0000 (17:46 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 10 Dec 2021 16:46:39 +0000 (17:46 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 10 Dec 2021 16:46:39 +0000 (17:46 +0100)
diff --git a/queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch b/queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch

new file mode 100644 (file)

index 0000000..5f5b3f3
--- /dev/null
+++ b/queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch
@@ -0,0 +1,63 @@
+From 4dbb0dad8e63fcd0b5a117c2861d2abe7ff5f186 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 5 Dec 2021 11:28:22 -0800
+Subject: devlink: fix netns refcount leak in devlink_nl_cmd_reload()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 4dbb0dad8e63fcd0b5a117c2861d2abe7ff5f186 upstream.
+
+While preparing my patch series adding netns refcount tracking,
+I spotted bugs in devlink_nl_cmd_reload()
+
+Some error paths forgot to release a refcount on a netns.
+
+To fix this, we can reduce the scope of get_net()/put_net()
+section around the call to devlink_reload().
+
+Fixes: ccdf07219da6 ("devlink: Add reload action option to devlink reload command")
+Fixes: dc64cc7c6310 ("devlink: Add devlink reload limit option")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Moshe Shemesh <moshe@mellanox.com>
+Cc: Jacob Keller <jacob.e.keller@intel.com>
+Cc: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20211205192822.1741045-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/devlink.c |   16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -3265,14 +3265,6 @@ static int devlink_nl_cmd_reload(struct
+               return err;
+       }
+ 
+-      if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+-          info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+-          info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+-              dest_net = devlink_netns_get(skb, info);
+-              if (IS_ERR(dest_net))
+-                      return PTR_ERR(dest_net);
+-      }
+-
+       if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+               action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+       else
+@@ -3315,6 +3307,14 @@ static int devlink_nl_cmd_reload(struct
+                       return -EINVAL;
+               }
+       }
++      if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
++          info->attrs[DEVLINK_ATTR_NETNS_FD] ||
++          info->attrs[DEVLINK_ATTR_NETNS_ID]) {
++              dest_net = devlink_netns_get(skb, info);
++              if (IS_ERR(dest_net))
++                      return PTR_ERR(dest_net);
++      }
++
+       err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
+ 
+       if (dest_net)
diff --git a/queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch b/queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch

new file mode 100644 (file)

index 0000000..b51c5c7
--- /dev/null
+++ b/queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch
@@ -0,0 +1,93 @@
+From 1a1aa356ddf3f16539f5962c01c5f702686dfc15 Mon Sep 17 00:00:00 2001
+From: Michal Maloszewski <michal.maloszewski@intel.com>
+Date: Tue, 26 Oct 2021 12:59:09 +0000
+Subject: iavf: Fix reporting when setting descriptor count
+
+From: Michal Maloszewski <michal.maloszewski@intel.com>
+
+commit 1a1aa356ddf3f16539f5962c01c5f702686dfc15 upstream.
+
+iavf_set_ringparam doesn't communicate to the user that
+
+1. The user requested descriptor count is out of range. Instead it
+   just quietly sets descriptors to the "clamped" value and calls it
+   done. This makes it look like an invalid value was successfully set as
+   the descriptor count when this isn't actually true.
+
+2. The user provided descriptor count needs to be inflated for alignment
+   reasons.
+
+This behavior is confusing. The ice driver has already addressed this
+by rejecting invalid values for descriptor count and
+messaging for alignment adjustments.
+Do the same thing here by adding the error and info messages.
+
+Fixes: fbb7ddfef253 ("i40evf: core ethtool functionality")
+Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
+Signed-off-by: Michal Maloszewski <michal.maloszewski@intel.com>
+Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_ethtool.c |   45 ++++++++++++++++++-------
+ 1 file changed, 33 insertions(+), 12 deletions(-)
+
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -612,23 +612,44 @@ static int iavf_set_ringparam(struct net
+       if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+               return -EINVAL;
+ 
+-      new_tx_count = clamp_t(u32, ring->tx_pending,
+-                             IAVF_MIN_TXD,
+-                             IAVF_MAX_TXD);
+-      new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
+-
+-      new_rx_count = clamp_t(u32, ring->rx_pending,
+-                             IAVF_MIN_RXD,
+-                             IAVF_MAX_RXD);
+-      new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++      if (ring->tx_pending > IAVF_MAX_TXD ||
++          ring->tx_pending < IAVF_MIN_TXD ||
++          ring->rx_pending > IAVF_MAX_RXD ||
++          ring->rx_pending < IAVF_MIN_RXD) {
++              netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
++                         ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD,
++                         IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++              return -EINVAL;
++      }
++
++      new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++      if (new_tx_count != ring->tx_pending)
++              netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
++                          new_tx_count);
++
++      new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE);
++      if (new_rx_count != ring->rx_pending)
++              netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
++                          new_rx_count);
+ 
+       /* if nothing to do return success */
+       if ((new_tx_count == adapter->tx_desc_count) &&
+-          (new_rx_count == adapter->rx_desc_count))
++          (new_rx_count == adapter->rx_desc_count)) {
++              netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+               return 0;
++      }
++
++      if (new_tx_count != adapter->tx_desc_count) {
++              netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n",
++                         adapter->tx_desc_count, new_tx_count);
++              adapter->tx_desc_count = new_tx_count;
++      }
+ 
+-      adapter->tx_desc_count = new_tx_count;
+-      adapter->rx_desc_count = new_rx_count;
++      if (new_rx_count != adapter->rx_desc_count) {
++              netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n",
++                         adapter->rx_desc_count, new_rx_count);
++              adapter->rx_desc_count = new_rx_count;
++      }
+ 
+       if (netif_running(netdev)) {
+               adapter->flags |= IAVF_FLAG_RESET_NEEDED;
diff --git a/queue-5.10/iavf-restore-msi-state-on-reset.patch b/queue-5.10/iavf-restore-msi-state-on-reset.patch

new file mode 100644 (file)

index 0000000..ca1fa73
--- /dev/null
+++ b/queue-5.10/iavf-restore-msi-state-on-reset.patch
@@ -0,0 +1,37 @@
+From 7e4dcc13965c57869684d57a1dc6dd7be589488c Mon Sep 17 00:00:00 2001
+From: Mitch Williams <mitch.a.williams@intel.com>
+Date: Fri, 4 Jun 2021 09:53:28 -0700
+Subject: iavf: restore MSI state on reset
+
+From: Mitch Williams <mitch.a.williams@intel.com>
+
+commit 7e4dcc13965c57869684d57a1dc6dd7be589488c upstream.
+
+If the PF experiences an FLR, the VF's MSI and MSI-X configuration will
+be conveniently and silently removed in the process. When this happens,
+reset recovery will appear to complete normally but no traffic will
+pass. The netdev watchdog will helpfully notify everyone of this issue.
+
+To prevent such public embarrassment, restore MSI configuration at every
+reset. For normal resets, this will do no harm, but for VF resets
+resulting from a PF FLR, this will keep the VF working.
+
+Fixes: 5eae00c57f5e ("i40evf: main driver core")
+Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -2139,6 +2139,7 @@ static void iavf_reset_task(struct work_
+       }
+ 
+       pci_set_master(adapter->pdev);
++      pci_restore_msi_state(adapter->pdev);
+ 
+       if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+               dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
diff --git a/queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch b/queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch

new file mode 100644 (file)

index 0000000..cdedc92
--- /dev/null
+++ b/queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch
@@ -0,0 +1,35 @@
+From 9292f8f9a2ac42eb320bced7153aa2e63d8cc13a Mon Sep 17 00:00:00 2001
+From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+Date: Mon, 29 Nov 2021 14:19:52 -0500
+Subject: IB/hfi1: Correct guard on eager buffer deallocation
+
+From: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+
+commit 9292f8f9a2ac42eb320bced7153aa2e63d8cc13a upstream.
+
+The code tests the dma address which legitimately can be 0.
+
+The code should test the kernel logical address to avoid leaking eager
+buffer allocations that happen to map to a dma address of 0.
+
+Fixes: 60368186fd85 ("IB/hfi1: Fix user-space buffers mapping with IOMMU enabled")
+Link: https://lore.kernel.org/r/20211129191952.101968.17137.stgit@awfm-01.cornelisnetworks.com
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
+Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/infiniband/hw/hfi1/init.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/infiniband/hw/hfi1/init.c
++++ b/drivers/infiniband/hw/hfi1/init.c
+@@ -1148,7 +1148,7 @@ void hfi1_free_ctxtdata(struct hfi1_devd
+       rcd->egrbufs.rcvtids = NULL;
+ 
+       for (e = 0; e < rcd->egrbufs.alloced; e++) {
+-              if (rcd->egrbufs.buffers[e].dma)
++              if (rcd->egrbufs.buffers[e].addr)
+                       dma_free_coherent(&dd->pcidev->dev,
+                                         rcd->egrbufs.buffers[e].len,
+                                         rcd->egrbufs.buffers[e].addr,
diff --git a/queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch b/queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch

new file mode 100644 (file)

index 0000000..7e0883e
--- /dev/null
+++ b/queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch
@@ -0,0 +1,96 @@
+From 61c2402665f1e10c5742033fce18392e369931d7 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 9 Dec 2021 00:49:37 -0800
+Subject: net/sched: fq_pie: prevent dismantle issue
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 61c2402665f1e10c5742033fce18392e369931d7 upstream.
+
+For some reason, fq_pie_destroy() did not copy
+working code from pie_destroy() and other qdiscs,
+thus causing an elusive bug.
+
+Before calling del_timer_sync(&q->adapt_timer),
+we need to ensure timer will not rearm itself.
+
+rcu: INFO: rcu_preempt self-detected stall on CPU
+rcu:    0-....: (4416 ticks this GP) idle=60d/1/0x4000000000000000 softirq=10433/10434 fqs=2579
+        (t=10501 jiffies g=13085 q=3989)
+NMI backtrace for cpu 0
+CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 5.16.0-rc4-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111
+ nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62
+ trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline]
+ rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343
+ print_cpu_stall kernel/rcu/tree_stall.h:627 [inline]
+ check_cpu_stall kernel/rcu/tree_stall.h:711 [inline]
+ rcu_pending kernel/rcu/tree.c:3878 [inline]
+ rcu_sched_clock_irq.cold+0x9d/0x746 kernel/rcu/tree.c:2597
+ update_process_times+0x16d/0x200 kernel/time/timer.c:1785
+ tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226
+ tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428
+ __run_hrtimer kernel/time/hrtimer.c:1685 [inline]
+ __hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749
+ hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811
+ local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline]
+ __sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103
+ sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097
+ </IRQ>
+ <TASK>
+ asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638
+RIP: 0010:write_comp_data kernel/kcov.c:221 [inline]
+RIP: 0010:__sanitizer_cov_trace_const_cmp1+0x1d/0x80 kernel/kcov.c:273
+Code: 54 c8 20 48 89 10 c3 66 0f 1f 44 00 00 53 41 89 fb 41 89 f1 bf 03 00 00 00 65 48 8b 0c 25 40 70 02 00 48 89 ce 4c 8b 54 24 08 <e8> 4e f7 ff ff 84 c0 74 51 48 8b 81 88 15 00 00 44 8b 81 84 15 00
+RSP: 0018:ffffc90000d27b28 EFLAGS: 00000246
+RAX: 0000000000000000 RBX: ffff888064bf1bf0 RCX: ffff888011928000
+RDX: ffff888011928000 RSI: ffff888011928000 RDI: 0000000000000003
+RBP: ffff888064bf1c28 R08: 0000000000000000 R09: 0000000000000000
+R10: ffffffff875d8295 R11: 0000000000000000 R12: 0000000000000000
+R13: ffff8880783dd300 R14: 0000000000000000 R15: 0000000000000000
+ pie_calculate_probability+0x405/0x7c0 net/sched/sch_pie.c:418
+ fq_pie_timer+0x170/0x2a0 net/sched/sch_fq_pie.c:383
+ call_timer_fn+0x1a5/0x6b0 kernel/time/timer.c:1421
+ expire_timers kernel/time/timer.c:1466 [inline]
+ __run_timers.part.0+0x675/0xa20 kernel/time/timer.c:1734
+ __run_timers kernel/time/timer.c:1715 [inline]
+ run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1747
+ __do_softirq+0x29b/0x9c2 kernel/softirq.c:558
+ run_ksoftirqd kernel/softirq.c:921 [inline]
+ run_ksoftirqd+0x2d/0x60 kernel/softirq.c:913
+ smpboot_thread_fn+0x645/0x9c0 kernel/smpboot.c:164
+ kthread+0x405/0x4f0 kernel/kthread.c:327
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
+ </TASK>
+
+Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
+Cc: Sachin D. Patil <sdp.sachin@gmail.com>
+Cc: V. Saicharan <vsaicharan1998@gmail.com>
+Cc: Mohit Bhasi <mohitbhasi1998@gmail.com>
+Cc: Leslie Monis <lesliemonis@gmail.com>
+Cc: Gautam Ramakrishnan <gautamramk@gmail.com>
+Link: https://lore.kernel.org/r/20211209084937.3500020-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_fq_pie.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/sch_fq_pie.c
++++ b/net/sched/sch_fq_pie.c
+@@ -531,6 +531,7 @@ static void fq_pie_destroy(struct Qdisc
+       struct fq_pie_sched_data *q = qdisc_priv(sch);
+ 
+       tcf_block_put(q->block);
++      q->p_params.tupdate = 0;
+       del_timer_sync(&q->adapt_timer);
+       kvfree(q->flows);
+ }
diff --git a/queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch b/queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch

new file mode 100644 (file)

index 0000000..b7501e6
--- /dev/null
+++ b/queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch
@@ -0,0 +1,187 @@
+From 802a7dc5cf1bef06f7b290ce76d478138408d6b1 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 7 Dec 2021 10:03:23 -0800
+Subject: netfilter: conntrack: annotate data-races around ct->timeout
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 802a7dc5cf1bef06f7b290ce76d478138408d6b1 upstream.
+
+(struct nf_conn)->timeout can be read/written locklessly,
+add READ_ONCE()/WRITE_ONCE() to prevent load/store tearing.
+
+BUG: KCSAN: data-race in __nf_conntrack_alloc / __nf_conntrack_find_get
+
+write to 0xffff888132e78c08 of 4 bytes by task 6029 on cpu 0:
+ __nf_conntrack_alloc+0x158/0x280 net/netfilter/nf_conntrack_core.c:1563
+ init_conntrack+0x1da/0xb30 net/netfilter/nf_conntrack_core.c:1635
+ resolve_normal_ct+0x502/0x610 net/netfilter/nf_conntrack_core.c:1746
+ nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901
+ ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414
+ nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline]
+ nf_hook_slow+0x72/0x170 net/netfilter/core.c:619
+ nf_hook include/linux/netfilter.h:262 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324
+ inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135
+ __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402
+ tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline]
+ tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680
+ __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864
+ tcp_push_pending_frames include/net/tcp.h:1897 [inline]
+ tcp_data_snd_check+0x62/0x2e0 net/ipv4/tcp_input.c:5452
+ tcp_rcv_established+0x880/0x10e0 net/ipv4/tcp_input.c:5947
+ tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521
+ sk_backlog_rcv include/net/sock.h:1030 [inline]
+ __release_sock+0xf2/0x270 net/core/sock.c:2768
+ release_sock+0x40/0x110 net/core/sock.c:3300
+ sk_stream_wait_memory+0x435/0x700 net/core/stream.c:145
+ tcp_sendmsg_locked+0xb85/0x25a0 net/ipv4/tcp.c:1402
+ tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1440
+ inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:644
+ sock_sendmsg_nosec net/socket.c:704 [inline]
+ sock_sendmsg net/socket.c:724 [inline]
+ __sys_sendto+0x21e/0x2c0 net/socket.c:2036
+ __do_sys_sendto net/socket.c:2048 [inline]
+ __se_sys_sendto net/socket.c:2044 [inline]
+ __x64_sys_sendto+0x74/0x90 net/socket.c:2044
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+read to 0xffff888132e78c08 of 4 bytes by task 17446 on cpu 1:
+ nf_ct_is_expired include/net/netfilter/nf_conntrack.h:286 [inline]
+ ____nf_conntrack_find net/netfilter/nf_conntrack_core.c:776 [inline]
+ __nf_conntrack_find_get+0x1c7/0xac0 net/netfilter/nf_conntrack_core.c:807
+ resolve_normal_ct+0x273/0x610 net/netfilter/nf_conntrack_core.c:1734
+ nf_conntrack_in+0x1c5/0x88f net/netfilter/nf_conntrack_core.c:1901
+ ipv6_conntrack_local+0x19/0x20 net/netfilter/nf_conntrack_proto.c:414
+ nf_hook_entry_hookfn include/linux/netfilter.h:142 [inline]
+ nf_hook_slow+0x72/0x170 net/netfilter/core.c:619
+ nf_hook include/linux/netfilter.h:262 [inline]
+ NF_HOOK include/linux/netfilter.h:305 [inline]
+ ip6_xmit+0xa3a/0xa60 net/ipv6/ip6_output.c:324
+ inet6_csk_xmit+0x1a2/0x1e0 net/ipv6/inet6_connection_sock.c:135
+ __tcp_transmit_skb+0x132a/0x1840 net/ipv4/tcp_output.c:1402
+ __tcp_send_ack+0x1fd/0x300 net/ipv4/tcp_output.c:3956
+ tcp_send_ack+0x23/0x30 net/ipv4/tcp_output.c:3962
+ __tcp_ack_snd_check+0x2d8/0x510 net/ipv4/tcp_input.c:5478
+ tcp_ack_snd_check net/ipv4/tcp_input.c:5523 [inline]
+ tcp_rcv_established+0x8c2/0x10e0 net/ipv4/tcp_input.c:5948
+ tcp_v6_do_rcv+0x36e/0xa50 net/ipv6/tcp_ipv6.c:1521
+ sk_backlog_rcv include/net/sock.h:1030 [inline]
+ __release_sock+0xf2/0x270 net/core/sock.c:2768
+ release_sock+0x40/0x110 net/core/sock.c:3300
+ tcp_sendpage+0x94/0xb0 net/ipv4/tcp.c:1114
+ inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833
+ rds_tcp_xmit+0x376/0x5f0 net/rds/tcp_send.c:118
+ rds_send_xmit+0xbed/0x1500 net/rds/send.c:367
+ rds_send_worker+0x43/0x200 net/rds/threads.c:200
+ process_one_work+0x3fc/0x980 kernel/workqueue.c:2298
+ worker_thread+0x616/0xa70 kernel/workqueue.c:2445
+ kthread+0x2c7/0x2e0 kernel/kthread.c:327
+ ret_from_fork+0x1f/0x30
+
+value changed: 0x00027cc2 -> 0x00000000
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 1 PID: 17446 Comm: kworker/u4:5 Tainted: G        W         5.16.0-rc4-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: krdsd rds_send_worker
+
+Note: I chose an arbitrary commit for the Fixes: tag,
+because I do not think we need to backport this fix to very old kernels.
+
+Fixes: e37542ba111f ("netfilter: conntrack: avoid possible false sharing")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_conntrack.h |    6 +++---
+ net/netfilter/nf_conntrack_core.c    |    6 +++---
+ net/netfilter/nf_conntrack_netlink.c |    2 +-
+ net/netfilter/nf_flow_table_core.c   |    4 ++--
+ 4 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -262,14 +262,14 @@ static inline bool nf_is_loopback_packet
+ /* jiffies until ct expires, 0 if already expired */
+ static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
+ {
+-      s32 timeout = ct->timeout - nfct_time_stamp;
++      s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+ 
+       return timeout > 0 ? timeout : 0;
+ }
+ 
+ static inline bool nf_ct_is_expired(const struct nf_conn *ct)
+ {
+-      return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
++      return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0;
+ }
+ 
+ /* use after obtaining a reference count */
+@@ -288,7 +288,7 @@ static inline bool nf_ct_should_gc(const
+ static inline void nf_ct_offload_timeout(struct nf_conn *ct)
+ {
+       if (nf_ct_expires(ct) < NF_CT_DAY / 2)
+-              ct->timeout = nfct_time_stamp + NF_CT_DAY;
++              WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
+ }
+ 
+ struct kernel_param;
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -660,7 +660,7 @@ bool nf_ct_delete(struct nf_conn *ct, u3
+ 
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp) {
+-              s32 timeout = ct->timeout - nfct_time_stamp;
++              s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;
+ 
+               tstamp->stop = ktime_get_real_ns();
+               if (timeout < 0)
+@@ -980,7 +980,7 @@ static int nf_ct_resolve_clash_harder(st
+       }
+ 
+       /* We want the clashing entry to go away real soon: 1 second timeout. */
+-      loser_ct->timeout = nfct_time_stamp + HZ;
++      WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);
+ 
+       /* IPS_NAT_CLASH removes the entry automatically on the first
+        * reply.  Also prevents UDP tracker from moving the entry to
+@@ -1487,7 +1487,7 @@ __nf_conntrack_alloc(struct net *net,
+       /* save hash for reusing when confirming */
+       *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
+       ct->status = 0;
+-      ct->timeout = 0;
++      WRITE_ONCE(ct->timeout, 0);
+       write_pnet(&ct->ct_net, net);
+       memset(&ct->__nfct_init_offset, 0,
+              offsetof(struct nf_conn, proto) -
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -1971,7 +1971,7 @@ static int ctnetlink_change_timeout(stru
+ 
+       if (timeout > INT_MAX)
+               timeout = INT_MAX;
+-      ct->timeout = nfct_time_stamp + (u32)timeout;
++      WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout);
+ 
+       if (test_bit(IPS_DYING_BIT, &ct->status))
+               return -ETIME;
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -151,8 +151,8 @@ static void flow_offload_fixup_ct_timeou
+       else
+               return;
+ 
+-      if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
+-              ct->timeout = nfct_time_stamp + timeout;
++      if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
++              WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+ 
+ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
diff --git a/queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch b/queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch

new file mode 100644 (file)

index 0000000..ee1f706
--- /dev/null
+++ b/queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch
@@ -0,0 +1,63 @@
+From ae68d93354e5bf5191ee673982251864ea24dd5c Mon Sep 17 00:00:00 2001
+From: Andrea Mayer <andrea.mayer@uniroma2.it>
+Date: Wed, 8 Dec 2021 20:54:09 +0100
+Subject: seg6: fix the iif in the IPv6 socket control block
+
+From: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+commit ae68d93354e5bf5191ee673982251864ea24dd5c upstream.
+
+When an IPv4 packet is received, the ip_rcv_core(...) sets the receiving
+interface index into the IPv4 socket control block (v5.16-rc4,
+net/ipv4/ip_input.c line 510):
+
+    IPCB(skb)->iif = skb->skb_iif;
+
+If that IPv4 packet is meant to be encapsulated in an outer IPv6+SRH
+header, the seg6_do_srh_encap(...) performs the required encapsulation.
+In this case, the seg6_do_srh_encap function clears the IPv6 socket control
+block (v5.16-rc4 net/ipv6/seg6_iptunnel.c line 163):
+
+    memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+The memset(...) was introduced in commit ef489749aae5 ("ipv6: sr: clear
+IP6CB(skb) on SRH ip4ip6 encapsulation") a long time ago (2019-01-29).
+
+Since the IPv6 socket control block and the IPv4 socket control block share
+the same memory area (skb->cb), the receiving interface index info is lost
+(IP6CB(skb)->iif is set to zero).
+
+As a side effect, that condition triggers a NULL pointer dereference if
+commit 0857d6f8c759 ("ipv6: When forwarding count rx stats on the orig
+netdev") is applied.
+
+To fix that issue, we set the IP6CB(skb)->iif with the index of the
+receiving interface once again.
+
+Fixes: ef489749aae5 ("ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation")
+Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20211208195409.12169-1-andrea.mayer@uniroma2.it
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/seg6_iptunnel.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/net/ipv6/seg6_iptunnel.c
++++ b/net/ipv6/seg6_iptunnel.c
+@@ -160,6 +160,14 @@ int seg6_do_srh_encap(struct sk_buff *sk
+               hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+ 
+               memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
++
++              /* the control block has been erased, so we have to set the
++               * iif once again.
++               * We read the receiving interface index directly from the
++               * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
++               * ip_rcv_core(...)).
++               */
++              IP6CB(skb)->iif = skb->skb_iif;
+       }
+ 
+       hdr->nexthdr = NEXTHDR_ROUTING;
diff --git a/queue-5.10/series b/queue-5.10/series

index 8f39142175ce82be976e86df650f7b43793c7c15..a0ef09f53bff058f508058761a9ea0998cc2b343 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -32,3 +32,11 @@ bpf-fix-the-off-by-two-error-in-range-markings.patch
  ice-ignore-dropped-packets-during-init.patch
  bonding-make-tx_rebalance_counter-an-atomic.patch
  nfp-fix-memory-leak-in-nfp_cpp_area_cache_add.patch
+seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch
+udp-using-datalen-to-cap-max-gso-segments.patch
+netfilter-conntrack-annotate-data-races-around-ct-timeout.patch
+iavf-restore-msi-state-on-reset.patch
+iavf-fix-reporting-when-setting-descriptor-count.patch
+ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch
+devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch
+net-sched-fq_pie-prevent-dismantle-issue.patch
diff --git a/queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch b/queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch

new file mode 100644 (file)

index 0000000..7168069
--- /dev/null
+++ b/queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch
@@ -0,0 +1,41 @@
+From 158390e45612ef0fde160af0826f1740c36daf21 Mon Sep 17 00:00:00 2001
+From: Jianguo Wu <wujianguo@chinatelecom.cn>
+Date: Wed, 8 Dec 2021 18:03:33 +0800
+Subject: udp: using datalen to cap max gso segments
+
+From: Jianguo Wu <wujianguo@chinatelecom.cn>
+
+commit 158390e45612ef0fde160af0826f1740c36daf21 upstream.
+
+The max number of UDP gso segments is intended to be capped at UDP_MAX_SEGMENTS;
+this is checked in udp_send_skb():
+
+    if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+        kfree_skb(skb);
+        return -EINVAL;
+    }
+
+skb->len includes the network and transport header lengths here; we should use
+only the data length instead.
+
+Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT")
+Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Link: https://lore.kernel.org/r/900742e5-81fb-30dc-6e0b-375c6cdd7982@163.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -899,7 +899,7 @@ static int udp_send_skb(struct sk_buff *
+                       kfree_skb(skb);
+                       return -EINVAL;
+               }
+-              if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
++              if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
+                       kfree_skb(skb);
+                       return -EINVAL;
+               }
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 10 Dec 2021 16:46:39 +0000 (17:46 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 10 Dec 2021 16:46:39 +0000 (17:46 +0100)
queue-5.10/devlink-fix-netns-refcount-leak-in-devlink_nl_cmd_reload.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/iavf-fix-reporting-when-setting-descriptor-count.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/iavf-restore-msi-state-on-reset.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/ib-hfi1-correct-guard-on-eager-buffer-deallocation.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/net-sched-fq_pie-prevent-dismantle-issue.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/netfilter-conntrack-annotate-data-races-around-ct-timeout.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/seg6-fix-the-iif-in-the-ipv6-socket-control-block.patch	[new file with mode: 0644]	patch \| blob
queue-5.10/series		patch \| blob \| blame \| history
queue-5.10/udp-using-datalen-to-cap-max-gso-segments.patch	[new file with mode: 0644]	patch \| blob