From d97817fff05628ceac32d82c2029df9ae2230a37 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 13 Jun 2022 20:05:04 +0200 Subject: [PATCH] 5.18-stable patches added patches: dmaengine-idxd-add-missing-callback-function-to-support-dma_interrupt.patch net-mlx5-e-switch-pair-only-capable-devices.patch tcp-fix-tcp_mtup_probe_success-vs-wrong-snd_cwnd.patch --- ...ck-function-to-support-dma_interrupt.patch | 61 +++++++ ...5-e-switch-pair-only-capable-devices.patch | 172 ++++++++++++++++++ queue-5.18/series | 3 + ...mtup_probe_success-vs-wrong-snd_cwnd.patch | 107 +++++++++++ 4 files changed, 343 insertions(+) create mode 100644 queue-5.18/dmaengine-idxd-add-missing-callback-function-to-support-dma_interrupt.patch create mode 100644 queue-5.18/net-mlx5-e-switch-pair-only-capable-devices.patch create mode 100644 queue-5.18/tcp-fix-tcp_mtup_probe_success-vs-wrong-snd_cwnd.patch diff --git a/queue-5.18/dmaengine-idxd-add-missing-callback-function-to-support-dma_interrupt.patch b/queue-5.18/dmaengine-idxd-add-missing-callback-function-to-support-dma_interrupt.patch new file mode 100644 index 00000000000..163a91917d8 --- /dev/null +++ b/queue-5.18/dmaengine-idxd-add-missing-callback-function-to-support-dma_interrupt.patch @@ -0,0 +1,61 @@ +From 2112b8f4fb5cc35d1c384324763765953186b81f Mon Sep 17 00:00:00 2001 +From: Dave Jiang +Date: Tue, 26 Apr 2022 15:32:06 -0700 +Subject: dmaengine: idxd: add missing callback function to support DMA_INTERRUPT + +From: Dave Jiang + +commit 2112b8f4fb5cc35d1c384324763765953186b81f upstream. + +When setting DMA_INTERRUPT capability, a callback function +dma->device_prep_dma_interrupt() is needed to support this capability. +Without setting the callback, dma_async_device_register() will fail dma +capability check. + +Fixes: 4e5a4eb20393 ("dmaengine: idxd: set DMA_INTERRUPT cap bit") +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/165101232637.3951447.15765792791591763119.stgit@djiang5-desk3.ch.intel.com +Signed-off-by: Vinod Koul +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dma/idxd/dma.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +--- a/drivers/dma/idxd/dma.c ++++ b/drivers/dma/idxd/dma.c +@@ -88,6 +88,27 @@ static inline void idxd_prep_desc_common + } + + static struct dma_async_tx_descriptor * ++idxd_dma_prep_interrupt(struct dma_chan *c, unsigned long flags) ++{ ++ struct idxd_wq *wq = to_idxd_wq(c); ++ u32 desc_flags; ++ struct idxd_desc *desc; ++ ++ if (wq->state != IDXD_WQ_ENABLED) ++ return NULL; ++ ++ op_flag_setup(flags, &desc_flags); ++ desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); ++ if (IS_ERR(desc)) ++ return NULL; ++ ++ idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_NOOP, ++ 0, 0, 0, desc->compl_dma, desc_flags); ++ desc->txd.flags = flags; ++ return &desc->txd; ++} ++ ++static struct dma_async_tx_descriptor * + idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) + { +@@ -198,6 +219,7 @@ int idxd_register_dma_device(struct idxd + dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask); + dma->device_release = idxd_dma_release; + ++ dma->device_prep_dma_interrupt = idxd_dma_prep_interrupt; + if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) { + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy; diff --git a/queue-5.18/net-mlx5-e-switch-pair-only-capable-devices.patch b/queue-5.18/net-mlx5-e-switch-pair-only-capable-devices.patch new file mode 100644 index 00000000000..b714d0692e5 --- /dev/null +++ b/queue-5.18/net-mlx5-e-switch-pair-only-capable-devices.patch @@ -0,0 +1,172 @@ +From 3008e6a0049361e731b803c60fe8f3ab44e1d73f Mon Sep 17 00:00:00 2001 +From: Mark Bloch +Date: Thu, 26 May 2022 08:15:28 +0300 +Subject: net/mlx5: E-Switch, pair only capable devices + +From: Mark Bloch + +commit 3008e6a0049361e731b803c60fe8f3ab44e1d73f upstream. + +OFFLOADS paring using devcom is possible only on devices +that support LAG. Filter based on lag capabilities. + +This fixes an issue where mlx5_get_next_phys_dev() was +called without holding the interface lock. + +This issue was found when commit +bc4c2f2e0179 ("net/mlx5: Lag, filter non compatible devices") +added an assert that verifies the interface lock is held. + +WARNING: CPU: 9 PID: 1706 at drivers/net/ethernet/mellanox/mlx5/core/dev.c:642 mlx5_get_next_phys_dev+0xd2/0x100 [mlx5_core] +Modules linked in: mlx5_vdpa vringh vhost_iotlb vdpa mlx5_ib mlx5_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_umad ib_ipoib ib_cm ib_uverbs ib_core overlay fuse [last unloaded: mlx5_core] +CPU: 9 PID: 1706 Comm: devlink Not tainted 5.18.0-rc7+ #11 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +RIP: 0010:mlx5_get_next_phys_dev+0xd2/0x100 [mlx5_core] +Code: 02 00 75 48 48 8b 85 80 04 00 00 5d c3 31 c0 5d c3 be ff ff ff ff 48 c7 c7 08 41 5b a0 e8 36 87 28 e3 85 c0 0f 85 6f ff ff ff <0f> 0b e9 68 ff ff ff 48 c7 c7 0c 91 cc 84 e8 cb 36 6f e1 e9 4d ff +RSP: 0018:ffff88811bf47458 EFLAGS: 00010246 +RAX: 0000000000000000 RBX: ffff88811b398000 RCX: 0000000000000001 +RDX: 0000000080000000 RSI: ffffffffa05b4108 RDI: ffff88812daaaa78 +RBP: ffff88812d050380 R08: 0000000000000001 R09: ffff88811d6b3437 +R10: 0000000000000001 R11: 00000000fddd3581 R12: ffff88815238c000 +R13: ffff88812d050380 R14: ffff8881018aa7e0 R15: ffff88811d6b3428 +FS: 00007fc82e18ae80(0000) GS:ffff88842e080000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f9630d1b421 CR3: 0000000149802004 CR4: 0000000000370ea0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + mlx5_esw_offloads_devcom_event+0x99/0x3b0 [mlx5_core] + mlx5_devcom_send_event+0x167/0x1d0 [mlx5_core] + esw_offloads_enable+0x1153/0x1500 [mlx5_core] + ? mlx5_esw_offloads_controller_valid+0x170/0x170 [mlx5_core] + ? wait_for_completion_io_timeout+0x20/0x20 + ? mlx5_rescan_drivers_locked+0x318/0x810 [mlx5_core] + mlx5_eswitch_enable_locked+0x586/0xc50 [mlx5_core] + ? mlx5_eswitch_disable_pf_vf_vports+0x1d0/0x1d0 [mlx5_core] + ? mlx5_esw_try_lock+0x1b/0xb0 [mlx5_core] + ? mlx5_eswitch_enable+0x270/0x270 [mlx5_core] + ? __debugfs_create_file+0x260/0x3e0 + mlx5_devlink_eswitch_mode_set+0x27e/0x870 [mlx5_core] + ? mutex_lock_io_nested+0x12c0/0x12c0 + ? esw_offloads_disable+0x250/0x250 [mlx5_core] + ? devlink_nl_cmd_trap_get_dumpit+0x470/0x470 + ? rcu_read_lock_sched_held+0x3f/0x70 + devlink_nl_cmd_eswitch_set_doit+0x217/0x620 + +Fixes: dd3fddb82780 ("net/mlx5: E-Switch, handle devcom events only for ports on the same device") +Signed-off-by: Mark Bloch +Reviewed-by: Roi Dayan +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/dev.c | 18 ------------- + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++-- + drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 10 +++++++ + drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + 4 files changed, 17 insertions(+), 22 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c +@@ -579,17 +579,6 @@ static void *pci_get_other_drvdata(struc + return pci_get_drvdata(to_pci_dev(other)); + } + +-static int next_phys_dev(struct device *dev, const void *data) +-{ +- struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; +- +- mdev = pci_get_other_drvdata(this->device, dev); +- if (!mdev) +- return 0; +- +- return _next_phys_dev(mdev, data); +-} +- + static int next_phys_dev_lag(struct device *dev, const void *data) + { + struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data; +@@ -623,13 +612,6 @@ static struct mlx5_core_dev *mlx5_get_ne + } + + /* Must be called with intf_mutex held */ +-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +-{ +- lockdep_assert_held(&mlx5_intf_mutex); +- return mlx5_get_next_dev(dev, &next_phys_dev); +-} +- +-/* Must be called with intf_mutex held */ + struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev) + { + lockdep_assert_held(&mlx5_intf_mutex); +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -49,6 +49,7 @@ + #include "en_tc.h" + #include "en/mapping.h" + #include "devlink.h" ++#include "lag/lag.h" + + #define mlx5_esw_for_each_rep(esw, i, rep) \ + xa_for_each(&((esw)->offloads.vport_reps), i, rep) +@@ -2687,9 +2688,6 @@ static int mlx5_esw_offloads_devcom_even + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: +- if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) +- break; +- + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; +@@ -2741,6 +2739,9 @@ static void esw_offloads_devcom_init(str + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + ++ if (!mlx5_is_lag_supported(esw->dev)) ++ return; ++ + mlx5_devcom_register_component(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_esw_offloads_devcom_event, +@@ -2758,6 +2759,9 @@ static void esw_offloads_devcom_cleanup( + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + ++ if (!mlx5_is_lag_supported(esw->dev)) ++ return; ++ + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + +--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +@@ -58,6 +58,16 @@ struct mlx5_lag { + struct mlx5_lag_port_sel port_sel; + }; + ++static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev) ++{ ++ if (!MLX5_CAP_GEN(dev, vport_group_manager) || ++ !MLX5_CAP_GEN(dev, lag_master) || ++ MLX5_CAP_GEN(dev, num_lag_ports) < 2 || ++ MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) ++ return false; ++ return true; ++} ++ + static inline struct mlx5_lag * + mlx5_lag_dev(struct mlx5_core_dev *dev) + { +--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +@@ -209,7 +209,6 @@ int mlx5_attach_device(struct mlx5_core_ + void mlx5_detach_device(struct mlx5_core_dev *dev); + int mlx5_register_device(struct mlx5_core_dev *dev); + void mlx5_unregister_device(struct mlx5_core_dev *dev); +-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); + struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); + void mlx5_dev_list_lock(void); + void mlx5_dev_list_unlock(void); diff --git a/queue-5.18/series b/queue-5.18/series index 471a0d4f907..a6af4b5589c 100644 --- a/queue-5.18/series +++ b/queue-5.18/series @@ -338,3 +338,6 @@ random-account-for-arch-randomness-in-bits.patch md-raid0-ignore-raid0-layout-if-the-second-zone-has-only-one-device.patch zonefs-fix-handling-of-explicit_open-option-on-mount.patch iov_iter-fix-build-issue-due-to-possible-type-mis-match.patch +dmaengine-idxd-add-missing-callback-function-to-support-dma_interrupt.patch +tcp-fix-tcp_mtup_probe_success-vs-wrong-snd_cwnd.patch +net-mlx5-e-switch-pair-only-capable-devices.patch diff --git a/queue-5.18/tcp-fix-tcp_mtup_probe_success-vs-wrong-snd_cwnd.patch b/queue-5.18/tcp-fix-tcp_mtup_probe_success-vs-wrong-snd_cwnd.patch new file mode 100644 index 00000000000..2ef05eae09b --- /dev/null +++ b/queue-5.18/tcp-fix-tcp_mtup_probe_success-vs-wrong-snd_cwnd.patch @@ -0,0 +1,107 @@ +From 11825765291a93d8e7f44230da67b9f607c777bf Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 27 May 2022 14:28:29 -0700 +Subject: tcp: fix tcp_mtup_probe_success vs wrong snd_cwnd + +From: Eric Dumazet + +commit 11825765291a93d8e7f44230da67b9f607c777bf upstream. + +syzbot got a new report [1] finally pointing to a very old bug, +added in initial support for MTU probing. + +tcp_mtu_probe() has checks about starting an MTU probe if +tcp_snd_cwnd(tp) >= 11. + +But nothing prevents tcp_snd_cwnd(tp) to be reduced later +and before the MTU probe succeeds. + +This bug would lead to potential zero-divides. + +Debugging added in commit 40570375356c ("tcp: add accessors +to read/set tp->snd_cwnd") has paid off :) + +While we are at it, address potential overflows in this code. + +[1] +WARNING: CPU: 1 PID: 14132 at include/net/tcp.h:1219 tcp_mtup_probe_success+0x366/0x570 net/ipv4/tcp_input.c:2712 +Modules linked in: +CPU: 1 PID: 14132 Comm: syz-executor.2 Not tainted 5.18.0-syzkaller-07857-gbabf0bb978e3 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:tcp_snd_cwnd_set include/net/tcp.h:1219 [inline] +RIP: 0010:tcp_mtup_probe_success+0x366/0x570 net/ipv4/tcp_input.c:2712 +Code: 74 08 48 89 ef e8 da 80 17 f9 48 8b 45 00 65 48 ff 80 80 03 00 00 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 aa b0 c5 f8 <0f> 0b e9 16 fe ff ff 48 8b 4c 24 08 80 e1 07 38 c1 0f 8c c7 fc ff +RSP: 0018:ffffc900079e70f8 EFLAGS: 00010287 +RAX: ffffffff88c0f7f6 RBX: ffff8880756e7a80 RCX: 0000000000040000 +RDX: ffffc9000c6c4000 RSI: 0000000000031f9e RDI: 0000000000031f9f +RBP: 0000000000000000 R08: ffffffff88c0f606 R09: ffffc900079e7520 +R10: ffffed101011226d R11: 1ffff1101011226c R12: 1ffff1100eadcf50 +R13: ffff8880756e72c0 R14: 1ffff1100eadcf89 R15: dffffc0000000000 +FS: 00007f643236e700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f1ab3f1e2a0 CR3: 0000000064fe7000 CR4: 00000000003506e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + tcp_clean_rtx_queue+0x223a/0x2da0 net/ipv4/tcp_input.c:3356 + tcp_ack+0x1962/0x3c90 net/ipv4/tcp_input.c:3861 + tcp_rcv_established+0x7c8/0x1ac0 net/ipv4/tcp_input.c:5973 + tcp_v6_do_rcv+0x57b/0x1210 net/ipv6/tcp_ipv6.c:1476 + sk_backlog_rcv include/net/sock.h:1061 [inline] + __release_sock+0x1d8/0x4c0 net/core/sock.c:2849 + release_sock+0x5d/0x1c0 net/core/sock.c:3404 + sk_stream_wait_memory+0x700/0xdc0 net/core/stream.c:145 + tcp_sendmsg_locked+0x111d/0x3fc0 net/ipv4/tcp.c:1410 + tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1448 + sock_sendmsg_nosec net/socket.c:714 [inline] + sock_sendmsg net/socket.c:734 [inline] + __sys_sendto+0x439/0x5c0 net/socket.c:2119 + __do_sys_sendto net/socket.c:2131 [inline] + __se_sys_sendto net/socket.c:2127 [inline] + __x64_sys_sendto+0xda/0xf0 net/socket.c:2127 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 +RIP: 0033:0x7f6431289109 +Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f643236e168 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00007f643139c100 RCX: 00007f6431289109 +RDX: 00000000d0d0c2ac RSI: 0000000020000080 RDI: 000000000000000a +RBP: 00007f64312e308d R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007fff372533af R14: 00007f643236e300 R15: 0000000000022000 + +Fixes: 5d424d5a674f ("[TCP]: MTU probing") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Acked-by: Yuchung Cheng +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2706,12 +2706,15 @@ static void tcp_mtup_probe_success(struc + { + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); ++ u64 val; + +- /* FIXME: breaks with very large cwnd */ + tp->prior_ssthresh = tcp_current_ssthresh(sk); +- tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) * +- tcp_mss_to_mtu(sk, tp->mss_cache) / +- icsk->icsk_mtup.probe_size); ++ ++ val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache); ++ do_div(val, icsk->icsk_mtup.probe_size); ++ WARN_ON_ONCE((u32)val != val); ++ tcp_snd_cwnd_set(tp, max_t(u32, 1U, val)); ++ + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd_stamp = tcp_jiffies32; + tp->snd_ssthresh = tcp_current_ssthresh(sk); -- 2.47.3