From 0c7caa4c992599b6299915a5c1374d41b8221687 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sat, 19 Dec 2020 12:34:50 +0100
Subject: [PATCH] 4.4-stable patches

added patches:
	net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
	net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
	net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
	tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
---
 ...-fix-error-return-code-in-__vlan_add.patch |  37 ++++++
 ...estart-task-if-it-is-already-running.patch | 114 ++++++++++++++++++
 ...e-eee_ctrl_timer-after-napi-disabled.patch |  62 ++++++++++
 queue-4.4/series                              |   4 +
 ...r-tso-deferral-where-we-send-nothing.patch |  86 +++++++++++++
 5 files changed, 303 insertions(+)
 create mode 100644 queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
 create mode 100644 queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
 create mode 100644 queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
 create mode 100644 queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch

diff --git a/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch b/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
new file mode 100644
index 00000000000..f05a68435d9
--- /dev/null
+++ b/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
@@ -0,0 +1,37 @@
+From foo@baz Sat Dec 19 12:04:16 PM CET 2020
+From: Zhang Changzhong
+Date: Fri, 4 Dec 2020 16:48:56 +0800
+Subject: net: bridge: vlan: fix error return code in __vlan_add()
+
+From: Zhang Changzhong
+
+[ Upstream commit ee4f52a8de2c6f78b01f10b4c330867d88c1653a ]
+
+Fix to return a negative error code from the error handling
+case instead of 0, as done elsewhere in this function.
+
+Fixes: f8ed289fab84 ("bridge: vlan: use br_vlan_(get|put)_master to deal with refcounts")
+Reported-by: Hulk Robot
+Signed-off-by: Zhang Changzhong
+Acked-by: Nikolay Aleksandrov
+Link: https://lore.kernel.org/r/1607071737-33875-1-git-send-email-zhangchangzhong@huawei.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/bridge/br_vlan.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -225,8 +225,10 @@ static int __vlan_add(struct net_bridge_
+ 		}
+ 
+ 		masterv = br_vlan_get_master(br, v->vid);
+-		if (!masterv)
++		if (!masterv) {
++			err = -ENOMEM;
+ 			goto out_filt;
++		}
+ 		v->brvlan = masterv;
+ 	}
+ 
diff --git a/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch b/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
new file mode 100644
index 00000000000..eb19a8af040
--- /dev/null
+++ b/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
@@ -0,0 +1,114 @@
+From foo@baz Sat Dec 19 12:14:44 PM CET 2020
+From: Moshe Shemesh
+Date: Wed, 9 Dec 2020 15:03:38 +0200
+Subject: net/mlx4_en: Avoid scheduling restart task if it is already running
+
+From: Moshe Shemesh
+
+[ Upstream commit fed91613c9dd455dd154b22fa8e11b8526466082 ]
+
+Add restarting state flag to avoid scheduling another restart task while
+such task is already running. Change task name from watchdog_task to
+restart_task to better fit the task role.
+
+Fixes: 1e338db56e5a ("mlx4_en: Fix a race at restart task")
+Signed-off-by: Moshe Shemesh
+Signed-off-by: Tariq Toukan
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++------
+ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  7 ++++++-
+ 2 files changed, 17 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -1313,8 +1313,10 @@ static void mlx4_en_tx_timeout(struct ne
+ 	}
+ 
+ 	priv->port_stats.tx_timeout++;
+-	en_dbg(DRV, priv, "Scheduling watchdog\n");
+-	queue_work(mdev->workqueue, &priv->watchdog_task);
++	if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) {
++		en_dbg(DRV, priv, "Scheduling port restart\n");
++		queue_work(mdev->workqueue, &priv->restart_task);
++	}
+ }
+ 
+ 
+@@ -1730,6 +1732,7 @@ int mlx4_en_start_port(struct net_device
+ 		local_bh_enable();
+ 	}
+ 
++	clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state);
+ 	netif_tx_start_all_queues(dev);
+ 	netif_device_attach(dev);
+ 
+@@ -1891,7 +1894,7 @@ void mlx4_en_stop_port(struct net_device
+ static void mlx4_en_restart(struct work_struct *work)
+ {
+ 	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+-						 watchdog_task);
++						 restart_task);
+ 	struct mlx4_en_dev *mdev = priv->mdev;
+ 	struct net_device *dev = priv->dev;
+ 
+@@ -2121,7 +2124,7 @@ static int mlx4_en_change_mtu(struct net
+ 	if (netif_running(dev)) {
+ 		mutex_lock(&mdev->state_lock);
+ 		if (!mdev->device_up) {
+-			/* NIC is probably restarting - let watchdog task reset
++			/* NIC is probably restarting - let restart task reset
+ 			 * the port */
+ 			en_dbg(DRV, priv, "Change MTU called with card down!?\n");
+ 		} else {
+@@ -2130,7 +2133,9 @@ static int mlx4_en_change_mtu(struct net
+ 			if (err) {
+ 				en_err(priv, "Failed restarting port:%d\n",
+ 				       priv->port);
+-				queue_work(mdev->workqueue, &priv->watchdog_task);
++				if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING,
++						      &priv->state))
++					queue_work(mdev->workqueue, &priv->restart_task);
+ 			}
+ 		}
+ 		mutex_unlock(&mdev->state_lock);
+@@ -2850,7 +2855,7 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+ 	priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
+ 	spin_lock_init(&priv->stats_lock);
+ 	INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
+-	INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
++	INIT_WORK(&priv->restart_task, mlx4_en_restart);
+ 	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+ 	INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
+ 	INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
+--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+@@ -495,6 +495,10 @@ struct mlx4_en_stats_bitmap {
+ 	struct mutex mutex; /* for mutual access to stats bitmap */
+ };
+ 
++enum {
++	MLX4_EN_STATE_FLAG_RESTARTING,
++};
++
+ struct mlx4_en_priv {
+ 	struct mlx4_en_dev *mdev;
+ 	struct mlx4_en_port_profile *prof;
+@@ -560,7 +564,7 @@ struct mlx4_en_priv {
+ 	struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
+ 	struct mlx4_qp drop_qp;
+ 	struct work_struct rx_mode_task;
+-	struct work_struct watchdog_task;
++	struct work_struct restart_task;
+ 	struct work_struct linkstate_task;
+ 	struct delayed_work stats_task;
+ 	struct delayed_work service_task;
+@@ -605,6 +609,7 @@ struct mlx4_en_priv {
+ 	u32 pflags;
+ 	u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+ 	u8 rss_hash_fn;
++	unsigned long state;
+ };
+ 
+ enum mlx4_en_wol {
diff --git a/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch b/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
new file mode 100644
index 00000000000..acea24c99b2
--- /dev/null
+++ b/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
@@ -0,0 +1,62 @@
+From foo@baz Sat Dec 19 11:44:48 AM CET 2020
+From: Fugang Duan
+Date: Mon, 7 Dec 2020 18:51:40 +0800
+Subject: net: stmmac: delete the eee_ctrl_timer after napi disabled
+
+From: Fugang Duan
+
+[ Upstream commit 5f58591323bf3f342920179f24515935c4b5fd60 ]
+
+There have chance to re-enable the eee_ctrl_timer and fire the timer
+in napi callback after delete the timer in .stmmac_release(), which
+introduces to access eee registers in the timer function after clocks
+are disabled then causes system hang. Found this issue when do
+suspend/resume and reboot stress test.
+
+It is safe to delete the timer after napi disabled and disable lpi mode.
+
+Fixes: d765955d2ae0b ("stmmac: add the Energy Efficient Ethernet support")
+Signed-off-by: Fugang Duan
+Signed-off-by: Joakim Zhang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1897,9 +1897,6 @@ static int stmmac_release(struct net_dev
+ {
+ 	struct stmmac_priv *priv = netdev_priv(dev);
+ 
+-	if (priv->eee_enabled)
+-		del_timer_sync(&priv->eee_ctrl_timer);
+-
+ 	/* Stop and disconnect the PHY */
+ 	if (priv->phydev) {
+ 		phy_stop(priv->phydev);
+@@ -1920,6 +1917,11 @@ static int stmmac_release(struct net_dev
+ 	if (priv->lpi_irq > 0)
+ 		free_irq(priv->lpi_irq, dev);
+ 
++	if (priv->eee_enabled) {
++		priv->tx_path_in_lpi_mode = false;
++		del_timer_sync(&priv->eee_ctrl_timer);
++	}
++
+ 	/* Stop TX/RX DMA and clear the descriptors */
+ 	priv->hw->dma->stop_tx(priv->ioaddr);
+ 	priv->hw->dma->stop_rx(priv->ioaddr);
+@@ -3068,6 +3070,11 @@ int stmmac_suspend(struct net_device *nd
+ 
+ 	napi_disable(&priv->napi);
+ 
++	if (priv->eee_enabled) {
++		priv->tx_path_in_lpi_mode = false;
++		del_timer_sync(&priv->eee_ctrl_timer);
++	}
++
+ 	/* Stop TX/RX DMA */
+ 	priv->hw->dma->stop_tx(priv->ioaddr);
+ 	priv->hw->dma->stop_rx(priv->ioaddr);
diff --git a/queue-4.4/series b/queue-4.4/series
index 47a00eefe5a..cdbf28f13a4 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -6,3 +6,7 @@ input-cm109-do-not-stomp-on-control-urb.patch
 input-i8042-add-acer-laptops-to-the-i8042-reset-list.patch
 pinctrl-amd-remove-debounce-filter-setting-in-irq-type-setting.patch
 spi-prevent-adding-devices-below-an-unregistering-controller.patch
+net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
+tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
+net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
+net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
diff --git a/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch b/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
new file mode 100644
index 00000000000..d9ab9379cd3
--- /dev/null
+++ b/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
@@ -0,0 +1,86 @@
+From foo@baz Sat Dec 19 12:14:44 PM CET 2020
+From: Neal Cardwell
+Date: Tue, 8 Dec 2020 22:57:59 -0500
+Subject: tcp: fix cwnd-limited bug for TSO deferral where we send nothing
+
+From: Neal Cardwell
+
+[ Upstream commit 299bcb55ecd1412f6df606e9dc0912d55610029e ]
+
+When cwnd is not a multiple of the TSO skb size of N*MSS, we can get
+into persistent scenarios where we have the following sequence:
+
+(1) ACK for full-sized skb of N*MSS arrives
+    -> tcp_write_xmit() transmit full-sized skb with N*MSS
+    -> move pacing release time forward
+    -> exit tcp_write_xmit() because pacing time is in the future
+
+(2) TSQ callback or TCP internal pacing timer fires
+    -> try to transmit next skb, but TSO deferral finds remainder of
+       available cwnd is not big enough to trigger an immediate send
+       now, so we defer sending until the next ACK.
+
+(3) repeat...
+
+So we can get into a case where we never mark ourselves as
+cwnd-limited for many seconds at a time, even with
+bulk/infinite-backlog senders, because:
+
+o In case (1) above, every time in tcp_write_xmit() we have enough
+cwnd to send a full-sized skb, we are not fully using the cwnd
+(because cwnd is not a multiple of the TSO skb size). So every time we
+send data, we are not cwnd limited, and so in the cwnd-limited
+tracking code in tcp_cwnd_validate() we mark ourselves as not
+cwnd-limited.
+
+o In case (2) above, every time in tcp_write_xmit() that we try to
+transmit the "remainder" of the cwnd but defer, we set the local
+variable is_cwnd_limited to true, but we do not send any packets, so
+sent_pkts is zero, so we don't call the cwnd-limited logic to update
+tp->is_cwnd_limited.
+
+Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler")
+Reported-by: Ingemar Johansson
+Signed-off-by: Neal Cardwell
+Signed-off-by: Yuchung Cheng
+Acked-by: Soheil Hassas Yeganeh
+Signed-off-by: Eric Dumazet
+Link: https://lore.kernel.org/r/20201209035759.1225145-1-ncardwell.kernel@gmail.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_output.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1501,7 +1501,8 @@ static void tcp_cwnd_validate(struct soc
+ 	 * window, and remember whether we were cwnd-limited then.
+ 	 */
+ 	if (!before(tp->snd_una, tp->max_packets_seq) ||
+-	    tp->packets_out > tp->max_packets_out) {
++	    tp->packets_out > tp->max_packets_out ||
++	    is_cwnd_limited) {
+ 		tp->max_packets_out = tp->packets_out;
+ 		tp->max_packets_seq = tp->snd_nxt;
+ 		tp->is_cwnd_limited = is_cwnd_limited;
+@@ -2172,6 +2173,10 @@ repair:
+ 			break;
+ 	}
+ 
++	is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
++	if (likely(sent_pkts || is_cwnd_limited))
++		tcp_cwnd_validate(sk, is_cwnd_limited);
++
+ 	if (likely(sent_pkts)) {
+ 		if (tcp_in_cwnd_reduction(sk))
+ 			tp->prr_out += sent_pkts;
+ 
+@@ -2179,8 +2184,6 @@ repair:
+ 		/* Send one loss probe per tail loss episode. */
+ 		if (push_one != 2)
+ 			tcp_schedule_loss_probe(sk);
+-		is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+-		tcp_cwnd_validate(sk, is_cwnd_limited);
+ 		return false;
+ 	}
+ 	return !tp->packets_out && tcp_send_head(sk);
-- 
2.47.3