git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 19 Dec 2020 11:34:50 +0000 (12:34 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 19 Dec 2020 11:34:50 +0000 (12:34 +0100)
added patches:
net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch

queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch [new file with mode: 0644]
queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch [new file with mode: 0644]
queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch [new file with mode: 0644]

diff --git a/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch b/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
new file mode 100644 (file)
index 0000000..f05a684
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sat Dec 19 12:04:16 PM CET 2020
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+Date: Fri, 4 Dec 2020 16:48:56 +0800
+Subject: net: bridge: vlan: fix error return code in __vlan_add()
+
+From: Zhang Changzhong <zhangchangzhong@huawei.com>
+
+[ Upstream commit ee4f52a8de2c6f78b01f10b4c330867d88c1653a ]
+
+Fix to return a negative error code from the error handling
+case instead of 0, as done elsewhere in this function.
+
+Fixes: f8ed289fab84 ("bridge: vlan: use br_vlan_(get|put)_master to deal with refcounts")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
+Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
+Link: https://lore.kernel.org/r/1607071737-33875-1-git-send-email-zhangchangzhong@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_vlan.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -225,8 +225,10 @@ static int __vlan_add(struct net_bridge_
+               }
+               masterv = br_vlan_get_master(br, v->vid);
+-              if (!masterv)
++              if (!masterv) {
++                      err = -ENOMEM;
+                       goto out_filt;
++              }
+               v->brvlan = masterv;
+       }
diff --git a/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch b/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
new file mode 100644 (file)
index 0000000..eb19a8a
--- /dev/null
@@ -0,0 +1,114 @@
+From foo@baz Sat Dec 19 12:14:44 PM CET 2020
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Wed, 9 Dec 2020 15:03:38 +0200
+Subject: net/mlx4_en: Avoid scheduling restart task if it is already running
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+[ Upstream commit fed91613c9dd455dd154b22fa8e11b8526466082 ]
+
+Add restarting state flag to avoid scheduling another restart task while
+such task is already running. Change task name from watchdog_task to
+restart_task to better fit the task role.
+
+Fixes: 1e338db56e5a ("mlx4_en: Fix a race at restart task")
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   17 +++++++++++------
+ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    7 ++++++-
+ 2 files changed, 17 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -1313,8 +1313,10 @@ static void mlx4_en_tx_timeout(struct ne
+       }
+       priv->port_stats.tx_timeout++;
+-      en_dbg(DRV, priv, "Scheduling watchdog\n");
+-      queue_work(mdev->workqueue, &priv->watchdog_task);
++      if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) {
++              en_dbg(DRV, priv, "Scheduling port restart\n");
++              queue_work(mdev->workqueue, &priv->restart_task);
++      }
+ }
+@@ -1730,6 +1732,7 @@ int mlx4_en_start_port(struct net_device
+               local_bh_enable();
+       }
++      clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state);
+       netif_tx_start_all_queues(dev);
+       netif_device_attach(dev);
+@@ -1891,7 +1894,7 @@ void mlx4_en_stop_port(struct net_device
+ static void mlx4_en_restart(struct work_struct *work)
+ {
+       struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+-                                               watchdog_task);
++                                               restart_task);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       struct net_device *dev = priv->dev;
+@@ -2121,7 +2124,7 @@ static int mlx4_en_change_mtu(struct net
+       if (netif_running(dev)) {
+               mutex_lock(&mdev->state_lock);
+               if (!mdev->device_up) {
+-                      /* NIC is probably restarting - let watchdog task reset
++                      /* NIC is probably restarting - let restart task reset
+                        * the port */
+                       en_dbg(DRV, priv, "Change MTU called with card down!?\n");
+               } else {
+@@ -2130,7 +2133,9 @@ static int mlx4_en_change_mtu(struct net
+                       if (err) {
+                               en_err(priv, "Failed restarting port:%d\n",
+                                        priv->port);
+-                              queue_work(mdev->workqueue, &priv->watchdog_task);
++                              if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING,
++                                                    &priv->state))
++                                      queue_work(mdev->workqueue, &priv->restart_task);
+                       }
+               }
+               mutex_unlock(&mdev->state_lock);
+@@ -2850,7 +2855,7 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+       priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
+       spin_lock_init(&priv->stats_lock);
+       INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
+-      INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
++      INIT_WORK(&priv->restart_task, mlx4_en_restart);
+       INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+       INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
+       INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
+--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+@@ -495,6 +495,10 @@ struct mlx4_en_stats_bitmap {
+       struct mutex mutex; /* for mutual access to stats bitmap */
+ };
++enum {
++      MLX4_EN_STATE_FLAG_RESTARTING,
++};
++
+ struct mlx4_en_priv {
+       struct mlx4_en_dev *mdev;
+       struct mlx4_en_port_profile *prof;
+@@ -560,7 +564,7 @@ struct mlx4_en_priv {
+       struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
+       struct mlx4_qp drop_qp;
+       struct work_struct rx_mode_task;
+-      struct work_struct watchdog_task;
++      struct work_struct restart_task;
+       struct work_struct linkstate_task;
+       struct delayed_work stats_task;
+       struct delayed_work service_task;
+@@ -605,6 +609,7 @@ struct mlx4_en_priv {
+       u32 pflags;
+       u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+       u8 rss_hash_fn;
++      unsigned long state;
+ };
+ enum mlx4_en_wol {
diff --git a/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch b/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
new file mode 100644 (file)
index 0000000..acea24c
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Sat Dec 19 11:44:48 AM CET 2020
+From: Fugang Duan <fugang.duan@nxp.com>
+Date: Mon, 7 Dec 2020 18:51:40 +0800
+Subject: net: stmmac: delete the eee_ctrl_timer after napi disabled
+
+From: Fugang Duan <fugang.duan@nxp.com>
+
+[ Upstream commit 5f58591323bf3f342920179f24515935c4b5fd60 ]
+
+There have chance to re-enable the eee_ctrl_timer and fire the timer
+in napi callback after delete the timer in .stmmac_release(), which
+introduces to access eee registers in the timer function after clocks
+are disabled then causes system hang. Found this issue when do
+suspend/resume and reboot stress test.
+
+It is safe to delete the timer after napi disabled and disable lpi mode.
+
+Fixes: d765955d2ae0b ("stmmac: add the Energy Efficient Ethernet support")
+Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
+Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1897,9 +1897,6 @@ static int stmmac_release(struct net_dev
+ {
+       struct stmmac_priv *priv = netdev_priv(dev);
+-      if (priv->eee_enabled)
+-              del_timer_sync(&priv->eee_ctrl_timer);
+-
+       /* Stop and disconnect the PHY */
+       if (priv->phydev) {
+               phy_stop(priv->phydev);
+@@ -1920,6 +1917,11 @@ static int stmmac_release(struct net_dev
+       if (priv->lpi_irq > 0)
+               free_irq(priv->lpi_irq, dev);
++      if (priv->eee_enabled) {
++              priv->tx_path_in_lpi_mode = false;
++              del_timer_sync(&priv->eee_ctrl_timer);
++      }
++
+       /* Stop TX/RX DMA and clear the descriptors */
+       priv->hw->dma->stop_tx(priv->ioaddr);
+       priv->hw->dma->stop_rx(priv->ioaddr);
+@@ -3068,6 +3070,11 @@ int stmmac_suspend(struct net_device *nd
+       napi_disable(&priv->napi);
++      if (priv->eee_enabled) {
++              priv->tx_path_in_lpi_mode = false;
++              del_timer_sync(&priv->eee_ctrl_timer);
++      }
++
+       /* Stop TX/RX DMA */
+       priv->hw->dma->stop_tx(priv->ioaddr);
+       priv->hw->dma->stop_rx(priv->ioaddr);
index 47a00eefe5a6824fceb6db431aaa840414e8e68f..cdbf28f13a4ced5408119bfe418071e9cae14573 100644 (file)
@@ -6,3 +6,7 @@ input-cm109-do-not-stomp-on-control-urb.patch
 input-i8042-add-acer-laptops-to-the-i8042-reset-list.patch
 pinctrl-amd-remove-debounce-filter-setting-in-irq-type-setting.patch
 spi-prevent-adding-devices-below-an-unregistering-controller.patch
+net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
+tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
+net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
+net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
diff --git a/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch b/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
new file mode 100644 (file)
index 0000000..d9ab937
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Sat Dec 19 12:14:44 PM CET 2020
+From: Neal Cardwell <ncardwell@google.com>
+Date: Tue, 8 Dec 2020 22:57:59 -0500
+Subject: tcp: fix cwnd-limited bug for TSO deferral where we send nothing
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 299bcb55ecd1412f6df606e9dc0912d55610029e ]
+
+When cwnd is not a multiple of the TSO skb size of N*MSS, we can get
+into persistent scenarios where we have the following sequence:
+
+(1) ACK for full-sized skb of N*MSS arrives
+  -> tcp_write_xmit() transmit full-sized skb with N*MSS
+  -> move pacing release time forward
+  -> exit tcp_write_xmit() because pacing time is in the future
+
+(2) TSQ callback or TCP internal pacing timer fires
+  -> try to transmit next skb, but TSO deferral finds remainder of
+     available cwnd is not big enough to trigger an immediate send
+     now, so we defer sending until the next ACK.
+
+(3) repeat...
+
+So we can get into a case where we never mark ourselves as
+cwnd-limited for many seconds at a time, even with
+bulk/infinite-backlog senders, because:
+
+o In case (1) above, every time in tcp_write_xmit() we have enough
+cwnd to send a full-sized skb, we are not fully using the cwnd
+(because cwnd is not a multiple of the TSO skb size). So every time we
+send data, we are not cwnd limited, and so in the cwnd-limited
+tracking code in tcp_cwnd_validate() we mark ourselves as not
+cwnd-limited.
+
+o In case (2) above, every time in tcp_write_xmit() that we try to
+transmit the "remainder" of the cwnd but defer, we set the local
+variable is_cwnd_limited to true, but we do not send any packets, so
+sent_pkts is zero, so we don't call the cwnd-limited logic to update
+tp->is_cwnd_limited.
+
+Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler")
+Reported-by: Ingemar Johansson <ingemar.s.johansson@ericsson.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20201209035759.1225145-1-ncardwell.kernel@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1501,7 +1501,8 @@ static void tcp_cwnd_validate(struct soc
+        * window, and remember whether we were cwnd-limited then.
+        */
+       if (!before(tp->snd_una, tp->max_packets_seq) ||
+-          tp->packets_out > tp->max_packets_out) {
++          tp->packets_out > tp->max_packets_out ||
++          is_cwnd_limited) {
+               tp->max_packets_out = tp->packets_out;
+               tp->max_packets_seq = tp->snd_nxt;
+               tp->is_cwnd_limited = is_cwnd_limited;
+@@ -2172,6 +2173,10 @@ repair:
+                       break;
+       }
++      is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
++      if (likely(sent_pkts || is_cwnd_limited))
++              tcp_cwnd_validate(sk, is_cwnd_limited);
++
+       if (likely(sent_pkts)) {
+               if (tcp_in_cwnd_reduction(sk))
+                       tp->prr_out += sent_pkts;
+@@ -2179,8 +2184,6 @@ repair:
+               /* Send one loss probe per tail loss episode. */
+               if (push_one != 2)
+                       tcp_schedule_loss_probe(sk);
+-              is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+-              tcp_cwnd_validate(sk, is_cwnd_limited);
+               return false;
+       }
+       return !tp->packets_out && tcp_send_head(sk);