From 0c7caa4c992599b6299915a5c1374d41b8221687 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sat, 19 Dec 2020 12:34:50 +0100
Subject: [PATCH] 4.4-stable patches

added patches:
	net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
	net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
	net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
	tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
---
 ...-fix-error-return-code-in-__vlan_add.patch |  37 ++++++
 ...estart-task-if-it-is-already-running.patch | 114 ++++++++++++++++++
 ...e-eee_ctrl_timer-after-napi-disabled.patch |  62 ++++++++++
 queue-4.4/series                              |   4 +
 ...r-tso-deferral-where-we-send-nothing.patch |  86 +++++++++++++
 5 files changed, 303 insertions(+)
 create mode 100644 queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
 create mode 100644 queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
 create mode 100644 queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
 create mode 100644 queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch

diff --git a/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch b/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
new file mode 100644
index 00000000000..f05a68435d9
--- /dev/null
+++ b/queue-4.4/net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
@@ -0,0 +1,37 @@
+From foo@baz Sat Dec 19 12:04:16 PM CET 2020
+From: Zhang Changzhong
+Date: Fri, 4 Dec 2020 16:48:56 +0800
+Subject: net: bridge: vlan: fix error return code in __vlan_add()
+
+From: Zhang Changzhong
+
+[ Upstream commit ee4f52a8de2c6f78b01f10b4c330867d88c1653a ]
+
+Fix to return a negative error code from the error handling
+case instead of 0, as done elsewhere in this function.
+
+Fixes: f8ed289fab84 ("bridge: vlan: use br_vlan_(get|put)_master to deal with refcounts")
+Reported-by: Hulk Robot
+Signed-off-by: Zhang Changzhong
+Acked-by: Nikolay Aleksandrov
+Link: https://lore.kernel.org/r/1607071737-33875-1-git-send-email-zhangchangzhong@huawei.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/bridge/br_vlan.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -225,8 +225,10 @@ static int __vlan_add(struct net_bridge_
+ 		}
+ 
+ 		masterv = br_vlan_get_master(br, v->vid);
+-		if (!masterv)
++		if (!masterv) {
++			err = -ENOMEM;
+ 			goto out_filt;
++		}
+ 		v->brvlan = masterv;
+ 	}
+ 
diff --git a/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch b/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
new file mode 100644
index 00000000000..eb19a8af040
--- /dev/null
+++ b/queue-4.4/net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
@@ -0,0 +1,114 @@
+From foo@baz Sat Dec 19 12:14:44 PM CET 2020
+From: Moshe Shemesh
+Date: Wed, 9 Dec 2020 15:03:38 +0200
+Subject: net/mlx4_en: Avoid scheduling restart task if it is already running
+
+From: Moshe Shemesh
+
+[ Upstream commit fed91613c9dd455dd154b22fa8e11b8526466082 ]
+
+Add restarting state flag to avoid scheduling another restart task while
+such task is already running. Change task name from watchdog_task to
+restart_task to better fit the task role.
+
+Fixes: 1e338db56e5a ("mlx4_en: Fix a race at restart task")
+Signed-off-by: Moshe Shemesh
+Signed-off-by: Tariq Toukan
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++------
+ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  7 ++++++-
+ 2 files changed, 17 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+@@ -1313,8 +1313,10 @@ static void mlx4_en_tx_timeout(struct ne
+ 	}
+ 
+ 	priv->port_stats.tx_timeout++;
+-	en_dbg(DRV, priv, "Scheduling watchdog\n");
+-	queue_work(mdev->workqueue, &priv->watchdog_task);
++	if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) {
++		en_dbg(DRV, priv, "Scheduling port restart\n");
++		queue_work(mdev->workqueue, &priv->restart_task);
++	}
+ }
+ 
+ 
+@@ -1730,6 +1732,7 @@ int mlx4_en_start_port(struct net_device
+ 		local_bh_enable();
+ 	}
+ 
++	clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state);
+ 	netif_tx_start_all_queues(dev);
+ 	netif_device_attach(dev);
+ 
+@@ -1891,7 +1894,7 @@ void mlx4_en_stop_port(struct net_device
+ static void mlx4_en_restart(struct work_struct *work)
+ {
+ 	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
+-						 watchdog_task);
++						 restart_task);
+ 	struct mlx4_en_dev *mdev = priv->mdev;
+ 	struct net_device *dev = priv->dev;
+ 
+@@ -2121,7 +2124,7 @@ static int mlx4_en_change_mtu(struct net
+ 	if (netif_running(dev)) {
+ 		mutex_lock(&mdev->state_lock);
+ 		if (!mdev->device_up) {
+-			/* NIC is probably restarting - let watchdog task reset
++			/* NIC is probably restarting - let restart task reset
+ 			 * the port */
+ 			en_dbg(DRV, priv, "Change MTU called with card down!?\n");
+ 		} else {
+@@ -2130,7 +2133,9 @@ static int mlx4_en_change_mtu(struct net
+ 			if (err) {
+ 				en_err(priv, "Failed restarting port:%d\n",
+ 				       priv->port);
+-				queue_work(mdev->workqueue, &priv->watchdog_task);
++				if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING,
++						      &priv->state))
++					queue_work(mdev->workqueue, &priv->restart_task);
+ 			}
+ 		}
+ 		mutex_unlock(&mdev->state_lock);
+@@ -2850,7 +2855,7 @@ int mlx4_en_init_netdev(struct mlx4_en_d
+ 	priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
+ 	spin_lock_init(&priv->stats_lock);
+ 	INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode);
+-	INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
++	INIT_WORK(&priv->restart_task, mlx4_en_restart);
+ 	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
+ 	INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
+ 	INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task);
+--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+@@ -495,6 +495,10 @@ struct mlx4_en_stats_bitmap {
+ 	struct mutex mutex; /* for mutual access to stats bitmap */
+ };
+ 
++enum {
++	MLX4_EN_STATE_FLAG_RESTARTING,
++};
++
+ struct mlx4_en_priv {
+ 	struct mlx4_en_dev *mdev;
+ 	struct mlx4_en_port_profile *prof;
+@@ -560,7 +564,7 @@ struct mlx4_en_priv {
+ 	struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
+ 	struct mlx4_qp drop_qp;
+ 	struct work_struct rx_mode_task;
+-	struct work_struct watchdog_task;
++	struct work_struct restart_task;
+ 	struct work_struct linkstate_task;
+ 	struct delayed_work stats_task;
+ 	struct delayed_work service_task;
+@@ -605,6 +609,7 @@ struct mlx4_en_priv {
+ 	u32 pflags;
+ 	u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+ 	u8 rss_hash_fn;
++	unsigned long state;
+ };
+ 
+ enum mlx4_en_wol {
diff --git a/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch b/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
new file mode 100644
index 00000000000..acea24c99b2
--- /dev/null
+++ b/queue-4.4/net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
@@ -0,0 +1,62 @@
+From foo@baz Sat Dec 19 11:44:48 AM CET 2020
+From: Fugang Duan
+Date: Mon, 7 Dec 2020 18:51:40 +0800
+Subject: net: stmmac: delete the eee_ctrl_timer after napi disabled
+
+From: Fugang Duan
+
+[ Upstream commit 5f58591323bf3f342920179f24515935c4b5fd60 ]
+
+There have chance to re-enable the eee_ctrl_timer and fire the timer
+in napi callback after delete the timer in .stmmac_release(), which
+introduces to access eee registers in the timer function after clocks
+are disabled then causes system hang. Found this issue when do
+suspend/resume and reboot stress test.
+
+It is safe to delete the timer after napi disabled and disable lpi mode.
+
+Fixes: d765955d2ae0b ("stmmac: add the Energy Efficient Ethernet support")
+Signed-off-by: Fugang Duan
+Signed-off-by: Joakim Zhang
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1897,9 +1897,6 @@ static int stmmac_release(struct net_dev
+ {
+ 	struct stmmac_priv *priv = netdev_priv(dev);
+ 
+-	if (priv->eee_enabled)
+-		del_timer_sync(&priv->eee_ctrl_timer);
+-
+ 	/* Stop and disconnect the PHY */
+ 	if (priv->phydev) {
+ 		phy_stop(priv->phydev);
+@@ -1920,6 +1917,11 @@ static int stmmac_release(struct net_dev
+ 	if (priv->lpi_irq > 0)
+ 		free_irq(priv->lpi_irq, dev);
+ 
++	if (priv->eee_enabled) {
++		priv->tx_path_in_lpi_mode = false;
++		del_timer_sync(&priv->eee_ctrl_timer);
++	}
++
+ 	/* Stop TX/RX DMA and clear the descriptors */
+ 	priv->hw->dma->stop_tx(priv->ioaddr);
+ 	priv->hw->dma->stop_rx(priv->ioaddr);
+@@ -3068,6 +3070,11 @@ int stmmac_suspend(struct net_device *nd
+ 
+ 	napi_disable(&priv->napi);
+ 
++	if (priv->eee_enabled) {
++		priv->tx_path_in_lpi_mode = false;
++		del_timer_sync(&priv->eee_ctrl_timer);
++	}
++
+ 	/* Stop TX/RX DMA */
+ 	priv->hw->dma->stop_tx(priv->ioaddr);
+ 	priv->hw->dma->stop_rx(priv->ioaddr);
diff --git a/queue-4.4/series b/queue-4.4/series
index 47a00eefe5a..cdbf28f13a4 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -6,3 +6,7 @@ input-cm109-do-not-stomp-on-control-urb.patch
 input-i8042-add-acer-laptops-to-the-i8042-reset-list.patch
 pinctrl-amd-remove-debounce-filter-setting-in-irq-type-setting.patch
 spi-prevent-adding-devices-below-an-unregistering-controller.patch
+net-mlx4_en-avoid-scheduling-restart-task-if-it-is-already-running.patch
+tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
+net-stmmac-delete-the-eee_ctrl_timer-after-napi-disabled.patch
+net-bridge-vlan-fix-error-return-code-in-__vlan_add.patch
diff --git a/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch b/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
new file mode 100644
index 00000000000..d9ab9379cd3
--- /dev/null
+++ b/queue-4.4/tcp-fix-cwnd-limited-bug-for-tso-deferral-where-we-send-nothing.patch
@@ -0,0 +1,86 @@
+From foo@baz Sat Dec 19 12:14:44 PM CET 2020
+From: Neal Cardwell
+Date: Tue, 8 Dec 2020 22:57:59 -0500
+Subject: tcp: fix cwnd-limited bug for TSO deferral where we send nothing
+
+From: Neal Cardwell
+
+[ Upstream commit 299bcb55ecd1412f6df606e9dc0912d55610029e ]
+
+When cwnd is not a multiple of the TSO skb size of N*MSS, we can get
+into persistent scenarios where we have the following sequence:
+
+(1) ACK for full-sized skb of N*MSS arrives
+    -> tcp_write_xmit() transmit full-sized skb with N*MSS
+    -> move pacing release time forward
+    -> exit tcp_write_xmit() because pacing time is in the future
+
+(2) TSQ callback or TCP internal pacing timer fires
+    -> try to transmit next skb, but TSO deferral finds remainder of
+       available cwnd is not big enough to trigger an immediate send
+       now, so we defer sending until the next ACK.
+
+(3) repeat...
+
+So we can get into a case where we never mark ourselves as
+cwnd-limited for many seconds at a time, even with
+bulk/infinite-backlog senders, because:
+
+o In case (1) above, every time in tcp_write_xmit() we have enough
+cwnd to send a full-sized skb, we are not fully using the cwnd
+(because cwnd is not a multiple of the TSO skb size). So every time we
+send data, we are not cwnd limited, and so in the cwnd-limited
+tracking code in tcp_cwnd_validate() we mark ourselves as not
+cwnd-limited.
+
+o In case (2) above, every time in tcp_write_xmit() that we try to
+transmit the "remainder" of the cwnd but defer, we set the local
+variable is_cwnd_limited to true, but we do not send any packets, so
+sent_pkts is zero, so we don't call the cwnd-limited logic to update
+tp->is_cwnd_limited.
+
+Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler")
+Reported-by: Ingemar Johansson
+Signed-off-by: Neal Cardwell
+Signed-off-by: Yuchung Cheng
+Acked-by: Soheil Hassas Yeganeh
+Signed-off-by: Eric Dumazet
+Link: https://lore.kernel.org/r/20201209035759.1225145-1-ncardwell.kernel@gmail.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_output.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1501,7 +1501,8 @@ static void tcp_cwnd_validate(struct soc
+ 	 * window, and remember whether we were cwnd-limited then.
+ 	 */
+ 	if (!before(tp->snd_una, tp->max_packets_seq) ||
+-	    tp->packets_out > tp->max_packets_out) {
++	    tp->packets_out > tp->max_packets_out ||
++	    is_cwnd_limited) {
+ 		tp->max_packets_out = tp->packets_out;
+ 		tp->max_packets_seq = tp->snd_nxt;
+ 		tp->is_cwnd_limited = is_cwnd_limited;
+@@ -2172,6 +2173,10 @@ repair:
+ 			break;
+ 	}
+ 
++	is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
++	if (likely(sent_pkts || is_cwnd_limited))
++		tcp_cwnd_validate(sk, is_cwnd_limited);
++
+ 	if (likely(sent_pkts)) {
+ 		if (tcp_in_cwnd_reduction(sk))
+ 			tp->prr_out += sent_pkts;
+ 
+@@ -2179,8 +2184,6 @@ repair:
+ 		/* Send one loss probe per tail loss episode. */
+ 		if (push_one != 2)
+ 			tcp_schedule_loss_probe(sk);
+-		is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+-		tcp_cwnd_validate(sk, is_cwnd_limited);
+ 		return false;
+ 	}
+ 	return !tp->packets_out && tcp_send_head(sk);
-- 
2.47.3