From: Sasha Levin <sashal@kernel.org>
Date: Wed, 29 Nov 2023 19:52:05 +0000 (-0500)
Subject: Fixes for 6.6
X-Git-Tag: v5.15.141~15^2~5
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f932360cafc59a675f93e4e1cb59f431ea284cf9;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 6.6

Signed-off-by: Sasha Levin <sashal@kernel.org>
---

diff --git a/queue-6.6/dm-delay-fix-a-race-between-delay_presuspend-and-del.patch b/queue-6.6/dm-delay-fix-a-race-between-delay_presuspend-and-del.patch
new file mode 100644
index 00000000000..5122703ad86
--- /dev/null
+++ b/queue-6.6/dm-delay-fix-a-race-between-delay_presuspend-and-del.patch
@@ -0,0 +1,98 @@
+From e8071595ba85eccf5091cef594e6eab669a486f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Nov 2023 13:38:43 -0500
+Subject: dm-delay: fix a race between delay_presuspend and delay_bio
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit 6fc45b6ed921dc00dfb264dc08c7d67ee63d2656 ]
+
+In delay_presuspend, we clear the atomic variable may_delay and then stop
+the timer and flush pending bios. The intention here is to prevent the
+delay target from re-arming the timer.
+
+However, the may_delay test in delay_bio is racy. Suppose that one thread
+enters delay_bio, sees that dc->may_delay is one and proceeds; now, another
+thread executes delay_presuspend, sets dc->may_delay to zero, deletes the
+timer and flushes pending bios. Then, the first thread continues and adds
+the bio to dc->delayed_bios despite the fact that dc->may_delay is zero.
+
+Fix this bug by changing may_delay's type from atomic_t to bool and
+only access it while holding the delayed_bios_lock mutex. Note that we
+don't have to grab the mutex in delay_resume because there are no bios
+in flight at this point.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-delay.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
+index 7433525e59856..3726fae3006e3 100644
+--- a/drivers/md/dm-delay.c
++++ b/drivers/md/dm-delay.c
+@@ -31,7 +31,7 @@ struct delay_c {
+ 	struct workqueue_struct *kdelayd_wq;
+ 	struct work_struct flush_expired_bios;
+ 	struct list_head delayed_bios;
+-	atomic_t may_delay;
++	bool may_delay;
+ 
+ 	struct delay_class read;
+ 	struct delay_class write;
+@@ -192,7 +192,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+ 	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
+ 	INIT_LIST_HEAD(&dc->delayed_bios);
+ 	mutex_init(&dc->timer_lock);
+-	atomic_set(&dc->may_delay, 1);
++	dc->may_delay = true;
+ 	dc->argc = argc;
+ 
+ 	ret = delay_class_ctr(ti, &dc->read, argv);
+@@ -247,7 +247,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
+ 	struct dm_delay_info *delayed;
+ 	unsigned long expires = 0;
+ 
+-	if (!c->delay || !atomic_read(&dc->may_delay))
++	if (!c->delay)
+ 		return DM_MAPIO_REMAPPED;
+ 
+ 	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
+@@ -256,6 +256,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
+ 	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
+ 
+ 	mutex_lock(&delayed_bios_lock);
++	if (unlikely(!dc->may_delay)) {
++		mutex_unlock(&delayed_bios_lock);
++		return DM_MAPIO_REMAPPED;
++	}
+ 	c->ops++;
+ 	list_add_tail(&delayed->list, &dc->delayed_bios);
+ 	mutex_unlock(&delayed_bios_lock);
+@@ -269,7 +273,10 @@ static void delay_presuspend(struct dm_target *ti)
+ {
+ 	struct delay_c *dc = ti->private;
+ 
+-	atomic_set(&dc->may_delay, 0);
++	mutex_lock(&delayed_bios_lock);
++	dc->may_delay = false;
++	mutex_unlock(&delayed_bios_lock);
++
+ 	del_timer_sync(&dc->delay_timer);
+ 	flush_bios(flush_delayed_bios(dc, 1));
+ }
+@@ -278,7 +285,7 @@ static void delay_resume(struct dm_target *ti)
+ {
+ 	struct delay_c *dc = ti->private;
+ 
+-	atomic_set(&dc->may_delay, 1);
++	dc->may_delay = true;
+ }
+ 
+ static int delay_map(struct dm_target *ti, struct bio *bio)
+-- 
+2.42.0
+
diff --git a/queue-6.6/series b/queue-6.6/series
index 3c78523897a..50575b3ba9c 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -61,3 +61,5 @@ cifs-account-for-primary-channel-in-the-interface-li.patch
 cifs-fix-leak-of-iface-for-primary-channel.patch
 alsa-hda-asus-um5302la-added-quirks-for-cs35l41-1043.patch
 alsa-hda-realtek-add-quirks-for-asus-2024-zenbooks.patch
+dm-delay-fix-a-race-between-delay_presuspend-and-del.patch
+veth-use-tstats-per-cpu-traffic-counters.patch
diff --git a/queue-6.6/veth-use-tstats-per-cpu-traffic-counters.patch b/queue-6.6/veth-use-tstats-per-cpu-traffic-counters.patch
new file mode 100644
index 00000000000..cd65f5be3fa
--- /dev/null
+++ b/queue-6.6/veth-use-tstats-per-cpu-traffic-counters.patch
@@ -0,0 +1,115 @@
+From 391c1fb72ce46b7e2d5fbd7fcfca26461653e4a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Nov 2023 01:42:16 +0100
+Subject: veth: Use tstats per-CPU traffic counters
+
+From: Peilin Ye <peilin.ye@bytedance.com>
+
+[ Upstream commit 6f2684bf2b4460c84d0d34612a939f78b96b03fc ]
+
+Currently veth devices use the lstats per-CPU traffic counters, which only
+cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
+device from its peer's TX counters, based on the assumption that a veth
+device can _only_ receive packets from its peer, which is no longer true:
+
+For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
+helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
+a different netns), skipping veth's peer device. Unfortunately, this kind
+of traffic isn't currently accounted for in veth's RX stats.
+
+In preparation for the fix, use tstats (instead of lstats) to maintain
+both RX and TX counters for each veth device. We'll use RX counters for
+bpf_redirect_peer() traffic, and keep using TX counters for the usual
+"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
+_adding_ RX count to peer's TX count, in order to cover both kinds of
+traffic.
+
+veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
+for less confusion, but let's leave it to another patch to keep the fix
+minimal.
+
+Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
+Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 30 +++++++++++-------------------
+ 1 file changed, 11 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index af326b91506eb..0f798bcbe25cd 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	skb_tx_timestamp(skb);
+ 	if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+ 		if (!use_napi)
+-			dev_lstats_add(dev, length);
++			dev_sw_netstats_tx_add(dev, 1, length);
+ 		else
+ 			__veth_xdp_flush(rq);
+ 	} else {
+@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	return ret;
+ }
+ 
+-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
+-{
+-	struct veth_priv *priv = netdev_priv(dev);
+-
+-	dev_lstats_read(dev, packets, bytes);
+-	return atomic64_read(&priv->dropped);
+-}
+-
+ static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
+ {
+ 	struct veth_priv *priv = netdev_priv(dev);
+@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
+ 	struct veth_priv *priv = netdev_priv(dev);
+ 	struct net_device *peer;
+ 	struct veth_stats rx;
+-	u64 packets, bytes;
+ 
+-	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
+-	tot->tx_bytes = bytes;
+-	tot->tx_packets = packets;
++	tot->tx_dropped = atomic64_read(&priv->dropped);
++	dev_fetch_sw_netstats(tot, dev->tstats);
+ 
+ 	veth_stats_rx(&rx, dev);
+ 	tot->tx_dropped += rx.xdp_tx_err;
+ 	tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
+-	tot->rx_bytes = rx.xdp_bytes;
+-	tot->rx_packets = rx.xdp_packets;
++	tot->rx_bytes += rx.xdp_bytes;
++	tot->rx_packets += rx.xdp_packets;
+ 
+ 	rcu_read_lock();
+ 	peer = rcu_dereference(priv->peer);
+ 	if (peer) {
+-		veth_stats_tx(peer, &packets, &bytes);
+-		tot->rx_bytes += bytes;
+-		tot->rx_packets += packets;
++		struct rtnl_link_stats64 tot_peer = {};
++
++		dev_fetch_sw_netstats(&tot_peer, peer->tstats);
++		tot->rx_bytes += tot_peer.tx_bytes;
++		tot->rx_packets += tot_peer.tx_packets;
+ 
+ 		veth_stats_rx(&rx, peer);
+ 		tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
+@@ -1776,7 +1768,7 @@ static void veth_setup(struct net_device *dev)
+ 			       NETIF_F_HW_VLAN_STAG_RX);
+ 	dev->needs_free_netdev = true;
+ 	dev->priv_destructor = veth_dev_free;
+-	dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
++	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+ 	dev->max_mtu = ETH_MAX_MTU;
+ 
+ 	dev->hw_features = VETH_FEATURES;
+-- 
+2.42.0
+