git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
authorSasha Levin <sashal@kernel.org>
Wed, 29 Nov 2023 19:52:05 +0000 (14:52 -0500)
committerSasha Levin <sashal@kernel.org>
Thu, 30 Nov 2023 13:51:52 +0000 (08:51 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.6/dm-delay-fix-a-race-between-delay_presuspend-and-del.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/veth-use-tstats-per-cpu-traffic-counters.patch [new file with mode: 0644]

diff --git a/queue-6.6/dm-delay-fix-a-race-between-delay_presuspend-and-del.patch b/queue-6.6/dm-delay-fix-a-race-between-delay_presuspend-and-del.patch
new file mode 100644 (file)
index 0000000..5122703
--- /dev/null
@@ -0,0 +1,98 @@
+From e8071595ba85eccf5091cef594e6eab669a486f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Nov 2023 13:38:43 -0500
+Subject: dm-delay: fix a race between delay_presuspend and delay_bio
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit 6fc45b6ed921dc00dfb264dc08c7d67ee63d2656 ]
+
+In delay_presuspend, we set the atomic variable may_delay and then stop
+the timer and flush pending bios. The intention here is to prevent the
+delay target from re-arming the timer again.
+
+However, this test is racy. Suppose that one thread goes to delay_bio,
+sees that dc->may_delay is one and proceeds; now, another thread executes
+delay_presuspend, it sets dc->may_delay to zero, deletes the timer and
+flushes pending bios. Then, the first thread continues and adds the bio to
+delayed->list despite the fact that dc->may_delay is false.
+
+Fix this bug by changing may_delay's type from atomic_t to bool and
+only access it while holding the delayed_bios_lock mutex. Note that we
+don't have to grab the mutex in delay_resume because there are no bios
+in flight at this point.
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-delay.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
+index 7433525e59856..3726fae3006e3 100644
+--- a/drivers/md/dm-delay.c
++++ b/drivers/md/dm-delay.c
+@@ -31,7 +31,7 @@ struct delay_c {
+       struct workqueue_struct *kdelayd_wq;
+       struct work_struct flush_expired_bios;
+       struct list_head delayed_bios;
+-      atomic_t may_delay;
++      bool may_delay;
+       struct delay_class read;
+       struct delay_class write;
+@@ -192,7 +192,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+       INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
+       INIT_LIST_HEAD(&dc->delayed_bios);
+       mutex_init(&dc->timer_lock);
+-      atomic_set(&dc->may_delay, 1);
++      dc->may_delay = true;
+       dc->argc = argc;
+       ret = delay_class_ctr(ti, &dc->read, argv);
+@@ -247,7 +247,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
+       struct dm_delay_info *delayed;
+       unsigned long expires = 0;
+-      if (!c->delay || !atomic_read(&dc->may_delay))
++      if (!c->delay)
+               return DM_MAPIO_REMAPPED;
+       delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
+@@ -256,6 +256,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
+       delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
+       mutex_lock(&delayed_bios_lock);
++      if (unlikely(!dc->may_delay)) {
++              mutex_unlock(&delayed_bios_lock);
++              return DM_MAPIO_REMAPPED;
++      }
+       c->ops++;
+       list_add_tail(&delayed->list, &dc->delayed_bios);
+       mutex_unlock(&delayed_bios_lock);
+@@ -269,7 +273,10 @@ static void delay_presuspend(struct dm_target *ti)
+ {
+       struct delay_c *dc = ti->private;
+-      atomic_set(&dc->may_delay, 0);
++      mutex_lock(&delayed_bios_lock);
++      dc->may_delay = false;
++      mutex_unlock(&delayed_bios_lock);
++
+       del_timer_sync(&dc->delay_timer);
+       flush_bios(flush_delayed_bios(dc, 1));
+ }
+@@ -278,7 +285,7 @@ static void delay_resume(struct dm_target *ti)
+ {
+       struct delay_c *dc = ti->private;
+-      atomic_set(&dc->may_delay, 1);
++      dc->may_delay = true;
+ }
+ static int delay_map(struct dm_target *ti, struct bio *bio)
+-- 
+2.42.0
+
index 3c78523897a528f390df4256cb268c8edec0d2c5..50575b3ba9c0e83914f91ac69f1361c8ede9c923 100644 (file)
@@ -61,3 +61,5 @@ cifs-account-for-primary-channel-in-the-interface-li.patch
 cifs-fix-leak-of-iface-for-primary-channel.patch
 alsa-hda-asus-um5302la-added-quirks-for-cs35l41-1043.patch
 alsa-hda-realtek-add-quirks-for-asus-2024-zenbooks.patch
+dm-delay-fix-a-race-between-delay_presuspend-and-del.patch
+veth-use-tstats-per-cpu-traffic-counters.patch
diff --git a/queue-6.6/veth-use-tstats-per-cpu-traffic-counters.patch b/queue-6.6/veth-use-tstats-per-cpu-traffic-counters.patch
new file mode 100644 (file)
index 0000000..cd65f5b
--- /dev/null
@@ -0,0 +1,115 @@
+From 391c1fb72ce46b7e2d5fbd7fcfca26461653e4a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Nov 2023 01:42:16 +0100
+Subject: veth: Use tstats per-CPU traffic counters
+
+From: Peilin Ye <peilin.ye@bytedance.com>
+
+[ Upstream commit 6f2684bf2b4460c84d0d34612a939f78b96b03fc ]
+
+Currently veth devices use the lstats per-CPU traffic counters, which only
+cover TX traffic. veth_get_stats64() actually populates RX stats of a veth
+device from its peer's TX counters, based on the assumption that a veth
+device can _only_ receive packets from its peer, which is no longer true:
+
+For example, recent CNIs (like Cilium) can use the bpf_redirect_peer() BPF
+helper to redirect traffic from NIC's tc ingress to veth's tc ingress (in
+a different netns), skipping veth's peer device. Unfortunately, this kind
+of traffic isn't currently accounted for in veth's RX stats.
+
+In preparation for the fix, use tstats (instead of lstats) to maintain
+both RX and TX counters for each veth device. We'll use RX counters for
+bpf_redirect_peer() traffic, and keep using TX counters for the usual
+"peer-to-peer" traffic. In veth_get_stats64(), calculate RX stats by
+_adding_ RX count to peer's TX count, in order to cover both kinds of
+traffic.
+
+veth_stats_rx() might need a name change (perhaps to "veth_stats_xdp()")
+for less confusion, but let's leave it to another patch to keep the fix
+minimal.
+
+Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
+Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20231114004220.6495-5-daniel@iogearbox.net
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 30 +++++++++++-------------------
+ 1 file changed, 11 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index af326b91506eb..0f798bcbe25cd 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+       skb_tx_timestamp(skb);
+       if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+               if (!use_napi)
+-                      dev_lstats_add(dev, length);
++                      dev_sw_netstats_tx_add(dev, 1, length);
+               else
+                       __veth_xdp_flush(rq);
+       } else {
+@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
+       return ret;
+ }
+-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
+-{
+-      struct veth_priv *priv = netdev_priv(dev);
+-
+-      dev_lstats_read(dev, packets, bytes);
+-      return atomic64_read(&priv->dropped);
+-}
+-
+ static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
+ {
+       struct veth_priv *priv = netdev_priv(dev);
+@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
+       struct veth_priv *priv = netdev_priv(dev);
+       struct net_device *peer;
+       struct veth_stats rx;
+-      u64 packets, bytes;
+-      tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
+-      tot->tx_bytes = bytes;
+-      tot->tx_packets = packets;
++      tot->tx_dropped = atomic64_read(&priv->dropped);
++      dev_fetch_sw_netstats(tot, dev->tstats);
+       veth_stats_rx(&rx, dev);
+       tot->tx_dropped += rx.xdp_tx_err;
+       tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
+-      tot->rx_bytes = rx.xdp_bytes;
+-      tot->rx_packets = rx.xdp_packets;
++      tot->rx_bytes += rx.xdp_bytes;
++      tot->rx_packets += rx.xdp_packets;
+       rcu_read_lock();
+       peer = rcu_dereference(priv->peer);
+       if (peer) {
+-              veth_stats_tx(peer, &packets, &bytes);
+-              tot->rx_bytes += bytes;
+-              tot->rx_packets += packets;
++              struct rtnl_link_stats64 tot_peer = {};
++
++              dev_fetch_sw_netstats(&tot_peer, peer->tstats);
++              tot->rx_bytes += tot_peer.tx_bytes;
++              tot->rx_packets += tot_peer.tx_packets;
+               veth_stats_rx(&rx, peer);
+               tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
+@@ -1776,7 +1768,7 @@ static void veth_setup(struct net_device *dev)
+                              NETIF_F_HW_VLAN_STAG_RX);
+       dev->needs_free_netdev = true;
+       dev->priv_destructor = veth_dev_free;
+-      dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS;
++      dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+       dev->max_mtu = ETH_MAX_MTU;
+       dev->hw_features = VETH_FEATURES;
+-- 
+2.42.0
+