git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sat, 29 Jun 2024 11:50:00 +0000 (07:50 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 29 Jun 2024 11:50:00 +0000 (07:50 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
21 files changed:
queue-6.1/asoc-amd-acp-remove-i2s-configuration-check-in-acp_i.patch [new file with mode: 0644]
queue-6.1/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch [new file with mode: 0644]
queue-6.1/asoc-rockchip-i2s-tdm-fix-trcm-mode-by-setting-clock.patch [new file with mode: 0644]
queue-6.1/bpf-fix-overrunning-reservations-in-ringbuf.patch [new file with mode: 0644]
queue-6.1/fix-race-for-duplicate-reqsk-on-identical-syn.patch [new file with mode: 0644]
queue-6.1/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch [new file with mode: 0644]
queue-6.1/mlxsw-spectrum_buffers-fix-memory-corruptions-on-spe.patch [new file with mode: 0644]
queue-6.1/net-dsa-microchip-fix-initial-port-flush-problem.patch [new file with mode: 0644]
queue-6.1/net-dsa-microchip-fix-wrong-register-write-when-mask.patch [new file with mode: 0644]
queue-6.1/net-dsa-microchip-use-collision-based-back-pressure-.patch [new file with mode: 0644]
queue-6.1/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch [new file with mode: 0644]
queue-6.1/netfilter-nf_tables-fully-validate-nft_data_value-on.patch [new file with mode: 0644]
queue-6.1/parisc-use-correct-compat-recv-recvfrom-syscalls.patch [new file with mode: 0644]
queue-6.1/powerpc-restore-some-missing-spu-syscalls.patch [new file with mode: 0644]
queue-6.1/s390-pci-add-missing-virt_to_phys-for-directed-dibv.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/sparc-fix-compat-recv-recvfrom-syscalls.patch [new file with mode: 0644]
queue-6.1/sparc-fix-old-compat_sys_select.patch [new file with mode: 0644]
queue-6.1/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch [new file with mode: 0644]
queue-6.1/tracing-net_sched-null-pointer-dereference-in-perf_t.patch [new file with mode: 0644]
queue-6.1/xdp-remove-warn-from-__xdp_reg_mem_model.patch [new file with mode: 0644]

diff --git a/queue-6.1/asoc-amd-acp-remove-i2s-configuration-check-in-acp_i.patch b/queue-6.1/asoc-amd-acp-remove-i2s-configuration-check-in-acp_i.patch
new file mode 100644 (file)
index 0000000..027e882
--- /dev/null
@@ -0,0 +1,53 @@
+From 849c98404a6d03e16facedc79bc4331b5cffd315 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jun 2024 12:58:35 +0530
+Subject: ASoC: amd: acp: remove i2s configuration check in acp_i2s_probe()
+
+From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
+
+[ Upstream commit 70fa3900c3ed92158628710e81d274e5cb52f92b ]
+
+ACP supports different pin configurations for I2S IO. Checking ACP pin
+configuration value against specific value breaks the functionality for
+other I2S pin configurations. This check is no longer required in i2s dai
+driver probe call as i2s configuration check will be verified during acp
+platform device creation sequence.
+Remove i2s_mode check in acp_i2s_probe() function.
+
+Fixes: b24484c18b10 ("ASoC: amd: acp: ACP code generic to support newer platforms")
+Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
+Link: https://msgid.link/r/20240617072844.871468-2-Vijendar.Mukunda@amd.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/amd/acp/acp-i2s.c | 8 --------
+ 1 file changed, 8 deletions(-)
+
+diff --git a/sound/soc/amd/acp/acp-i2s.c b/sound/soc/amd/acp/acp-i2s.c
+index ac416572db0d3..3c78207f9ad9d 100644
+--- a/sound/soc/amd/acp/acp-i2s.c
++++ b/sound/soc/amd/acp/acp-i2s.c
+@@ -537,20 +537,12 @@ int asoc_acp_i2s_probe(struct snd_soc_dai *dai)
+ {
+       struct device *dev = dai->component->dev;
+       struct acp_dev_data *adata = dev_get_drvdata(dev);
+-      struct acp_resource *rsrc = adata->rsrc;
+-      unsigned int val;
+       if (!adata->acp_base) {
+               dev_err(dev, "I2S base is NULL\n");
+               return -EINVAL;
+       }
+-      val = readl(adata->acp_base + rsrc->i2s_pin_cfg_offset);
+-      if (val != rsrc->i2s_mode) {
+-              dev_err(dev, "I2S Mode not supported val %x\n", val);
+-              return -EINVAL;
+-      }
+-
+       return 0;
+ }
+ EXPORT_SYMBOL_NS_GPL(asoc_acp_i2s_probe, SND_SOC_ACP_COMMON);
+-- 
+2.43.0
+
diff --git a/queue-6.1/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch b/queue-6.1/asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch
new file mode 100644 (file)
index 0000000..46e9737
--- /dev/null
@@ -0,0 +1,54 @@
+From e33ae4e8493ce1764be8a517cb35a5fc10aec2c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2024 15:25:03 +0200
+Subject: ASoC: fsl-asoc-card: set priv->pdev before using it
+
+From: Elinor Montmasson <elinor.montmasson@savoirfairelinux.com>
+
+[ Upstream commit 90f3feb24172185f1832636264943e8b5e289245 ]
+
+priv->pdev pointer was set after being used in
+fsl_asoc_card_audmux_init().
+Move this assignment at the start of the probe function, so
+sub-functions can correctly use pdev through priv.
+
+fsl_asoc_card_audmux_init() dereferences priv->pdev to get access to the
+dev struct, used with dev_err macros.
+As priv is zero-initialised, there would be a NULL pointer dereference.
+Note that if priv->pdev is dereferenced before assignment but never used,
+for example if there is no error to be printed, the driver won't crash
+probably due to compiler optimisations.
+
+Fixes: 708b4351f08c ("ASoC: fsl: Add Freescale Generic ASoC Sound Card with ASRC support")
+Signed-off-by: Elinor Montmasson <elinor.montmasson@savoirfairelinux.com>
+Link: https://patch.msgid.link/20240620132511.4291-2-elinor.montmasson@savoirfairelinux.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/fsl/fsl-asoc-card.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
+index 8d14b5593658d..8b29099975c91 100644
+--- a/sound/soc/fsl/fsl-asoc-card.c
++++ b/sound/soc/fsl/fsl-asoc-card.c
+@@ -545,6 +545,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
+       if (!priv)
+               return -ENOMEM;
++      priv->pdev = pdev;
++
+       cpu_np = of_parse_phandle(np, "audio-cpu", 0);
+       /* Give a chance to old DT binding */
+       if (!cpu_np)
+@@ -754,7 +756,6 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
+       }
+       /* Initialize sound card */
+-      priv->pdev = pdev;
+       priv->card.dev = &pdev->dev;
+       priv->card.owner = THIS_MODULE;
+       ret = snd_soc_of_parse_card_name(&priv->card, "model");
+-- 
+2.43.0
+
diff --git a/queue-6.1/asoc-rockchip-i2s-tdm-fix-trcm-mode-by-setting-clock.patch b/queue-6.1/asoc-rockchip-i2s-tdm-fix-trcm-mode-by-setting-clock.patch
new file mode 100644 (file)
index 0000000..a4cd6d0
--- /dev/null
@@ -0,0 +1,56 @@
+From db81fed80cf1355b4166ae2a80e1119dd08bd4e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jun 2024 21:47:52 +0300
+Subject: ASoC: rockchip: i2s-tdm: Fix trcm mode by setting clock on right mclk
+
+From: Alibek Omarov <a1ba.omarov@gmail.com>
+
+[ Upstream commit ccd8d753f0fe8f16745fa2b6be5946349731d901 ]
+
+When TRCM mode is enabled, I2S RX and TX clocks are synchronized through
+selected clock source. Without this fix BCLK and LRCK might get parented
+to an uninitialized MCLK and the DAI will receive data at wrong pace.
+
+However, unlike in original i2s-tdm driver, there is no need to manually
+synchronize mclk_rx and mclk_tx, as only one gets used anyway.
+
+Tested on a board with RK3568 SoC and Silergy SY24145S codec with enabled and
+disabled TRCM mode.
+
+Fixes: 9e2ab4b18ebd ("ASoC: rockchip: i2s-tdm: Fix inaccurate sampling rates")
+Signed-off-by: Alibek Omarov <a1ba.omarov@gmail.com>
+Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
+Link: https://msgid.link/r/20240604184752.697313-1-a1ba.omarov@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/rockchip/rockchip_i2s_tdm.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
+index 2e36a97077b99..bcea52fa45a50 100644
+--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
++++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
+@@ -651,8 +651,17 @@ static int rockchip_i2s_tdm_hw_params(struct snd_pcm_substream *substream,
+       int err;
+       if (i2s_tdm->is_master_mode) {
+-              struct clk *mclk = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ?
+-                      i2s_tdm->mclk_tx : i2s_tdm->mclk_rx;
++              struct clk *mclk;
++
++              if (i2s_tdm->clk_trcm == TRCM_TX) {
++                      mclk = i2s_tdm->mclk_tx;
++              } else if (i2s_tdm->clk_trcm == TRCM_RX) {
++                      mclk = i2s_tdm->mclk_rx;
++              } else if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
++                      mclk = i2s_tdm->mclk_tx;
++              } else {
++                      mclk = i2s_tdm->mclk_rx;
++              }
+               err = clk_set_rate(mclk, DEFAULT_MCLK_FS * params_rate(params));
+               if (err)
+-- 
+2.43.0
+
diff --git a/queue-6.1/bpf-fix-overrunning-reservations-in-ringbuf.patch b/queue-6.1/bpf-fix-overrunning-reservations-in-ringbuf.patch
new file mode 100644 (file)
index 0000000..04163c8
--- /dev/null
@@ -0,0 +1,152 @@
+From cba35458b700bf2674a59f16e0ef8c6436eb09c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jun 2024 16:08:27 +0200
+Subject: bpf: Fix overrunning reservations in ringbuf
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit cfa1a2329a691ffd991fcf7248a57d752e712881 ]
+
+The BPF ring buffer internally is implemented as a power-of-2 sized circular
+buffer, with two logical and ever-increasing counters: consumer_pos is the
+consumer counter to show which logical position the consumer consumed the
+data, and producer_pos which is the producer counter denoting the amount of
+data reserved by all producers.
+
+Each time a record is reserved, the producer that "owns" the record will
+successfully advance producer counter. In user space each time a record is
+read, the consumer of the data advanced the consumer counter once it finished
+processing. Both counters are stored in separate pages so that from user
+space, the producer counter is read-only and the consumer counter is read-write.
+
+One aspect that simplifies and thus speeds up the implementation of both
+producers and consumers is how the data area is mapped twice contiguously
+back-to-back in the virtual memory, allowing to not take any special measures
+for samples that have to wrap around at the end of the circular buffer data
+area, because the next page after the last data page would be first data page
+again, and thus the sample will still appear completely contiguous in virtual
+memory.
+
+Each record has a struct bpf_ringbuf_hdr { u32 len; u32 pg_off; } header for
+book-keeping the length and offset, and is inaccessible to the BPF program.
+Helpers like bpf_ringbuf_reserve() return `(void *)hdr + BPF_RINGBUF_HDR_SZ`
+for the BPF program to use. Bing-Jhong and Muhammad reported that it is however
+possible to make a second allocated memory chunk overlapping with the first
+chunk and as a result, the BPF program is now able to edit first chunk's
+header.
+
+For example, consider the creation of a BPF_MAP_TYPE_RINGBUF map with size
+of 0x4000. Next, the consumer_pos is modified to 0x3000 /before/ a call to
+bpf_ringbuf_reserve() is made. This will allocate a chunk A, which is in
+[0x0,0x3008], and the BPF program is able to edit [0x8,0x3008]. Now, let's
+allocate a chunk B with size 0x3000. This will succeed because consumer_pos
+was edited ahead of time to pass the `new_prod_pos - cons_pos > rb->mask`
+check. Chunk B will be in range [0x3008,0x6010], and the BPF program is able
+to edit [0x3010,0x6010]. Due to the ring buffer memory layout mentioned
+earlier, the ranges [0x0,0x4000] and [0x4000,0x8000] point to the same data
+pages. This means that chunk B at [0x4000,0x4008] is chunk A's header.
+bpf_ringbuf_submit() / bpf_ringbuf_discard() use the header's pg_off to then
+locate the bpf_ringbuf itself via bpf_ringbuf_restore_from_rec(). Once chunk
+B modified chunk A's header, then bpf_ringbuf_commit() refers to the wrong
+page and could cause a crash.
+
+Fix it by calculating the oldest pending_pos and check whether the range
+from the oldest outstanding record to the newest would span beyond the ring
+buffer size. If that is the case, then reject the request. We've tested with
+the ring buffer benchmark in BPF selftests (./benchs/run_bench_ringbufs.sh)
+before/after the fix and while it seems a bit slower on some benchmarks, it
+is still not significant enough to matter.
+
+Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Reported-by: Muhammad Ramdhan <ramdhan@starlabs.sg>
+Co-developed-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Co-developed-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Link: https://lore.kernel.org/bpf/20240621140828.18238-1-daniel@iogearbox.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/ringbuf.c | 31 +++++++++++++++++++++++++------
+ 1 file changed, 25 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
+index 9e832acf46925..a1911391a864c 100644
+--- a/kernel/bpf/ringbuf.c
++++ b/kernel/bpf/ringbuf.c
+@@ -59,7 +59,8 @@ struct bpf_ringbuf {
+        * This prevents a user-space application from modifying the
+        * position and ruining in-kernel tracking. The permissions of the
+        * pages depend on who is producing samples: user-space or the
+-       * kernel.
++       * kernel. Note that the pending counter is placed in the same
++       * page as the producer, so that it shares the same cache line.
+        *
+        * Kernel-producer
+        * ---------------
+@@ -78,6 +79,7 @@ struct bpf_ringbuf {
+        */
+       unsigned long consumer_pos __aligned(PAGE_SIZE);
+       unsigned long producer_pos __aligned(PAGE_SIZE);
++      unsigned long pending_pos;
+       char data[] __aligned(PAGE_SIZE);
+ };
+@@ -176,6 +178,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
+       rb->mask = data_sz - 1;
+       rb->consumer_pos = 0;
+       rb->producer_pos = 0;
++      rb->pending_pos = 0;
+       return rb;
+ }
+@@ -390,9 +393,9 @@ bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
+ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
+ {
+-      unsigned long cons_pos, prod_pos, new_prod_pos, flags;
+-      u32 len, pg_off;
++      unsigned long cons_pos, prod_pos, new_prod_pos, pend_pos, flags;
+       struct bpf_ringbuf_hdr *hdr;
++      u32 len, pg_off, tmp_size, hdr_len;
+       if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
+               return NULL;
+@@ -410,13 +413,29 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
+               spin_lock_irqsave(&rb->spinlock, flags);
+       }
++      pend_pos = rb->pending_pos;
+       prod_pos = rb->producer_pos;
+       new_prod_pos = prod_pos + len;
+-      /* check for out of ringbuf space by ensuring producer position
+-       * doesn't advance more than (ringbuf_size - 1) ahead
++      while (pend_pos < prod_pos) {
++              hdr = (void *)rb->data + (pend_pos & rb->mask);
++              hdr_len = READ_ONCE(hdr->len);
++              if (hdr_len & BPF_RINGBUF_BUSY_BIT)
++                      break;
++              tmp_size = hdr_len & ~BPF_RINGBUF_DISCARD_BIT;
++              tmp_size = round_up(tmp_size + BPF_RINGBUF_HDR_SZ, 8);
++              pend_pos += tmp_size;
++      }
++      rb->pending_pos = pend_pos;
++
++      /* check for out of ringbuf space:
++       * - by ensuring producer position doesn't advance more than
++       *   (ringbuf_size - 1) ahead
++       * - by ensuring oldest not yet committed record until newest
++       *   record does not span more than (ringbuf_size - 1)
+        */
+-      if (new_prod_pos - cons_pos > rb->mask) {
++      if (new_prod_pos - cons_pos > rb->mask ||
++          new_prod_pos - pend_pos > rb->mask) {
+               spin_unlock_irqrestore(&rb->spinlock, flags);
+               return NULL;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.1/fix-race-for-duplicate-reqsk-on-identical-syn.patch b/queue-6.1/fix-race-for-duplicate-reqsk-on-identical-syn.patch
new file mode 100644 (file)
index 0000000..7609664
--- /dev/null
@@ -0,0 +1,195 @@
+From 230a6bb738b4adc62cf9e728bd8667bdbb967c13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jun 2024 09:39:29 +0800
+Subject: Fix race for duplicate reqsk on identical SYN
+
+From: luoxuanqiang <luoxuanqiang@kylinos.cn>
+
+[ Upstream commit ff46e3b4421923937b7f6e44ffcd3549a074f321 ]
+
+When bonding is configured in BOND_MODE_BROADCAST mode, if two identical
+SYN packets are received at the same time and processed on different CPUs,
+it can potentially create the same sk (sock) but two different reqsk
+(request_sock) in tcp_conn_request().
+
+These two different reqsk will respond with two SYNACK packets, and since
+the generation of the seq (ISN) incorporates a timestamp, the final two
+SYNACK packets will have different seq values.
+
+The consequence is that when the Client receives and replies with an ACK
+to the earlier SYNACK packet, we will reset(RST) it.
+
+========================================================================
+
+This behavior is consistently reproducible in my local setup,
+which comprises:
+
+                  | NETA1 ------ NETB1 |
+PC_A --- bond --- |                    | --- bond --- PC_B
+                  | NETA2 ------ NETB2 |
+
+- PC_A is the Server and has two network cards, NETA1 and NETA2. I have
+  bonded these two cards using BOND_MODE_BROADCAST mode and configured
+  them to be handled by different CPUs.
+
+- PC_B is the Client, also equipped with two network cards, NETB1 and
+  NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode.
+
+If the client attempts a TCP connection to the server, it might encounter
+a failure. Capturing packets from the server side reveals:
+
+10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
+10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
+localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116,
+localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <==
+10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
+10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
+localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <==
+localhost > 10.10.10.10.45182: Flags [R], seq 2967855117,
+
+Two SYNACKs with different seq numbers are sent by localhost,
+resulting in an anomaly.
+
+========================================================================
+
+The attempted solution is as follows:
+Add a return value to inet_csk_reqsk_queue_hash_add() to confirm if the
+ehash insertion is successful (Up to now, the reason for unsuccessful
+insertion is that a reqsk for the same connection has already been
+inserted). If the insertion fails, release the reqsk.
+
+Due to the refcnt, Kuniyuki suggests also adding a return value check
+for the DCCP module; if ehash insertion fails, indicating a successful
+insertion of the same connection, simply release the reqsk as well.
+
+Simultaneously, in reqsk_queue_hash_req(), the start of the
+req->rsk_timer is adjusted to be after successful insertion.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: luoxuanqiang <luoxuanqiang@kylinos.cn>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20240621013929.1386815-1-luoxuanqiang@kylinos.cn
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_connection_sock.h |  2 +-
+ net/dccp/ipv4.c                    |  7 +++++--
+ net/dccp/ipv6.c                    |  7 +++++--
+ net/ipv4/inet_connection_sock.c    | 17 +++++++++++++----
+ net/ipv4/tcp_input.c               |  7 ++++++-
+ 5 files changed, 30 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index 8132f330306db..4242f863f5601 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
+ struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+                                     struct request_sock *req,
+                                     struct sock *child);
+-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
++bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+                                  unsigned long timeout);
+ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+                                        struct request_sock *req,
+diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
+index 9fe6d96797169..f4a2dce3e1048 100644
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -654,8 +654,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+       if (dccp_v4_send_response(sk, req))
+               goto drop_and_free;
+-      inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+-      reqsk_put(req);
++      if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
++              reqsk_free(req);
++      else
++              reqsk_put(req);
++
+       return 0;
+ drop_and_free:
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index e0b0bf75a46c2..016af0301366d 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -397,8 +397,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
+       if (dccp_v6_send_response(sk, req))
+               goto drop_and_free;
+-      inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+-      reqsk_put(req);
++      if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
++              reqsk_free(req);
++      else
++              reqsk_put(req);
++
+       return 0;
+ drop_and_free:
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 8407098a59391..c267c5e066e94 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1113,25 +1113,34 @@ static void reqsk_timer_handler(struct timer_list *t)
+       inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
+ }
+-static void reqsk_queue_hash_req(struct request_sock *req,
++static bool reqsk_queue_hash_req(struct request_sock *req,
+                                unsigned long timeout)
+ {
++      bool found_dup_sk = false;
++
++      if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk))
++              return false;
++
++      /* The timer needs to be setup after a successful insertion. */
+       timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+       mod_timer(&req->rsk_timer, jiffies + timeout);
+-      inet_ehash_insert(req_to_sk(req), NULL, NULL);
+       /* before letting lookups find us, make sure all req fields
+        * are committed to memory and refcnt initialized.
+        */
+       smp_wmb();
+       refcount_set(&req->rsk_refcnt, 2 + 1);
++      return true;
+ }
+-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
++bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+                                  unsigned long timeout)
+ {
+-      reqsk_queue_hash_req(req, timeout);
++      if (!reqsk_queue_hash_req(req, timeout))
++              return false;
++
+       inet_csk_reqsk_queue_added(sk);
++      return true;
+ }
+ EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index d85dd394d5b44..852745a90aa8d 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -7053,7 +7053,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
+               tcp_rsk(req)->tfo_listener = false;
+               if (!want_cookie) {
+                       req->timeout = tcp_timeout_init((struct sock *)req);
+-                      inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
++                      if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req,
++                                                                  req->timeout))) {
++                              reqsk_free(req);
++                              return 0;
++                      }
++
+               }
+               af_ops->send_synack(sk, dst, &fl, req, &foc,
+                                   !want_cookie ? TCP_SYNACK_NORMAL :
+-- 
+2.43.0
+
diff --git a/queue-6.1/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch b/queue-6.1/ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch
new file mode 100644 (file)
index 0000000..a5e1368
--- /dev/null
@@ -0,0 +1,60 @@
+From 3a73a798b882b6724decc8929bb87cde362d93a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jun 2024 10:23:12 -0500
+Subject: ibmvnic: Free any outstanding tx skbs during scrq reset
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+[ Upstream commit 49bbeb5719c2f56907d3a9623b47c6c15c2c431d ]
+
+There are 2 types of outstanding tx skb's:
+Type 1: Packets that are sitting in the driver's ind_buff that are
+waiting to be batch sent to the NIC. During a device reset, these are
+freed with a call to ibmvnic_tx_scrq_clean_buffer()
+Type 2: Packets that have been sent to the NIC and are awaiting a TX
+completion IRQ. These are free'd during a reset with a call to
+clean_tx_pools()
+
+During any reset which requires us to free the tx irq, ensure that the
+Type 2 skb references are freed. Since the irq is released, it is
+impossible for the NIC to inform of any completions.
+
+Furthermore, later in the reset process is a call to init_tx_pools()
+which marks every entry in the tx pool as free (ie not outstanding).
+So if the driver is to make a call to init_tx_pools(), it must first
+be sure that the tx pool is empty of skb references.
+
+This issue was discovered by observing the following in the logs during
+EEH testing:
+       TX free map points to untracked skb (tso_pool 0 idx=4)
+       TX free map points to untracked skb (tso_pool 0 idx=5)
+       TX free map points to untracked skb (tso_pool 1 idx=36)
+
+Fixes: 65d6470d139a ("ibmvnic: clean pending indirect buffs during reset")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
+index 157be4e9be4b7..8f377d0a80fe6 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -3859,6 +3859,12 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
+               adapter->num_active_tx_scrqs = 0;
+       }
++      /* Clean any remaining outstanding SKBs
++       * we freed the irq so we won't be hearing
++       * from them
++       */
++      clean_tx_pools(adapter);
++
+       if (adapter->rx_scrq) {
+               for (i = 0; i < adapter->num_active_rx_scrqs; i++) {
+                       if (!adapter->rx_scrq[i])
+-- 
+2.43.0
+
diff --git a/queue-6.1/mlxsw-spectrum_buffers-fix-memory-corruptions-on-spe.patch b/queue-6.1/mlxsw-spectrum_buffers-fix-memory-corruptions-on-spe.patch
new file mode 100644 (file)
index 0000000..7491c33
--- /dev/null
@@ -0,0 +1,161 @@
+From ee07c5f33cb90b9a6e64bbd02aec3ec02baed713 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jun 2024 09:19:14 +0200
+Subject: mlxsw: spectrum_buffers: Fix memory corruptions on Spectrum-4 systems
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit c28947de2bed40217cf256c5d0d16880054fcf13 ]
+
+The following two shared buffer operations make use of the Shared Buffer
+Status Register (SBSR):
+
+ # devlink sb occupancy snapshot pci/0000:01:00.0
+ # devlink sb occupancy clearmax pci/0000:01:00.0
+
+The register has two masks of 256 bits to denote on which ingress /
+egress ports the register should operate on. Spectrum-4 has more than
+256 ports, so the register was extended by cited commit with a new
+'port_page' field.
+
+However, when filling the register's payload, the driver specifies the
+ports as absolute numbers and not relative to the first port of the port
+page, resulting in memory corruptions [1].
+
+Fix by specifying the ports relative to the first port of the port page.
+
+[1]
+BUG: KASAN: slab-use-after-free in mlxsw_sp_sb_occ_snapshot+0xb6d/0xbc0
+Read of size 1 at addr ffff8881068cb00f by task devlink/1566
+[...]
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0xc6/0x120
+ print_report+0xce/0x670
+ kasan_report+0xd7/0x110
+ mlxsw_sp_sb_occ_snapshot+0xb6d/0xbc0
+ mlxsw_devlink_sb_occ_snapshot+0x75/0xb0
+ devlink_nl_sb_occ_snapshot_doit+0x1f9/0x2a0
+ genl_family_rcv_msg_doit+0x20c/0x300
+ genl_rcv_msg+0x567/0x800
+ netlink_rcv_skb+0x170/0x450
+ genl_rcv+0x2d/0x40
+ netlink_unicast+0x547/0x830
+ netlink_sendmsg+0x8d4/0xdb0
+ __sys_sendto+0x49b/0x510
+ __x64_sys_sendto+0xe5/0x1c0
+ do_syscall_64+0xc1/0x1d0
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+[...]
+Allocated by task 1:
+ kasan_save_stack+0x33/0x60
+ kasan_save_track+0x14/0x30
+ __kasan_kmalloc+0x8f/0xa0
+ copy_verifier_state+0xbc2/0xfb0
+ do_check_common+0x2c51/0xc7e0
+ bpf_check+0x5107/0x9960
+ bpf_prog_load+0xf0e/0x2690
+ __sys_bpf+0x1a61/0x49d0
+ __x64_sys_bpf+0x7d/0xc0
+ do_syscall_64+0xc1/0x1d0
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Freed by task 1:
+ kasan_save_stack+0x33/0x60
+ kasan_save_track+0x14/0x30
+ kasan_save_free_info+0x3b/0x60
+ poison_slab_object+0x109/0x170
+ __kasan_slab_free+0x14/0x30
+ kfree+0xca/0x2b0
+ free_verifier_state+0xce/0x270
+ do_check_common+0x4828/0xc7e0
+ bpf_check+0x5107/0x9960
+ bpf_prog_load+0xf0e/0x2690
+ __sys_bpf+0x1a61/0x49d0
+ __x64_sys_bpf+0x7d/0xc0
+ do_syscall_64+0xc1/0x1d0
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+
+Fixes: f8538aec88b4 ("mlxsw: Add support for more than 256 ports in SBSR register")
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlxsw/spectrum_buffers.c         | 20 +++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+index c9f1c79f3f9d0..ba090262e27ef 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+@@ -1607,8 +1607,8 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
+ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
+                            unsigned int sb_index)
+ {
++      u16 local_port, local_port_1, first_local_port, last_local_port;
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+-      u16 local_port, local_port_1, last_local_port;
+       struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx;
+       u8 masked_count, current_page = 0;
+       unsigned long cb_priv = 0;
+@@ -1628,6 +1628,7 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
+       masked_count = 0;
+       mlxsw_reg_sbsr_pack(sbsr_pl, false);
+       mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page);
++      first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE;
+       last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE +
+                         MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1;
+@@ -1645,9 +1646,12 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core,
+               if (local_port != MLXSW_PORT_CPU_PORT) {
+                       /* Ingress quotas are not supported for the CPU port */
+                       mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl,
+-                                                           local_port, 1);
++                                                           local_port - first_local_port,
++                                                           1);
+               }
+-              mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
++              mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl,
++                                                  local_port - first_local_port,
++                                                  1);
+               for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
+                       err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
+                                                      &bulk_list);
+@@ -1684,7 +1688,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
+                             unsigned int sb_index)
+ {
+       struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+-      u16 local_port, last_local_port;
++      u16 local_port, first_local_port, last_local_port;
+       LIST_HEAD(bulk_list);
+       unsigned int masked_count;
+       u8 current_page = 0;
+@@ -1702,6 +1706,7 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
+       masked_count = 0;
+       mlxsw_reg_sbsr_pack(sbsr_pl, true);
+       mlxsw_reg_sbsr_port_page_set(sbsr_pl, current_page);
++      first_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE;
+       last_local_port = current_page * MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE +
+                         MLXSW_REG_SBSR_NUM_PORTS_IN_PAGE - 1;
+@@ -1719,9 +1724,12 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core,
+               if (local_port != MLXSW_PORT_CPU_PORT) {
+                       /* Ingress quotas are not supported for the CPU port */
+                       mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl,
+-                                                           local_port, 1);
++                                                           local_port - first_local_port,
++                                                           1);
+               }
+-              mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
++              mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl,
++                                                  local_port - first_local_port,
++                                                  1);
+               for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) {
+                       err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
+                                                      &bulk_list);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-microchip-fix-initial-port-flush-problem.patch b/queue-6.1/net-dsa-microchip-fix-initial-port-flush-problem.patch
new file mode 100644 (file)
index 0000000..06fbc64
--- /dev/null
@@ -0,0 +1,49 @@
+From 3a856dfbe46ed84f832f3d411855ec1700d258f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jun 2024 17:16:42 -0700
+Subject: net: dsa: microchip: fix initial port flush problem
+
+From: Tristram Ha <tristram.ha@microchip.com>
+
+[ Upstream commit ad53f5f54f351e967128edbc431f0f26427172cf ]
+
+The very first flush in any port will flush all learned addresses in all
+ports.  This can be observed by unplugging the cable from one port while
+additional ports are connected and dumping the fdb entries.
+
+This problem is caused by the initially wrong value programmed to the
+REG_SW_LUE_CTRL_1 register.  Setting the SW_FLUSH_STP_TABLE and
+SW_FLUSH_MSTP_TABLE bits does not have an immediate effect.  Only when
+ksz9477_flush_dyn_mac_table() is called does the SW_FLUSH_STP_TABLE bit
+take effect and flush all learned entries.  After that call both bits
+are reset, so the next port flush will not cause this problem again.
+
+Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477")
+Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
+Link: https://patch.msgid.link/1718756202-2731-1-git-send-email-Tristram.Ha@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz9477.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
+index a736971470534..9181be2d8abb2 100644
+--- a/drivers/net/dsa/microchip/ksz9477.c
++++ b/drivers/net/dsa/microchip/ksz9477.c
+@@ -183,10 +183,8 @@ int ksz9477_reset_switch(struct ksz_device *dev)
+                          SPI_AUTO_EDGE_DETECTION, 0);
+       /* default configuration */
+-      ksz_read8(dev, REG_SW_LUE_CTRL_1, &data8);
+-      data8 = SW_AGING_ENABLE | SW_LINK_AUTO_AGING |
+-            SW_SRC_ADDR_FILTER | SW_FLUSH_STP_TABLE | SW_FLUSH_MSTP_TABLE;
+-      ksz_write8(dev, REG_SW_LUE_CTRL_1, data8);
++      ksz_write8(dev, REG_SW_LUE_CTRL_1,
++                 SW_AGING_ENABLE | SW_LINK_AUTO_AGING | SW_SRC_ADDR_FILTER);
+       /* disable interrupts */
+       ksz_write32(dev, REG_SW_INT_MASK__4, SWITCH_INT_MASK);
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-microchip-fix-wrong-register-write-when-mask.patch b/queue-6.1/net-dsa-microchip-fix-wrong-register-write-when-mask.patch
new file mode 100644 (file)
index 0000000..3b053ed
--- /dev/null
@@ -0,0 +1,53 @@
+From 989c64e4d0dcc911e767fc653461cb8ead06721a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jun 2024 15:34:22 -0700
+Subject: net: dsa: microchip: fix wrong register write when masking interrupt
+
+From: Tristram Ha <tristram.ha@microchip.com>
+
+[ Upstream commit b1c4b4d45263241ec6c2405a8df8265d4b58e707 ]
+
+The switch global port interrupt mask, REG_SW_PORT_INT_MASK__4, is
+defined as 0x001C in ksz9477_reg.h.  The designers used a 32-bit value in
+anticipation of an increase in port count in future products, but currently
+the maximum port count is 7 and the effective value is 0x7F in register
+0x001F.  Each port has its own interrupt mask and is defined as 0x#01F.
+It uses only 4 bits for different interrupts.
+
+The developer who implemented the current interrupt mechanism in the
+switch driver noticed there are similarities between the mechanism to
+mask port interrupts in global interrupt and individual interrupts in
+each port and so used the same code to handle these interrupts.  He
+updated the code to use the new macro REG_SW_PORT_INT_MASK__1 which is
+defined as 0x1F in ksz_common.h but he forgot to update the 32-bit write
+to 8-bit as now the mask registers are 0x1F and 0x#01F.
+
+In addition all KSZ switches other than the KSZ9897/KSZ9893 and LAN937X
+families use only 8-bit access and so this common code will eventually
+be changed to accommodate them.
+
+Fixes: e1add7dd6183 ("net: dsa: microchip: use common irq routines for girq and pirq")
+Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
+Link: https://lore.kernel.org/r/1719009262-2948-1-git-send-email-Tristram.Ha@microchip.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz_common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index f9f43897f86c1..9dbe188f09c3c 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -1790,7 +1790,7 @@ static void ksz_irq_bus_sync_unlock(struct irq_data *d)
+       struct ksz_device *dev = kirq->dev;
+       int ret;
+-      ret = ksz_write32(dev, kirq->reg_mask, kirq->masked);
++      ret = ksz_write8(dev, kirq->reg_mask, kirq->masked);
+       if (ret)
+               dev_err(dev->dev, "failed to change IRQ mask\n");
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-dsa-microchip-use-collision-based-back-pressure-.patch b/queue-6.1/net-dsa-microchip-use-collision-based-back-pressure-.patch
new file mode 100644 (file)
index 0000000..d6707b4
--- /dev/null
@@ -0,0 +1,55 @@
+From 311d00a2eb2f1dcd18654788e857f2787f34ad26 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jun 2024 16:43:21 +0200
+Subject: net: dsa: microchip: use collision based back pressure mode
+
+From: Enguerrand de Ribaucourt <enguerrand.de-ribaucourt@savoirfairelinux.com>
+
+[ Upstream commit d963c95bc9840d070a788c35e41b715a648717f7 ]
+
+Errata DS80000758 states that carrier sense back pressure mode can cause
+link down issues in 100BASE-TX half duplex mode. The datasheet also
+recommends to always use the collision based back pressure mode.
+
+Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477")
+Signed-off-by: Enguerrand de Ribaucourt <enguerrand.de-ribaucourt@savoirfairelinux.com>
+Reviewed-by: Woojung Huh <Woojung.huh@microchip.com>
+Acked-by: Arun Ramadoss <arun.ramadoss@microchip.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz9477.c     | 4 ++++
+ drivers/net/dsa/microchip/ksz9477_reg.h | 1 +
+ 2 files changed, 5 insertions(+)
+
+diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
+index 9181be2d8abb2..e9fa92a833227 100644
+--- a/drivers/net/dsa/microchip/ksz9477.c
++++ b/drivers/net/dsa/microchip/ksz9477.c
+@@ -1150,6 +1150,10 @@ int ksz9477_setup(struct dsa_switch *ds)
+       /* Enable REG_SW_MTU__2 reg by setting SW_JUMBO_PACKET */
+       ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_JUMBO_PACKET, true);
++      /* Use collision based back pressure mode. */
++      ksz_cfg(dev, REG_SW_MAC_CTRL_1, SW_BACK_PRESSURE,
++              SW_BACK_PRESSURE_COLLISION);
++
+       /* Now we can configure default MTU value */
+       ret = regmap_update_bits(dev->regmap[1], REG_SW_MTU__2, REG_SW_MTU_MASK,
+                                VLAN_ETH_FRAME_LEN + ETH_FCS_LEN);
+diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
+index 53c68d286dd3a..04086e9ab0a0f 100644
+--- a/drivers/net/dsa/microchip/ksz9477_reg.h
++++ b/drivers/net/dsa/microchip/ksz9477_reg.h
+@@ -267,6 +267,7 @@
+ #define REG_SW_MAC_CTRL_1             0x0331
+ #define SW_BACK_PRESSURE              BIT(5)
++#define SW_BACK_PRESSURE_COLLISION    0
+ #define FAIR_FLOW_CTRL                        BIT(4)
+ #define NO_EXC_COLLISION_DROP         BIT(3)
+ #define SW_JUMBO_PACKET                       BIT(2)
+-- 
+2.43.0
+
diff --git a/queue-6.1/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch b/queue-6.1/net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch
new file mode 100644 (file)
index 0000000..c98f1b0
--- /dev/null
@@ -0,0 +1,36 @@
+From 220e6b2f4191ae15ffb414abab249a142c6fd079 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jun 2024 16:43:20 +0200
+Subject: net: phy: micrel: add Microchip KSZ 9477 to the device table
+
+From: Enguerrand de Ribaucourt <enguerrand.de-ribaucourt@savoirfairelinux.com>
+
+[ Upstream commit 54a4e5c16382e871c01dd82b47e930fdce30406b ]
+
+PHY_ID_KSZ9477 was supported but not added to the device table passed to
+MODULE_DEVICE_TABLE.
+
+Fixes: fc3973a1fa09 ("phy: micrel: add Microchip KSZ 9477 Switch PHY support")
+Signed-off-by: Enguerrand de Ribaucourt <enguerrand.de-ribaucourt@savoirfairelinux.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/micrel.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
+index 98c6d0caf8faf..90f3953cf9066 100644
+--- a/drivers/net/phy/micrel.c
++++ b/drivers/net/phy/micrel.c
+@@ -3405,6 +3405,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = {
+       { PHY_ID_KSZ8081, MICREL_PHY_ID_MASK },
+       { PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK },
+       { PHY_ID_KSZ886X, MICREL_PHY_ID_MASK },
++      { PHY_ID_KSZ9477, MICREL_PHY_ID_MASK },
+       { PHY_ID_LAN8814, MICREL_PHY_ID_MASK },
+       { PHY_ID_LAN8804, MICREL_PHY_ID_MASK },
+       { }
+-- 
+2.43.0
+
diff --git a/queue-6.1/netfilter-nf_tables-fully-validate-nft_data_value-on.patch b/queue-6.1/netfilter-nf_tables-fully-validate-nft_data_value-on.patch
new file mode 100644 (file)
index 0000000..fccf063
--- /dev/null
@@ -0,0 +1,92 @@
+From 2d83807192d1e26bfa3e7c9400d88cde12ba33bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jun 2024 23:15:38 +0200
+Subject: netfilter: nf_tables: fully validate NFT_DATA_VALUE on store to data
+ registers
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 7931d32955e09d0a11b1fe0b6aac1bfa061c005c ]
+
+register store validation for NFT_DATA_VALUE is conditional, however,
+the datatype is always either NFT_DATA_VALUE or NFT_DATA_VERDICT. This
+only requires a new helper function to infer the register type from the
+set datatype so this conditional check can be removed. Otherwise,
+pointer to chain object can be leaked through the registers.
+
+Fixes: 96518518cc41 ("netfilter: add nftables")
+Reported-by: Linus Torvalds <torvalds@linuxfoundation.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 5 +++++
+ net/netfilter/nf_tables_api.c     | 8 ++++----
+ net/netfilter/nft_lookup.c        | 3 ++-
+ 3 files changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 964cf7578bd50..9a80d0251d8f3 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -582,6 +582,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
+       return (void *)set->data;
+ }
++static inline enum nft_data_types nft_set_datatype(const struct nft_set *set)
++{
++      return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
++}
++
+ static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+ {
+       return refcount_read(&s->refs) != 1;
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index e838a6617b0aa..97ea72d31bd35 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -5398,8 +5398,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+           nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
+-                        set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
+-                        set->dlen) < 0)
++                        nft_set_datatype(set), set->dlen) < 0)
+               goto nla_put_failure;
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) &&
+@@ -10448,6 +10447,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
+               return 0;
+       default:
++              if (type != NFT_DATA_VALUE)
++                      return -EINVAL;
++
+               if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+                       return -EINVAL;
+               if (len == 0)
+@@ -10456,8 +10458,6 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
+                   sizeof_field(struct nft_regs, data))
+                       return -ERANGE;
+-              if (data != NULL && type != NFT_DATA_VALUE)
+-                      return -EINVAL;
+               return 0;
+       }
+ }
+diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
+index 68a5dea805480..33daee2e54c5c 100644
+--- a/net/netfilter/nft_lookup.c
++++ b/net/netfilter/nft_lookup.c
+@@ -136,7 +136,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
+                       return -EINVAL;
+               err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
+-                                             &priv->dreg, NULL, set->dtype,
++                                             &priv->dreg, NULL,
++                                             nft_set_datatype(set),
+                                              set->dlen);
+               if (err < 0)
+                       return err;
+-- 
+2.43.0
+
diff --git a/queue-6.1/parisc-use-correct-compat-recv-recvfrom-syscalls.patch b/queue-6.1/parisc-use-correct-compat-recv-recvfrom-syscalls.patch
new file mode 100644 (file)
index 0000000..9f5ae1e
--- /dev/null
@@ -0,0 +1,48 @@
+From 10bbf4b48abd737f6f0fcc2df268846385cddf68 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jun 2024 14:27:55 +0200
+Subject: parisc: use correct compat recv/recvfrom syscalls
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit 20a50787349fadf66ac5c48f62e58d753878d2bb ]
+
+Johannes missed parisc back when he introduced the compat version
+of these syscalls, so receiving cmsg messages that require a compat
+conversion is still broken.
+
+Use the correct calls like the other architectures do.
+
+Fixes: 1dacc76d0014 ("net/compat/wext: send different messages to compat tasks")
+Acked-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/parisc/kernel/syscalls/syscall.tbl | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
+index 0e42fceb2d5e2..ba4884eaa5057 100644
+--- a/arch/parisc/kernel/syscalls/syscall.tbl
++++ b/arch/parisc/kernel/syscalls/syscall.tbl
+@@ -108,7 +108,7 @@
+ 95    common  fchown                  sys_fchown
+ 96    common  getpriority             sys_getpriority
+ 97    common  setpriority             sys_setpriority
+-98    common  recv                    sys_recv
++98    common  recv                    sys_recv                        compat_sys_recv
+ 99    common  statfs                  sys_statfs                      compat_sys_statfs
+ 100   common  fstatfs                 sys_fstatfs                     compat_sys_fstatfs
+ 101   common  stat64                  sys_stat64
+@@ -135,7 +135,7 @@
+ 120   common  clone                   sys_clone_wrapper
+ 121   common  setdomainname           sys_setdomainname
+ 122   common  sendfile                sys_sendfile                    compat_sys_sendfile
+-123   common  recvfrom                sys_recvfrom
++123   common  recvfrom                sys_recvfrom                    compat_sys_recvfrom
+ 124   32      adjtimex                sys_adjtimex_time32
+ 124   64      adjtimex                sys_adjtimex
+ 125   common  mprotect                sys_mprotect
+-- 
+2.43.0
+
diff --git a/queue-6.1/powerpc-restore-some-missing-spu-syscalls.patch b/queue-6.1/powerpc-restore-some-missing-spu-syscalls.patch
new file mode 100644 (file)
index 0000000..b24e6e0
--- /dev/null
@@ -0,0 +1,54 @@
+From c360daeb045c97166c098aefb5037d5ec3151dc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Apr 2024 16:36:13 +0200
+Subject: powerpc: restore some missing spu syscalls
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit b1e31c134a8ab2e8f5fd62323b6b45a950ac704d ]
+
+A couple of system calls were inadvertently removed from the table during
+a bugfix for 32-bit powerpc entry. Restore the original behavior.
+
+Fixes: e23750623835 ("powerpc/32: fix syscall wrappers with 64-bit arguments of unaligned register-pairs")
+Acked-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
+index a0be127475b1f..df585d804790e 100644
+--- a/arch/powerpc/kernel/syscalls/syscall.tbl
++++ b/arch/powerpc/kernel/syscalls/syscall.tbl
+@@ -230,8 +230,10 @@
+ 178   nospu   rt_sigsuspend                   sys_rt_sigsuspend               compat_sys_rt_sigsuspend
+ 179   32      pread64                         sys_ppc_pread64                 compat_sys_ppc_pread64
+ 179   64      pread64                         sys_pread64
++179   spu     pread64                         sys_pread64
+ 180   32      pwrite64                        sys_ppc_pwrite64                compat_sys_ppc_pwrite64
+ 180   64      pwrite64                        sys_pwrite64
++180   spu     pwrite64                        sys_pwrite64
+ 181   common  chown                           sys_chown
+ 182   common  getcwd                          sys_getcwd
+ 183   common  capget                          sys_capget
+@@ -246,6 +248,7 @@
+ 190   common  ugetrlimit                      sys_getrlimit                   compat_sys_getrlimit
+ 191   32      readahead                       sys_ppc_readahead               compat_sys_ppc_readahead
+ 191   64      readahead                       sys_readahead
++191   spu     readahead                       sys_readahead
+ 192   32      mmap2                           sys_mmap2                       compat_sys_mmap2
+ 193   32      truncate64                      sys_ppc_truncate64              compat_sys_ppc_truncate64
+ 194   32      ftruncate64                     sys_ppc_ftruncate64             compat_sys_ppc_ftruncate64
+@@ -293,6 +296,7 @@
+ 232   nospu   set_tid_address                 sys_set_tid_address
+ 233   32      fadvise64                       sys_ppc32_fadvise64             compat_sys_ppc32_fadvise64
+ 233   64      fadvise64                       sys_fadvise64
++233   spu     fadvise64                       sys_fadvise64
+ 234   nospu   exit_group                      sys_exit_group
+ 235   nospu   lookup_dcookie                  sys_lookup_dcookie              compat_sys_lookup_dcookie
+ 236   common  epoll_create                    sys_epoll_create
+-- 
+2.43.0
+
diff --git a/queue-6.1/s390-pci-add-missing-virt_to_phys-for-directed-dibv.patch b/queue-6.1/s390-pci-add-missing-virt_to_phys-for-directed-dibv.patch
new file mode 100644 (file)
index 0000000..be8b55f
--- /dev/null
@@ -0,0 +1,39 @@
+From ebf28da8654d35da148d8da874dadf54c8bc5bd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jun 2024 14:06:31 +0200
+Subject: s390/pci: Add missing virt_to_phys() for directed DIBV
+
+From: Niklas Schnelle <schnelle@linux.ibm.com>
+
+[ Upstream commit 4181b51c38875de9f6f11248fa0bcf3246c19c82 ]
+
+In commit 4e4dc65ab578 ("s390/pci: use phys_to_virt() for AIBVs/DIBVs")
+the setting of dibv_addr was missed when adding virt_to_phys(). This
+only affects systems with directed interrupt delivery enabled which are
+not generally available.
+
+Fixes: 4e4dc65ab578 ("s390/pci: use phys_to_virt() for AIBVs/DIBVs")
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/pci/pci_irq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
+index a2b42a63a53ba..04c19ab93a329 100644
+--- a/arch/s390/pci/pci_irq.c
++++ b/arch/s390/pci/pci_irq.c
+@@ -410,7 +410,7 @@ static void __init cpu_enable_directed_irq(void *unused)
+       union zpci_sic_iib iib = {{0}};
+       union zpci_sic_iib ziib = {{0}};
+-      iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
++      iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
+       zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+       zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
+-- 
+2.43.0
+
index a11b5403bcb43d9124841cc9ba3fe6e804841846..1d92000aeb940112562cf242a7906ec8012e6ad6 100644 (file)
@@ -14,3 +14,23 @@ mips-pci-lantiq-restore-reset-gpio-polarity.patch
 dt-bindings-i2c-drop-unneeded-quotes.patch
 dt-bindings-i2c-atmel-at91sam-correct-path-to-i2c-co.patch
 netfilter-nf_tables-use-timestamp-to-check-for-set-e.patch
+asoc-rockchip-i2s-tdm-fix-trcm-mode-by-setting-clock.patch
+s390-pci-add-missing-virt_to_phys-for-directed-dibv.patch
+asoc-amd-acp-remove-i2s-configuration-check-in-acp_i.patch
+asoc-fsl-asoc-card-set-priv-pdev-before-using-it.patch
+net-dsa-microchip-fix-initial-port-flush-problem.patch
+mlxsw-spectrum_buffers-fix-memory-corruptions-on-spe.patch
+bpf-fix-overrunning-reservations-in-ringbuf.patch
+ibmvnic-free-any-outstanding-tx-skbs-during-scrq-res.patch
+net-phy-micrel-add-microchip-ksz-9477-to-the-device-.patch
+net-dsa-microchip-use-collision-based-back-pressure-.patch
+xdp-remove-warn-from-__xdp_reg_mem_model.patch
+fix-race-for-duplicate-reqsk-on-identical-syn.patch
+net-dsa-microchip-fix-wrong-register-write-when-mask.patch
+sparc-fix-old-compat_sys_select.patch
+sparc-fix-compat-recv-recvfrom-syscalls.patch
+parisc-use-correct-compat-recv-recvfrom-syscalls.patch
+powerpc-restore-some-missing-spu-syscalls.patch
+tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch
+netfilter-nf_tables-fully-validate-nft_data_value-on.patch
+tracing-net_sched-null-pointer-dereference-in-perf_t.patch
diff --git a/queue-6.1/sparc-fix-compat-recv-recvfrom-syscalls.patch b/queue-6.1/sparc-fix-compat-recv-recvfrom-syscalls.patch
new file mode 100644 (file)
index 0000000..44490ea
--- /dev/null
@@ -0,0 +1,279 @@
+From 9d8de821bf3d74f0fabe86f42d5672bf308fef8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jun 2024 12:49:39 +0200
+Subject: sparc: fix compat recv/recvfrom syscalls
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit d6fbd26fb872ec518d25433a12e8ce8163e20909 ]
+
+sparc has the wrong compat version of recv() and recvfrom() for both the
+direct syscalls and socketcall().
+
+The direct syscalls just need to use the compat version. For socketcall,
+the same thing could be done, but it seems better to completely remove
+the custom assembler code for it and just use the same implementation that
+everyone else has.
+
+Fixes: 1dacc76d0014 ("net/compat/wext: send different messages to compat tasks")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/sparc/kernel/sys32.S              | 221 -------------------------
+ arch/sparc/kernel/syscalls/syscall.tbl |   4 +-
+ 2 files changed, 2 insertions(+), 223 deletions(-)
+
+diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
+index a45f0f31fe51a..a3d308f2043e5 100644
+--- a/arch/sparc/kernel/sys32.S
++++ b/arch/sparc/kernel/sys32.S
+@@ -18,224 +18,3 @@ sys32_mmap2:
+       sethi           %hi(sys_mmap), %g1
+       jmpl            %g1 + %lo(sys_mmap), %g0
+        sllx           %o5, 12, %o5
+-
+-      .align          32
+-      .globl          sys32_socketcall
+-sys32_socketcall:     /* %o0=call, %o1=args */
+-      cmp             %o0, 1
+-      bl,pn           %xcc, do_einval
+-       cmp            %o0, 18
+-      bg,pn           %xcc, do_einval
+-       sub            %o0, 1, %o0
+-      sllx            %o0, 5, %o0
+-      sethi           %hi(__socketcall_table_begin), %g2
+-      or              %g2, %lo(__socketcall_table_begin), %g2
+-      jmpl            %g2 + %o0, %g0
+-       nop
+-do_einval:
+-      retl
+-       mov            -EINVAL, %o0
+-
+-      .align          32
+-__socketcall_table_begin:
+-
+-      /* Each entry is exactly 32 bytes. */
+-do_sys_socket: /* sys_socket(int, int, int) */
+-1:    ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_socket), %g1
+-2:    ldswa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(sys_socket), %g0
+-3:     ldswa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */
+-4:    ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_bind), %g1
+-5:    ldswa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(sys_bind), %g0
+-6:     lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */
+-7:    ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_connect), %g1
+-8:    ldswa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(sys_connect), %g0
+-9:     lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_listen: /* sys_listen(int, int) */
+-10:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_listen), %g1
+-      jmpl            %g1 + %lo(sys_listen), %g0
+-11:    ldswa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-      nop
+-do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */
+-12:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_accept), %g1
+-13:   lduwa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(sys_accept), %g0
+-14:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */
+-15:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_getsockname), %g1
+-16:   lduwa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(sys_getsockname), %g0
+-17:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */
+-18:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_getpeername), %g1
+-19:   lduwa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(sys_getpeername), %g0
+-20:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */
+-21:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_socketpair), %g1
+-22:   ldswa           [%o1 + 0x8] %asi, %o2
+-23:   lduwa           [%o1 + 0xc] %asi, %o3
+-      jmpl            %g1 + %lo(sys_socketpair), %g0
+-24:    ldswa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */
+-25:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_send), %g1
+-26:   lduwa           [%o1 + 0x8] %asi, %o2
+-27:   lduwa           [%o1 + 0xc] %asi, %o3
+-      jmpl            %g1 + %lo(sys_send), %g0
+-28:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */
+-29:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_recv), %g1
+-30:   lduwa           [%o1 + 0x8] %asi, %o2
+-31:   lduwa           [%o1 + 0xc] %asi, %o3
+-      jmpl            %g1 + %lo(sys_recv), %g0
+-32:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */
+-33:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_sendto), %g1
+-34:   lduwa           [%o1 + 0x8] %asi, %o2
+-35:   lduwa           [%o1 + 0xc] %asi, %o3
+-36:   lduwa           [%o1 + 0x10] %asi, %o4
+-37:   ldswa           [%o1 + 0x14] %asi, %o5
+-      jmpl            %g1 + %lo(sys_sendto), %g0
+-38:    lduwa          [%o1 + 0x4] %asi, %o1
+-do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */
+-39:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_recvfrom), %g1
+-40:   lduwa           [%o1 + 0x8] %asi, %o2
+-41:   lduwa           [%o1 + 0xc] %asi, %o3
+-42:   lduwa           [%o1 + 0x10] %asi, %o4
+-43:   lduwa           [%o1 + 0x14] %asi, %o5
+-      jmpl            %g1 + %lo(sys_recvfrom), %g0
+-44:    lduwa          [%o1 + 0x4] %asi, %o1
+-do_sys_shutdown: /* sys_shutdown(int, int) */
+-45:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_shutdown), %g1
+-      jmpl            %g1 + %lo(sys_shutdown), %g0
+-46:    ldswa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-      nop
+-do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */
+-47:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_setsockopt), %g1
+-48:   ldswa           [%o1 + 0x8] %asi, %o2
+-49:   lduwa           [%o1 + 0xc] %asi, %o3
+-50:   ldswa           [%o1 + 0x10] %asi, %o4
+-      jmpl            %g1 + %lo(sys_setsockopt), %g0
+-51:    ldswa          [%o1 + 0x4] %asi, %o1
+-      nop
+-do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */
+-52:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_getsockopt), %g1
+-53:   ldswa           [%o1 + 0x8] %asi, %o2
+-54:   lduwa           [%o1 + 0xc] %asi, %o3
+-55:   lduwa           [%o1 + 0x10] %asi, %o4
+-      jmpl            %g1 + %lo(sys_getsockopt), %g0
+-56:    ldswa          [%o1 + 0x4] %asi, %o1
+-      nop
+-do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */
+-57:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(compat_sys_sendmsg), %g1
+-58:   lduwa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(compat_sys_sendmsg), %g0
+-59:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */
+-60:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(compat_sys_recvmsg), %g1
+-61:   lduwa           [%o1 + 0x8] %asi, %o2
+-      jmpl            %g1 + %lo(compat_sys_recvmsg), %g0
+-62:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-      nop
+-do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
+-63:   ldswa           [%o1 + 0x0] %asi, %o0
+-      sethi           %hi(sys_accept4), %g1
+-64:   lduwa           [%o1 + 0x8] %asi, %o2
+-65:   ldswa           [%o1 + 0xc] %asi, %o3
+-      jmpl            %g1 + %lo(sys_accept4), %g0
+-66:    lduwa          [%o1 + 0x4] %asi, %o1
+-      nop
+-      nop
+-
+-      .section        __ex_table,"a"
+-      .align          4
+-      .word           1b, __retl_efault, 2b, __retl_efault
+-      .word           3b, __retl_efault, 4b, __retl_efault
+-      .word           5b, __retl_efault, 6b, __retl_efault
+-      .word           7b, __retl_efault, 8b, __retl_efault
+-      .word           9b, __retl_efault, 10b, __retl_efault
+-      .word           11b, __retl_efault, 12b, __retl_efault
+-      .word           13b, __retl_efault, 14b, __retl_efault
+-      .word           15b, __retl_efault, 16b, __retl_efault
+-      .word           17b, __retl_efault, 18b, __retl_efault
+-      .word           19b, __retl_efault, 20b, __retl_efault
+-      .word           21b, __retl_efault, 22b, __retl_efault
+-      .word           23b, __retl_efault, 24b, __retl_efault
+-      .word           25b, __retl_efault, 26b, __retl_efault
+-      .word           27b, __retl_efault, 28b, __retl_efault
+-      .word           29b, __retl_efault, 30b, __retl_efault
+-      .word           31b, __retl_efault, 32b, __retl_efault
+-      .word           33b, __retl_efault, 34b, __retl_efault
+-      .word           35b, __retl_efault, 36b, __retl_efault
+-      .word           37b, __retl_efault, 38b, __retl_efault
+-      .word           39b, __retl_efault, 40b, __retl_efault
+-      .word           41b, __retl_efault, 42b, __retl_efault
+-      .word           43b, __retl_efault, 44b, __retl_efault
+-      .word           45b, __retl_efault, 46b, __retl_efault
+-      .word           47b, __retl_efault, 48b, __retl_efault
+-      .word           49b, __retl_efault, 50b, __retl_efault
+-      .word           51b, __retl_efault, 52b, __retl_efault
+-      .word           53b, __retl_efault, 54b, __retl_efault
+-      .word           55b, __retl_efault, 56b, __retl_efault
+-      .word           57b, __retl_efault, 58b, __retl_efault
+-      .word           59b, __retl_efault, 60b, __retl_efault
+-      .word           61b, __retl_efault, 62b, __retl_efault
+-      .word           63b, __retl_efault, 64b, __retl_efault
+-      .word           65b, __retl_efault, 66b, __retl_efault
+-      .previous
+diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
+index 5119e9609903c..5399fab3ce631 100644
+--- a/arch/sparc/kernel/syscalls/syscall.tbl
++++ b/arch/sparc/kernel/syscalls/syscall.tbl
+@@ -155,7 +155,7 @@
+ 123   32      fchown                  sys_fchown16
+ 123   64      fchown                  sys_fchown
+ 124   common  fchmod                  sys_fchmod
+-125   common  recvfrom                sys_recvfrom
++125   common  recvfrom                sys_recvfrom                    compat_sys_recvfrom
+ 126   32      setreuid                sys_setreuid16
+ 126   64      setreuid                sys_setreuid
+ 127   32      setregid                sys_setregid16
+@@ -247,7 +247,7 @@
+ 204   32      readdir                 sys_old_readdir                 compat_sys_old_readdir
+ 204   64      readdir                 sys_nis_syscall
+ 205   common  readahead               sys_readahead                   compat_sys_readahead
+-206   common  socketcall              sys_socketcall                  sys32_socketcall
++206   common  socketcall              sys_socketcall                  compat_sys_socketcall
+ 207   common  syslog                  sys_syslog
+ 208   common  lookup_dcookie          sys_lookup_dcookie              compat_sys_lookup_dcookie
+ 209   common  fadvise64               sys_fadvise64                   compat_sys_fadvise64
+-- 
+2.43.0
+
diff --git a/queue-6.1/sparc-fix-old-compat_sys_select.patch b/queue-6.1/sparc-fix-old-compat_sys_select.patch
new file mode 100644 (file)
index 0000000..9084e4b
--- /dev/null
@@ -0,0 +1,39 @@
+From 4cb1f2710a268a45b0a3c2012010cb43bba4d05c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jun 2024 14:07:30 +0200
+Subject: sparc: fix old compat_sys_select()
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+[ Upstream commit bae6428a9fffb2023191b0723e276cf1377a7c9f ]
+
+sparc has two identical select syscalls at numbers 93 and 230, respectively.
+During the conversion to the modern syscall.tbl format, the older one of the
+two broke in compat mode, and now refers to the native 64-bit syscall.
+
+Restore the correct behavior. This has very little effect, as glibc has
+been using the newer number anyway.
+
+Fixes: 6ff645dd683a ("sparc: add system call table generation support")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/sparc/kernel/syscalls/syscall.tbl | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
+index 4398cc6fb68dd..5119e9609903c 100644
+--- a/arch/sparc/kernel/syscalls/syscall.tbl
++++ b/arch/sparc/kernel/syscalls/syscall.tbl
+@@ -117,7 +117,7 @@
+ 90    common  dup2                    sys_dup2
+ 91    32      setfsuid32              sys_setfsuid
+ 92    common  fcntl                   sys_fcntl                       compat_sys_fcntl
+-93    common  select                  sys_select
++93    common  select                  sys_select                      compat_sys_select
+ 94    32      setfsgid32              sys_setfsgid
+ 95    common  fsync                   sys_fsync
+ 96    common  setpriority             sys_setpriority
+-- 
+2.43.0
+
diff --git a/queue-6.1/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch b/queue-6.1/tcp-fix-tcp_rcv_fastopen_synack-to-enter-tcp_ca_loss.patch
new file mode 100644 (file)
index 0000000..ba97c09
--- /dev/null
@@ -0,0 +1,168 @@
+From b2df673c52d7f708afbec030fbef2985bd2a8d84 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jun 2024 14:43:23 +0000
+Subject: tcp: fix tcp_rcv_fastopen_synack() to enter TCP_CA_Loss for failed
+ TFO
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit 5dfe9d273932c647bdc9d664f939af9a5a398cbc ]
+
+Testing determined that the recent commit 9e046bb111f1 ("tcp: clear
+tp->retrans_stamp in tcp_rcv_fastopen_synack()") has a race, and does
+not always ensure retrans_stamp is 0 after a TFO payload retransmit.
+
+If transmit completion for the SYN+data skb happens after the client
+TCP stack receives the SYNACK (which sometimes happens), then
+retrans_stamp can erroneously remain non-zero for the lifetime of the
+connection, causing a premature ETIMEDOUT later.
+
+Testing and tracing showed that the buggy scenario is the following
+somewhat tricky sequence:
+
++ Client attempts a TFO handshake. tcp_send_syn_data() sends SYN + TFO
+  cookie + data in a single packet in the syn_data skb. It hands the
+  syn_data skb to tcp_transmit_skb(), which makes a clone. Crucially,
+  it then reuses the same original (non-clone) syn_data skb,
+  transforming it by advancing the seq by one byte and removing the
+  FIN bit, and enqueues the resulting payload-only skb in the
+  sk->tcp_rtx_queue.
+
++ Client sets retrans_stamp to the start time of the three-way
+  handshake.
+
++ Cookie mismatches or server has TFO disabled, and server only ACKs
+  SYN.
+
++ tcp_ack() sees SYN is acked, tcp_clean_rtx_queue() clears
+  retrans_stamp.
+
++ Since the client SYN was acked but not the payload, the TFO failure
+  code path in tcp_rcv_fastopen_synack() tries to retransmit the
+  payload skb.  However, in some cases the transmit completion for the
+  clone of the syn_data (which had SYN + TFO cookie + data) hasn't
+  happened.  In those cases, skb_still_in_host_queue() returns true
+  for the retransmitted TFO payload, because the clone of the syn_data
+  skb has not had its tx completion.
+
++ Because skb_still_in_host_queue() finds skb_fclone_busy() is true,
+  it sets the TSQ_THROTTLED bit and the retransmit does not happen in
+  the tcp_rcv_fastopen_synack() call chain.
+
++ The tcp_rcv_fastopen_synack() code next implicitly assumes the
+  retransmit process is finished, and sets retrans_stamp to 0 to clear
+  it, but this is later overwritten (see below).
+
++ Later, upon tx completion, tcp_tsq_write() calls
+  tcp_xmit_retransmit_queue(), which puts the retransmit in flight and
+  sets retrans_stamp to a non-zero value.
+
++ The client receives an ACK for the retransmitted TFO payload data.
+
++ Since we're in CA_Open and there are no dupacks/SACKs/DSACKs/ECN to
+  make tcp_ack_is_dubious() true and make us call
+  tcp_fastretrans_alert() and reach a code path that clears
+  retrans_stamp, retrans_stamp stays nonzero.
+
++ Later, if there is a TLP, RTO, RTO sequence, then the connection
+  will suffer an early ETIMEDOUT due to the erroneously ancient
+  retrans_stamp.
+
+The fix: this commit refactors the code to have
+tcp_rcv_fastopen_synack() retransmit by reusing the relevant parts of
+tcp_simple_retransmit() that enter CA_Loss (without changing cwnd) and
+call tcp_xmit_retransmit_queue(). We have tcp_simple_retransmit() and
+tcp_rcv_fastopen_synack() share code in this way because in both cases
+we get a packet indicating non-congestion loss (MTU reduction or TFO
+failure) and thus in both cases we want to retransmit as many packets
+as cwnd allows, without reducing cwnd. And given that retransmits will
+set retrans_stamp to a non-zero value (and may do so in a later
+calling context due to TSQ), we also want to enter CA_Loss so that we
+track when all retransmitted packets are ACked and clear retrans_stamp
+when that happens (to ensure later recurring RTOs are using the
+correct retrans_stamp and don't declare ETIMEDOUT prematurely).
+
+Fixes: 9e046bb111f1 ("tcp: clear tp->retrans_stamp in tcp_rcv_fastopen_synack()")
+Fixes: a7abf3cd76e1 ("tcp: consider using standard rtx logic in tcp_rcv_fastopen_synack()")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Link: https://patch.msgid.link/20240624144323.2371403-1-ncardwell.sw@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 38 +++++++++++++++++++++++++++-----------
+ 1 file changed, 27 insertions(+), 11 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 852745a90aa8d..2146299016eda 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2754,13 +2754,37 @@ static void tcp_mtup_probe_success(struct sock *sk)
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
+ }
++/* Sometimes we deduce that packets have been dropped due to reasons other than
++ * congestion, like path MTU reductions or failed client TFO attempts. In these
++ * cases we call this function to retransmit as many packets as cwnd allows,
++ * without reducing cwnd. Given that retransmits will set retrans_stamp to a
++ * non-zero value (and may do so in a later calling context due to TSQ), we
++ * also enter CA_Loss so that we track when all retransmitted packets are ACKed
++ * and clear retrans_stamp when that happens (to ensure later recurring RTOs
++ * are using the correct retrans_stamp and don't declare ETIMEDOUT
++ * prematurely).
++ */
++static void tcp_non_congestion_loss_retransmit(struct sock *sk)
++{
++      const struct inet_connection_sock *icsk = inet_csk(sk);
++      struct tcp_sock *tp = tcp_sk(sk);
++
++      if (icsk->icsk_ca_state != TCP_CA_Loss) {
++              tp->high_seq = tp->snd_nxt;
++              tp->snd_ssthresh = tcp_current_ssthresh(sk);
++              tp->prior_ssthresh = 0;
++              tp->undo_marker = 0;
++              tcp_set_ca_state(sk, TCP_CA_Loss);
++      }
++      tcp_xmit_retransmit_queue(sk);
++}
++
+ /* Do a simple retransmit without using the backoff mechanisms in
+  * tcp_timer. This is used for path mtu discovery.
+  * The socket is already locked here.
+  */
+ void tcp_simple_retransmit(struct sock *sk)
+ {
+-      const struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *skb;
+       int mss;
+@@ -2800,14 +2824,7 @@ void tcp_simple_retransmit(struct sock *sk)
+        * in network, but units changed and effective
+        * cwnd/ssthresh really reduced now.
+        */
+-      if (icsk->icsk_ca_state != TCP_CA_Loss) {
+-              tp->high_seq = tp->snd_nxt;
+-              tp->snd_ssthresh = tcp_current_ssthresh(sk);
+-              tp->prior_ssthresh = 0;
+-              tp->undo_marker = 0;
+-              tcp_set_ca_state(sk, TCP_CA_Loss);
+-      }
+-      tcp_xmit_retransmit_queue(sk);
++      tcp_non_congestion_loss_retransmit(sk);
+ }
+ EXPORT_SYMBOL(tcp_simple_retransmit);
+@@ -6145,8 +6162,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
+                       tp->fastopen_client_fail = TFO_DATA_NOT_ACKED;
+               skb_rbtree_walk_from(data)
+                        tcp_mark_skb_lost(sk, data);
+-              tcp_xmit_retransmit_queue(sk);
+-              tp->retrans_stamp = 0;
++              tcp_non_congestion_loss_retransmit(sk);
+               NET_INC_STATS(sock_net(sk),
+                               LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+               return true;
+-- 
+2.43.0
+
diff --git a/queue-6.1/tracing-net_sched-null-pointer-dereference-in-perf_t.patch b/queue-6.1/tracing-net_sched-null-pointer-dereference-in-perf_t.patch
new file mode 100644 (file)
index 0000000..6db4099
--- /dev/null
@@ -0,0 +1,306 @@
+From 98aa00e407702acdde8aad81051c597a4dcbffec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jun 2024 02:33:23 +0900
+Subject: tracing/net_sched: NULL pointer dereference in
+ perf_trace_qdisc_reset()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Yunseong Kim <yskelg@gmail.com>
+
+[ Upstream commit bab4923132feb3e439ae45962979c5d9d5c7c1f1 ]
+
+In the TRACE_EVENT(qdisc_reset), a NULL dereference occurred from
+
+ qdisc->dev_queue->dev <NULL> ->name
+
+This situation was simulated with a bunch of veths and Bluetooth disconnection
+and reconnection.
+
+During qdisc initialization, qdisc was being set to noop_queue.
+In veth_init_queue, the initial tx_num was reduced back to one,
+causing the qdisc reset to be called with noop, which led to the kernel
+panic.
+
+I've attached a GitHub gist link with the syz-execprog program converted to C
+source code and 3 logs of the reproduced vmcore-dmesg.
+
+ https://gist.github.com/yskelg/cc64562873ce249cdd0d5a358b77d740
+
+Yeoreum and I use two fuzzing tools simultaneously.
+
+One process with syz-executor : https://github.com/google/syzkaller
+
+ $ ./syz-execprog -executor=./syz-executor -repeat=1 -sandbox=setuid \
+    -enable=none -collide=false log1
+
+The other process with perf fuzzer:
+ https://github.com/deater/perf_event_tests/tree/master/fuzzer
+
+ $ perf_event_tests/fuzzer/perf_fuzzer
+
+I think this will happen on the following kernel versions.
+
+ Linux kernel version +v6.7.10, +v6.8, +v6.9 and it could happen in v6.10.
+
+This occurred from 51270d573a8d. I think this patch is absolutely
+necessary. Previously, it was showing an unintended string value for the name.
+
+I've reproduced 3 time from my fedora 40 Debug Kernel with any other module
+or patched.
+
+ version: 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug
+
+[ 5287.164555] veth0_vlan: left promiscuous mode
+[ 5287.164929] veth1_macvtap: left promiscuous mode
+[ 5287.164950] veth0_macvtap: left promiscuous mode
+[ 5287.164983] veth1_vlan: left promiscuous mode
+[ 5287.165008] veth0_vlan: left promiscuous mode
+[ 5287.165450] veth1_macvtap: left promiscuous mode
+[ 5287.165472] veth0_macvtap: left promiscuous mode
+[ 5287.165502] veth1_vlan: left promiscuous mode
+…
+[ 5297.598240] bridge0: port 2(bridge_slave_1) entered blocking state
+[ 5297.598262] bridge0: port 2(bridge_slave_1) entered forwarding state
+[ 5297.598296] bridge0: port 1(bridge_slave_0) entered blocking state
+[ 5297.598313] bridge0: port 1(bridge_slave_0) entered forwarding state
+[ 5297.616090] 8021q: adding VLAN 0 to HW filter on device bond0
+[ 5297.620405] bridge0: port 1(bridge_slave_0) entered disabled state
+[ 5297.620730] bridge0: port 2(bridge_slave_1) entered disabled state
+[ 5297.627247] 8021q: adding VLAN 0 to HW filter on device team0
+[ 5297.629636] bridge0: port 1(bridge_slave_0) entered blocking state
+…
+[ 5298.002798] bridge_slave_0: left promiscuous mode
+[ 5298.002869] bridge0: port 1(bridge_slave_0) entered disabled state
+[ 5298.309444] bond0 (unregistering): (slave bond_slave_0): Releasing backup interface
+[ 5298.315206] bond0 (unregistering): (slave bond_slave_1): Releasing backup interface
+[ 5298.320207] bond0 (unregistering): Released all slaves
+[ 5298.354296] hsr_slave_0: left promiscuous mode
+[ 5298.360750] hsr_slave_1: left promiscuous mode
+[ 5298.374889] veth1_macvtap: left promiscuous mode
+[ 5298.374931] veth0_macvtap: left promiscuous mode
+[ 5298.374988] veth1_vlan: left promiscuous mode
+[ 5298.375024] veth0_vlan: left promiscuous mode
+[ 5299.109741] team0 (unregistering): Port device team_slave_1 removed
+[ 5299.185870] team0 (unregistering): Port device team_slave_0 removed
+…
+[ 5300.155443] Bluetooth: hci3: unexpected cc 0x0c03 length: 249 > 1
+[ 5300.155724] Bluetooth: hci3: unexpected cc 0x1003 length: 249 > 9
+[ 5300.155988] Bluetooth: hci3: unexpected cc 0x1001 length: 249 > 9
+….
+[ 5301.075531] team0: Port device team_slave_1 added
+[ 5301.085515] bridge0: port 1(bridge_slave_0) entered blocking state
+[ 5301.085531] bridge0: port 1(bridge_slave_0) entered disabled state
+[ 5301.085588] bridge_slave_0: entered allmulticast mode
+[ 5301.085800] bridge_slave_0: entered promiscuous mode
+[ 5301.095617] bridge0: port 1(bridge_slave_0) entered blocking state
+[ 5301.095633] bridge0: port 1(bridge_slave_0) entered disabled state
+…
+[ 5301.149734] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link
+[ 5301.173234] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link
+[ 5301.180517] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link
+[ 5301.193481] hsr_slave_0: entered promiscuous mode
+[ 5301.204425] hsr_slave_1: entered promiscuous mode
+[ 5301.210172] debugfs: Directory 'hsr0' with parent 'hsr' already present!
+[ 5301.210185] Cannot create hsr debugfs directory
+[ 5301.224061] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link
+[ 5301.246901] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link
+[ 5301.255934] team0: Port device team_slave_0 added
+[ 5301.256480] team0: Port device team_slave_1 added
+[ 5301.256948] team0: Port device team_slave_0 added
+…
+[ 5301.435928] hsr_slave_0: entered promiscuous mode
+[ 5301.446029] hsr_slave_1: entered promiscuous mode
+[ 5301.455872] debugfs: Directory 'hsr0' with parent 'hsr' already present!
+[ 5301.455884] Cannot create hsr debugfs directory
+[ 5301.502664] hsr_slave_0: entered promiscuous mode
+[ 5301.513675] hsr_slave_1: entered promiscuous mode
+[ 5301.526155] debugfs: Directory 'hsr0' with parent 'hsr' already present!
+[ 5301.526164] Cannot create hsr debugfs directory
+[ 5301.563662] hsr_slave_0: entered promiscuous mode
+[ 5301.576129] hsr_slave_1: entered promiscuous mode
+[ 5301.580259] debugfs: Directory 'hsr0' with parent 'hsr' already present!
+[ 5301.580270] Cannot create hsr debugfs directory
+[ 5301.590269] 8021q: adding VLAN 0 to HW filter on device bond0
+
+[ 5301.595872] KASAN: null-ptr-deref in range [0x0000000000000130-0x0000000000000137]
+[ 5301.595877] Mem abort info:
+[ 5301.595881]   ESR = 0x0000000096000006
+[ 5301.595885]   EC = 0x25: DABT (current EL), IL = 32 bits
+[ 5301.595889]   SET = 0, FnV = 0
+[ 5301.595893]   EA = 0, S1PTW = 0
+[ 5301.595896]   FSC = 0x06: level 2 translation fault
+[ 5301.595900] Data abort info:
+[ 5301.595903]   ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
+[ 5301.595907]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[ 5301.595911]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[ 5301.595915] [dfff800000000026] address between user and kernel address ranges
+[ 5301.595971] Internal error: Oops: 0000000096000006 [#1] SMP
+…
+[ 5301.596076] CPU: 2 PID: 102769 Comm:
+syz-executor.3 Kdump: loaded Tainted:
+ G        W         -------  ---  6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug #1
+[ 5301.596080] Hardware name: VMware, Inc. VMware20,1/VBSA,
+ BIOS VMW201.00V.21805430.BA64.2305221830 05/22/2023
+[ 5301.596082] pstate: 01400005 (nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--)
+[ 5301.596085] pc : strnlen+0x40/0x88
+[ 5301.596114] lr : trace_event_get_offsets_qdisc_reset+0x6c/0x2b0
+[ 5301.596124] sp : ffff8000beef6b40
+[ 5301.596126] x29: ffff8000beef6b40 x28: dfff800000000000 x27: 0000000000000001
+[ 5301.596131] x26: 6de1800082c62bd0 x25: 1ffff000110aa9e0 x24: ffff800088554f00
+[ 5301.596136] x23: ffff800088554ec0 x22: 0000000000000130 x21: 0000000000000140
+[ 5301.596140] x20: dfff800000000000 x19: ffff8000beef6c60 x18: ffff7000115106d8
+[ 5301.596143] x17: ffff800121bad000 x16: ffff800080020000 x15: 0000000000000006
+[ 5301.596147] x14: 0000000000000002 x13: ffff0001f3ed8d14 x12: ffff700017ddeda5
+[ 5301.596151] x11: 1ffff00017ddeda4 x10: ffff700017ddeda4 x9 : ffff800082cc5eec
+[ 5301.596155] x8 : 0000000000000004 x7 : 00000000f1f1f1f1 x6 : 00000000f2f2f200
+[ 5301.596158] x5 : 00000000f3f3f3f3 x4 : ffff700017dded80 x3 : 00000000f204f1f1
+[ 5301.596162] x2 : 0000000000000026 x1 : 0000000000000000 x0 : 0000000000000130
+[ 5301.596166] Call trace:
+[ 5301.596175]  strnlen+0x40/0x88
+[ 5301.596179]  trace_event_get_offsets_qdisc_reset+0x6c/0x2b0
+[ 5301.596182]  perf_trace_qdisc_reset+0xb0/0x538
+[ 5301.596184]  __traceiter_qdisc_reset+0x68/0xc0
+[ 5301.596188]  qdisc_reset+0x43c/0x5e8
+[ 5301.596190]  netif_set_real_num_tx_queues+0x288/0x770
+[ 5301.596194]  veth_init_queues+0xfc/0x130 [veth]
+[ 5301.596198]  veth_newlink+0x45c/0x850 [veth]
+[ 5301.596202]  rtnl_newlink_create+0x2c8/0x798
+[ 5301.596205]  __rtnl_newlink+0x92c/0xb60
+[ 5301.596208]  rtnl_newlink+0xd8/0x130
+[ 5301.596211]  rtnetlink_rcv_msg+0x2e0/0x890
+[ 5301.596214]  netlink_rcv_skb+0x1c4/0x380
+[ 5301.596225]  rtnetlink_rcv+0x20/0x38
+[ 5301.596227]  netlink_unicast+0x3c8/0x640
+[ 5301.596231]  netlink_sendmsg+0x658/0xa60
+[ 5301.596234]  __sock_sendmsg+0xd0/0x180
+[ 5301.596243]  __sys_sendto+0x1c0/0x280
+[ 5301.596246]  __arm64_sys_sendto+0xc8/0x150
+[ 5301.596249]  invoke_syscall+0xdc/0x268
+[ 5301.596256]  el0_svc_common.constprop.0+0x16c/0x240
+[ 5301.596259]  do_el0_svc+0x48/0x68
+[ 5301.596261]  el0_svc+0x50/0x188
+[ 5301.596265]  el0t_64_sync_handler+0x120/0x130
+[ 5301.596268]  el0t_64_sync+0x194/0x198
+[ 5301.596272] Code: eb15001f 54000120 d343fc02 12000801 (38f46842)
+[ 5301.596285] SMP: stopping secondary CPUs
+[ 5301.597053] Starting crashdump kernel...
+[ 5301.597057] Bye!
+
+After applying our patch, I didn't find any kernel panic errors.
+
+We've found a simple reproducer
+
+ # echo 1 > /sys/kernel/debug/tracing/events/qdisc/qdisc_reset/enable
+
+ # ip link add veth0 type veth peer name veth1
+
+ Error: Unknown device type.
+
+However, without our patch applied, I tested upstream 6.10.0-rc3 kernel
+using the qdisc_reset event and the ip command on my qemu virtual machine.
+
+These 2 commands always cause a kernel panic.
+
+Linux version: 6.10.0-rc3
+
+[    0.000000] Linux version 6.10.0-rc3-00164-g44ef20baed8e-dirty
+(paran@fedora) (gcc (GCC) 14.1.1 20240522 (Red Hat 14.1.1-4), GNU ld
+version 2.41-34.fc40) #20 SMP PREEMPT Sat Jun 15 16:51:25 KST 2024
+
+Kernel panic message:
+
+[  615.236484] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
+[  615.237250] Dumping ftrace buffer:
+[  615.237679]    (ftrace buffer empty)
+[  615.238097] Modules linked in: veth crct10dif_ce virtio_gpu
+virtio_dma_buf drm_shmem_helper drm_kms_helper zynqmp_fpga xilinx_can
+xilinx_spi xilinx_selectmap xilinx_core xilinx_pr_decoupler versal_fpga
+uvcvideo uvc videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videodev
+videobuf2_common mc usbnet deflate zstd ubifs ubi rcar_canfd rcar_can
+omap_mailbox ntb_msi_test ntb_hw_epf lattice_sysconfig_spi
+lattice_sysconfig ice40_spi gpio_xilinx dwmac_altr_socfpga mdio_regmap
+stmmac_platform stmmac pcs_xpcs dfl_fme_region dfl_fme_mgr dfl_fme_br
+dfl_afu dfl fpga_region fpga_bridge can can_dev br_netfilter bridge stp
+llc atl1c ath11k_pci mhi ath11k_ahb ath11k qmi_helpers ath10k_sdio
+ath10k_pci ath10k_core ath mac80211 libarc4 cfg80211 drm fuse backlight ipv6
+Jun 22 02:36:5[3   6k152.62-4sm98k4-0k]v  kCePUr:n e1l :P IUDn:a b4le6
+8t oC ohmma: nidpl eN oketr nteali nptaedg i6n.g1 0re.0q-urecs3t- 0at0
+1v6i4r-tgu4a4le fa2d0dbraeeds0se-dir tyd f#f2f08
+  615.252376] Hardware name: linux,dummy-virt (DT)
+[  615.253220] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS
+BTYPE=--)
+[  615.254433] pc : strnlen+0x6c/0xe0
+[  615.255096] lr : trace_event_get_offsets_qdisc_reset+0x94/0x3d0
+[  615.256088] sp : ffff800080b269a0
+[  615.256615] x29: ffff800080b269a0 x28: ffffc070f3f98500 x27:
+0000000000000001
+[  615.257831] x26: 0000000000000010 x25: ffffc070f3f98540 x24:
+ffffc070f619cf60
+[  615.259020] x23: 0000000000000128 x22: 0000000000000138 x21:
+dfff800000000000
+[  615.260241] x20: ffffc070f631ad00 x19: 0000000000000128 x18:
+ffffc070f448b800
+[  615.261454] x17: 0000000000000000 x16: 0000000000000001 x15:
+ffffc070f4ba2a90
+[  615.262635] x14: ffff700010164d73 x13: 1ffff80e1e8d5eb3 x12:
+1ffff00010164d72
+[  615.263877] x11: ffff700010164d72 x10: dfff800000000000 x9 :
+ffffc070e85d6184
+[  615.265047] x8 : ffffc070e4402070 x7 : 000000000000f1f1 x6 :
+000000001504a6d3
+[  615.266336] x5 : ffff28ca21122140 x4 : ffffc070f5043ea8 x3 :
+0000000000000000
+[  615.267528] x2 : 0000000000000025 x1 : 0000000000000000 x0 :
+0000000000000000
+[  615.268747] Call trace:
+[  615.269180]  strnlen+0x6c/0xe0
+[  615.269767]  trace_event_get_offsets_qdisc_reset+0x94/0x3d0
+[  615.270716]  trace_event_raw_event_qdisc_reset+0xe8/0x4e8
+[  615.271667]  __traceiter_qdisc_reset+0xa0/0x140
+[  615.272499]  qdisc_reset+0x554/0x848
+[  615.273134]  netif_set_real_num_tx_queues+0x360/0x9a8
+[  615.274050]  veth_init_queues+0x110/0x220 [veth]
+[  615.275110]  veth_newlink+0x538/0xa50 [veth]
+[  615.276172]  __rtnl_newlink+0x11e4/0x1bc8
+[  615.276944]  rtnl_newlink+0xac/0x120
+[  615.277657]  rtnetlink_rcv_msg+0x4e4/0x1370
+[  615.278409]  netlink_rcv_skb+0x25c/0x4f0
+[  615.279122]  rtnetlink_rcv+0x48/0x70
+[  615.279769]  netlink_unicast+0x5a8/0x7b8
+[  615.280462]  netlink_sendmsg+0xa70/0x1190
+
+Yeoreum and I don't know if the patch we wrote will fix the underlying
+cause, but we think the priority is to prevent the kernel panic from happening.
+So, we're sending this patch.
+
+Fixes: 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string")
+Link: https://lore.kernel.org/lkml/20240229143432.273b4871@gandalf.local.home/t/
+Cc: netdev@vger.kernel.org
+Tested-by: Yunseong Kim <yskelg@gmail.com>
+Signed-off-by: Yunseong Kim <yskelg@gmail.com>
+Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
+Link: https://lore.kernel.org/r/20240624173320.24945-4-yskelg@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/qdisc.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
+index 1f4258308b967..69453b8de29e6 100644
+--- a/include/trace/events/qdisc.h
++++ b/include/trace/events/qdisc.h
+@@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset,
+       TP_ARGS(q),
+       TP_STRUCT__entry(
+-              __string(       dev,            qdisc_dev(q)->name      )
++              __string(       dev,            qdisc_dev(q) ? qdisc_dev(q)->name : "(null)"    )
+               __string(       kind,           q->ops->id              )
+               __field(        u32,            parent                  )
+               __field(        u32,            handle                  )
+-- 
+2.43.0
+
diff --git a/queue-6.1/xdp-remove-warn-from-__xdp_reg_mem_model.patch b/queue-6.1/xdp-remove-warn-from-__xdp_reg_mem_model.patch
new file mode 100644 (file)
index 0000000..0fd0497
--- /dev/null
@@ -0,0 +1,76 @@
+From cce889c3cafcab861049d53b8cf5f427e9aae6d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jun 2024 11:07:47 +0300
+Subject: xdp: Remove WARN() from __xdp_reg_mem_model()
+
+From: Daniil Dulov <d.dulov@aladdin.ru>
+
+[ Upstream commit 7e9f79428372c6eab92271390851be34ab26bfb4 ]
+
+syzkaller reports a warning in __xdp_reg_mem_model().
+
+The warning occurs only if __mem_id_init_hash_table() returns an error. It
+returns the error in two cases:
+
+  1. memory allocation fails;
+  2. rhashtable_init() fails when some fields of rhashtable_params
+     struct are not initialized properly.
+
+The second case cannot happen since there is a static const rhashtable_params
+struct with valid fields. So, warning is only triggered when there is a
+problem with memory allocation.
+
+Thus, there is no sense in using WARN() to handle this error and it can be
+safely removed.
+
+WARNING: CPU: 0 PID: 5065 at net/core/xdp.c:299 __xdp_reg_mem_model+0x2d9/0x650 net/core/xdp.c:299
+
+CPU: 0 PID: 5065 Comm: syz-executor883 Not tainted 6.8.0-syzkaller-05271-gf99c5f563c17 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/27/2024
+RIP: 0010:__xdp_reg_mem_model+0x2d9/0x650 net/core/xdp.c:299
+
+Call Trace:
+ xdp_reg_mem_model+0x22/0x40 net/core/xdp.c:344
+ xdp_test_run_setup net/bpf/test_run.c:188 [inline]
+ bpf_test_run_xdp_live+0x365/0x1e90 net/bpf/test_run.c:377
+ bpf_prog_test_run_xdp+0x813/0x11b0 net/bpf/test_run.c:1267
+ bpf_prog_test_run+0x33a/0x3b0 kernel/bpf/syscall.c:4240
+ __sys_bpf+0x48d/0x810 kernel/bpf/syscall.c:5649
+ __do_sys_bpf kernel/bpf/syscall.c:5738 [inline]
+ __se_sys_bpf kernel/bpf/syscall.c:5736 [inline]
+ __x64_sys_bpf+0x7c/0x90 kernel/bpf/syscall.c:5736
+ do_syscall_64+0xfb/0x240
+ entry_SYSCALL_64_after_hwframe+0x6d/0x75
+
+Found by Linux Verification Center (linuxtesting.org) with syzkaller.
+
+Fixes: 8d5d88527587 ("xdp: rhashtable with allocator ID to pointer mapping")
+Signed-off-by: Daniil Dulov <d.dulov@aladdin.ru>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Link: https://lore.kernel.org/all/20240617162708.492159-1-d.dulov@aladdin.ru
+Link: https://lore.kernel.org/bpf/20240624080747.36858-1-d.dulov@aladdin.ru
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/xdp.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/net/core/xdp.c b/net/core/xdp.c
+index 844c9d99dc0ec..c3f6653b42742 100644
+--- a/net/core/xdp.c
++++ b/net/core/xdp.c
+@@ -292,10 +292,8 @@ static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
+               mutex_lock(&mem_id_lock);
+               ret = __mem_id_init_hash_table();
+               mutex_unlock(&mem_id_lock);
+-              if (ret < 0) {
+-                      WARN_ON(1);
++              if (ret < 0)
+                       return ERR_PTR(ret);
+-              }
+       }
+       xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
+-- 
+2.43.0
+