]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
author Sasha Levin <sashal@kernel.org>
Thu, 24 Oct 2024 11:16:24 +0000 (07:16 -0400)
committer Sasha Levin <sashal@kernel.org>
Thu, 24 Oct 2024 11:16:24 +0000 (07:16 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
21 files changed:
queue-6.6/arm64-force-position-independent-veneers.patch [new file with mode: 0644]
queue-6.6/asoc-amd-yc-add-quirk-for-hp-dragonfly-pro-one.patch [new file with mode: 0644]
queue-6.6/asoc-codecs-lpass-rx-macro-add-missing-cdc_rx_bcl_vb.patch [new file with mode: 0644]
queue-6.6/asoc-fsl_sai-enable-fifo-continue-on-error-fcont-bit.patch [new file with mode: 0644]
queue-6.6/asoc-qcom-sm8250-add-qrb4210-rb2-sndcard-compatible-.patch [new file with mode: 0644]
queue-6.6/cifs-validate-content-of-nfs-reparse-point-buffer.patch [new file with mode: 0644]
queue-6.6/drm-vboxvideo-replace-fake-vla-at-end-of-vbva_mouse_.patch [new file with mode: 0644]
queue-6.6/exec-don-t-warn-for-racy-path_noexec-check.patch [new file with mode: 0644]
queue-6.6/platform-x86-dell-sysman-add-support-for-alienware-p.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/tracing-fprobe-event-cleanup-fix-a-wrong-comment-in-.patch [new file with mode: 0644]
queue-6.6/tracing-probes-cleanup-set-trace_probe-nr_args-at-tr.patch [new file with mode: 0644]
queue-6.6/tracing-probes-support-argn-in-return-probe-kprobe-a.patch [new file with mode: 0644]
queue-6.6/udf-fix-uninit-value-use-in-udf_get_fileshortad.patch [new file with mode: 0644]
queue-6.6/udf-refactor-inode_bmap-to-handle-error.patch [new file with mode: 0644]
queue-6.6/udf-refactor-udf_current_aext-to-handle-error.patch [new file with mode: 0644]
queue-6.6/udf-refactor-udf_next_aext-to-handle-error.patch [new file with mode: 0644]
queue-6.6/uprobe-avoid-out-of-bounds-memory-access-of-fetching.patch [new file with mode: 0644]
queue-6.6/uprobes-encapsulate-preparation-of-uprobe-args-buffe.patch [new file with mode: 0644]
queue-6.6/uprobes-prepare-uprobe-args-buffer-lazily.patch [new file with mode: 0644]
queue-6.6/uprobes-prevent-mutex_lock-under-rcu_read_lock.patch [new file with mode: 0644]

diff --git a/queue-6.6/arm64-force-position-independent-veneers.patch b/queue-6.6/arm64-force-position-independent-veneers.patch
new file mode 100644 (file)
index 0000000..e945d21
--- /dev/null
@@ -0,0 +1,113 @@
+From 5b5759c4d1354b1a3d52e6cf9e301df2102c555f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2024 11:18:38 +0100
+Subject: arm64: Force position-independent veneers
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit 9abe390e689f4f5c23c5f507754f8678431b4f72 ]
+
+Certain portions of code always need to be position-independent
+regardless of CONFIG_RELOCATABLE, including code which is executed in an
+idmap or which is executed before relocations are applied. In some
+kernel configurations the LLD linker generates position-dependent
+veneers for such code, and when executed these result in early boot-time
+failures.
+
+Marc Zyngier encountered a boot failure resulting from this when
+building a (particularly cursed) configuration with LLVM, as he reported
+to the list:
+
+  https://lore.kernel.org/linux-arm-kernel/86wmjwvatn.wl-maz@kernel.org/
+
+In Marc's kernel configuration, the .head.text and .rodata.text sections
+end up more than 128MiB apart, requiring a veneer to branch between the
+two:
+
+| [mark@lakrids:~/src/linux]% usekorg 14.1.0 aarch64-linux-objdump -t vmlinux | grep -w _text
+| ffff800080000000 g       .head.text     0000000000000000 _text
+| [mark@lakrids:~/src/linux]% usekorg 14.1.0 aarch64-linux-objdump -t vmlinux | grep -w primary_entry
+| ffff8000889df0e0 g       .rodata.text   000000000000006c primary_entry,
+
+... consequently, LLD inserts a position-dependent veneer for the branch
+from _stext (in .head.text) to primary_entry (in .rodata.text):
+
+| ffff800080000000 <_text>:
+| ffff800080000000:       fa405a4d        ccmp    x18, #0x0, #0xd, pl     // pl = nfrst
+| ffff800080000004:       14003fff        b       ffff800080010000 <__AArch64AbsLongThunk_primary_entry>
+...
+| ffff800080010000 <__AArch64AbsLongThunk_primary_entry>:
+| ffff800080010000:       58000050        ldr     x16, ffff800080010008 <__AArch64AbsLongThunk_primary_entry+0x8>
+| ffff800080010004:       d61f0200        br      x16
+| ffff800080010008:       889df0e0        .word   0x889df0e0
+| ffff80008001000c:       ffff8000        .word   0xffff8000
+
+... and as this is executed early in boot before the kernel is mapped in
+TTBR1 this results in a silent boot failure.
+
+Fix this by passing '--pic-veneer' to the linker, which will cause the
+linker to use position-independent veneers, e.g.
+
+| ffff800080000000 <_text>:
+| ffff800080000000:       fa405a4d        ccmp    x18, #0x0, #0xd, pl     // pl = nfrst
+| ffff800080000004:       14003fff        b       ffff800080010000 <__AArch64ADRPThunk_primary_entry>
+...
+| ffff800080010000 <__AArch64ADRPThunk_primary_entry>:
+| ffff800080010000:       f004e3f0        adrp    x16, ffff800089c8f000 <__idmap_text_start>
+| ffff800080010004:       91038210        add     x16, x16, #0xe0
+| ffff800080010008:       d61f0200        br      x16
+
+I've opted to pass '--pic-veneer' unconditionally, as:
+
+* In addition to solving the boot failure, these sequences are generally
+  nicer as they require fewer instructions and don't need to perform
+  data accesses.
+
+* While the position-independent veneer sequences have a limited +/-2GiB
+  range, this is not a new restriction. Even kernels built with
+  CONFIG_RELOCATABLE=n are limited to 2GiB in size as we have several
+  structures using 32-bit relative offsets and PREL32 relocations, which
+  are similarly limited to +/-2GiB in range. These include extable
+  entries, jump table entries, and alt_instr entries.
+
+* GNU LD defaults to using position-independent veneers, and supports
+  the same '--pic-veneer' option, so this change is not expected to
+  adversely affect GNU LD.
+
+I've tested with GNU LD 2.30 to 2.42 inclusive and LLVM 13.0.1 to 19.1.0
+inclusive, using the kernel.org binaries from:
+
+* https://mirrors.edge.kernel.org/pub/tools/crosstool/
+* https://mirrors.edge.kernel.org/pub/tools/llvm/
+
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Reported-by: Marc Zyngier <maz@kernel.org>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Will Deacon <will@kernel.org>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Nathan Chancellor <nathan@kernel.org>
+Link: https://lore.kernel.org/r/20240927101838.3061054-1-mark.rutland@arm.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
+index 9a2d3723cd0fa..11782860717fa 100644
+--- a/arch/arm64/Makefile
++++ b/arch/arm64/Makefile
+@@ -10,7 +10,7 @@
+ #
+ # Copyright (C) 1995-2001 by Russell King
+-LDFLAGS_vmlinux       :=--no-undefined -X
++LDFLAGS_vmlinux       :=--no-undefined -X --pic-veneer
+ ifeq ($(CONFIG_RELOCATABLE), y)
+ # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
+-- 
+2.43.0
+
diff --git a/queue-6.6/asoc-amd-yc-add-quirk-for-hp-dragonfly-pro-one.patch b/queue-6.6/asoc-amd-yc-add-quirk-for-hp-dragonfly-pro-one.patch
new file mode 100644 (file)
index 0000000..89087c4
--- /dev/null
@@ -0,0 +1,40 @@
+From fd3b8ceb8d91c5f03ca6618124caf0a39a5218ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Sep 2024 00:44:08 -0400
+Subject: ASoC: amd: yc: Add quirk for HP Dragonfly pro one
+
+From: David Lawrence Glanzman <davidglanzman@yahoo.com>
+
+[ Upstream commit 84e8d59651879b2ff8499bddbbc9549b7f1a646b ]
+
+Adds a quirk entry to enable the mic on HP Dragonfly pro one laptop
+
+Signed-off-by: David Lawrence Glanzman <davidglanzman@yahoo.com>
+Link: https://patch.msgid.link/1249c09bd6bf696b59d087a4f546ae397828656c.camel@yahoo.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/amd/yc/acp6x-mach.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
+index 248e3bcbf386b..76f5d926d1eac 100644
+--- a/sound/soc/amd/yc/acp6x-mach.c
++++ b/sound/soc/amd/yc/acp6x-mach.c
+@@ -444,6 +444,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
+                       DMI_MATCH(DMI_BOARD_NAME, "8A3E"),
+               }
+       },
++      {
++              .driver_data = &acp6x_card,
++              .matches = {
++                      DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
++                      DMI_MATCH(DMI_BOARD_NAME, "8A7F"),
++              }
++      },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+-- 
+2.43.0
+
diff --git a/queue-6.6/asoc-codecs-lpass-rx-macro-add-missing-cdc_rx_bcl_vb.patch b/queue-6.6/asoc-codecs-lpass-rx-macro-add-missing-cdc_rx_bcl_vb.patch
new file mode 100644 (file)
index 0000000..be1dbd1
--- /dev/null
@@ -0,0 +1,39 @@
+From efdc5576966c1c74ead9d1011a46e70b710297a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Sep 2024 05:38:23 +0100
+Subject: ASoC: codecs: lpass-rx-macro: add missing CDC_RX_BCL_VBAT_RF_PROC2 to
+ default regs values
+
+From: Alexey Klimov <alexey.klimov@linaro.org>
+
+[ Upstream commit e249786b2188107a7c50e7174d35f955a60988a1 ]
+
+CDC_RX_BCL_VBAT_RF_PROC1 is listed twice and its default value
+is 0x2a which is overwritten by its next occurrence in rx_defaults[].
+The second one should be the missing CDC_RX_BCL_VBAT_RF_PROC2 instead
+and its default value is expected to be 0x0.
+
+Signed-off-by: Alexey Klimov <alexey.klimov@linaro.org>
+Link: https://patch.msgid.link/20240925043823.520218-2-alexey.klimov@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/lpass-rx-macro.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
+index 29197d34ec099..7e9f0ab914124 100644
+--- a/sound/soc/codecs/lpass-rx-macro.c
++++ b/sound/soc/codecs/lpass-rx-macro.c
+@@ -909,7 +909,7 @@ static const struct reg_default rx_defaults[] = {
+       { CDC_RX_BCL_VBAT_PK_EST2, 0x01 },
+       { CDC_RX_BCL_VBAT_PK_EST3, 0x40 },
+       { CDC_RX_BCL_VBAT_RF_PROC1, 0x2A },
+-      { CDC_RX_BCL_VBAT_RF_PROC1, 0x00 },
++      { CDC_RX_BCL_VBAT_RF_PROC2, 0x00 },
+       { CDC_RX_BCL_VBAT_TAC1, 0x00 },
+       { CDC_RX_BCL_VBAT_TAC2, 0x18 },
+       { CDC_RX_BCL_VBAT_TAC3, 0x18 },
+-- 
+2.43.0
+
diff --git a/queue-6.6/asoc-fsl_sai-enable-fifo-continue-on-error-fcont-bit.patch b/queue-6.6/asoc-fsl_sai-enable-fifo-continue-on-error-fcont-bit.patch
new file mode 100644 (file)
index 0000000..2d96eee
--- /dev/null
@@ -0,0 +1,63 @@
+From 92b4b6a4aff9de7fb1fa0644b1f3d36e6c664e51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Sep 2024 14:08:28 +0800
+Subject: ASoC: fsl_sai: Enable 'FIFO continue on error' FCONT bit
+
+From: Shengjiu Wang <shengjiu.wang@nxp.com>
+
+[ Upstream commit 72455e33173c1a00c0ce93d2b0198eb45d5f4195 ]
+
+FCONT=1 means that on a FIFO error, the SAI will continue from the
+same word that caused the FIFO error to be set, after the FIFO
+warning flag has been cleared.
+
+Set FCONT bit in control register to avoid the channel swap
+issue after SAI xrun.
+
+Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
+Link: https://patch.msgid.link/1727676508-22830-1-git-send-email-shengjiu.wang@nxp.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/fsl/fsl_sai.c | 5 ++++-
+ sound/soc/fsl/fsl_sai.h | 1 +
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
+index 3d202398c5411..aa15f56ca139d 100644
+--- a/sound/soc/fsl/fsl_sai.c
++++ b/sound/soc/fsl/fsl_sai.c
+@@ -604,6 +604,9 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
+       val_cr4 |= FSL_SAI_CR4_FRSZ(slots);
++      /* Set to avoid channel swap */
++      val_cr4 |= FSL_SAI_CR4_FCONT;
++
+       /* Set to output mode to avoid tri-stated data pins */
+       if (tx)
+               val_cr4 |= FSL_SAI_CR4_CHMOD;
+@@ -690,7 +693,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream,
+       regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs),
+                          FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK |
+-                         FSL_SAI_CR4_CHMOD_MASK,
++                         FSL_SAI_CR4_CHMOD_MASK | FSL_SAI_CR4_FCONT_MASK,
+                          val_cr4);
+       regmap_update_bits(sai->regmap, FSL_SAI_xCR5(tx, ofs),
+                          FSL_SAI_CR5_WNW_MASK | FSL_SAI_CR5_W0W_MASK |
+diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
+index 550df87b6a068..eba465c2387b6 100644
+--- a/sound/soc/fsl/fsl_sai.h
++++ b/sound/soc/fsl/fsl_sai.h
+@@ -137,6 +137,7 @@
+ /* SAI Transmit and Receive Configuration 4 Register */
++#define FSL_SAI_CR4_FCONT_MASK        BIT(28)
+ #define FSL_SAI_CR4_FCONT     BIT(28)
+ #define FSL_SAI_CR4_FCOMB_SHIFT BIT(26)
+ #define FSL_SAI_CR4_FCOMB_SOFT  BIT(27)
+-- 
+2.43.0
+
diff --git a/queue-6.6/asoc-qcom-sm8250-add-qrb4210-rb2-sndcard-compatible-.patch b/queue-6.6/asoc-qcom-sm8250-add-qrb4210-rb2-sndcard-compatible-.patch
new file mode 100644 (file)
index 0000000..7dfd8e4
--- /dev/null
@@ -0,0 +1,35 @@
+From 8ede1c87417301f45f3a8ed02532b8be7e9106af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Oct 2024 03:20:10 +0100
+Subject: ASoC: qcom: sm8250: add qrb4210-rb2-sndcard compatible string
+
+From: Alexey Klimov <alexey.klimov@linaro.org>
+
+[ Upstream commit b97bc0656a66f89f78098d4d72dc04fa9518ab11 ]
+
+Add "qcom,qrb4210-rb2-sndcard" to the list of recognizable
+devices.
+
+Signed-off-by: Alexey Klimov <alexey.klimov@linaro.org>
+Link: https://patch.msgid.link/20241002022015.867031-3-alexey.klimov@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/qcom/sm8250.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c
+index 6558bf2e14e83..9eb8ae0196d91 100644
+--- a/sound/soc/qcom/sm8250.c
++++ b/sound/soc/qcom/sm8250.c
+@@ -153,6 +153,7 @@ static int sm8250_platform_probe(struct platform_device *pdev)
+ static const struct of_device_id snd_sm8250_dt_match[] = {
+       {.compatible = "qcom,sm8250-sndcard"},
++      {.compatible = "qcom,qrb4210-rb2-sndcard"},
+       {.compatible = "qcom,qrb5165-rb5-sndcard"},
+       {}
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.6/cifs-validate-content-of-nfs-reparse-point-buffer.patch b/queue-6.6/cifs-validate-content-of-nfs-reparse-point-buffer.patch
new file mode 100644 (file)
index 0000000..23236ce
--- /dev/null
@@ -0,0 +1,76 @@
+From 280e78cf09f85b935f1a89013dd93fa912886a8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 28 Sep 2024 23:59:47 +0200
+Subject: cifs: Validate content of NFS reparse point buffer
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pali Rohár <pali@kernel.org>
+
+[ Upstream commit 556ac52bb1e76cc28fd30aa117b42989965b3efd ]
+
+Symlink target location stored in DataBuffer is encoded in UTF-16. So check
+that symlink DataBuffer length is a non-zero even number. And check that
+DataBuffer does not contain UTF-16 null codepoint because Linux cannot
+process symlink with null byte.
+
+DataBuffer for char and block devices is 8 bytes long as it contains two
+32-bit numbers (major and minor). Add check for this.
+
+DataBuffer for sockets and fifos is zero-length. Add checks for this.
+
+Signed-off-by: Pali Rohár <pali@kernel.org>
+Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/reparse.c | 23 +++++++++++++++++++++++
+ 1 file changed, 23 insertions(+)
+
+diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
+index ad0e0de9a165d..7429b96a6ae5e 100644
+--- a/fs/smb/client/reparse.c
++++ b/fs/smb/client/reparse.c
+@@ -330,6 +330,18 @@ static int parse_reparse_posix(struct reparse_posix_data *buf,
+       switch ((type = le64_to_cpu(buf->InodeType))) {
+       case NFS_SPECFILE_LNK:
++              if (len == 0 || (len % 2)) {
++                      cifs_dbg(VFS, "srv returned malformed nfs symlink buffer\n");
++                      return -EIO;
++              }
++              /*
++               * Check that buffer does not contain UTF-16 null codepoint
++               * because Linux cannot process symlink with null byte.
++               */
++              if (UniStrnlen((wchar_t *)buf->DataBuffer, len/2) != len/2) {
++                      cifs_dbg(VFS, "srv returned null byte in nfs symlink target location\n");
++                      return -EIO;
++              }
+               data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer,
+                                                              len, true,
+                                                              cifs_sb->local_nls);
+@@ -340,8 +352,19 @@ static int parse_reparse_posix(struct reparse_posix_data *buf,
+               break;
+       case NFS_SPECFILE_CHR:
+       case NFS_SPECFILE_BLK:
++              /* DataBuffer for block and char devices contains two 32-bit numbers */
++              if (len != 8) {
++                      cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type);
++                      return -EIO;
++              }
++              break;
+       case NFS_SPECFILE_FIFO:
+       case NFS_SPECFILE_SOCK:
++              /* DataBuffer for fifos and sockets is empty */
++              if (len != 0) {
++                      cifs_dbg(VFS, "srv returned malformed nfs buffer for type: 0x%llx\n", type);
++                      return -EIO;
++              }
+               break;
+       default:
+               cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n",
+-- 
+2.43.0
+
diff --git a/queue-6.6/drm-vboxvideo-replace-fake-vla-at-end-of-vbva_mouse_.patch b/queue-6.6/drm-vboxvideo-replace-fake-vla-at-end-of-vbva_mouse_.patch
new file mode 100644 (file)
index 0000000..26b586b
--- /dev/null
@@ -0,0 +1,72 @@
+From 408acbc95e10e0ed5660151221550e509be98ec0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Aug 2024 12:45:23 +0200
+Subject: drm/vboxvideo: Replace fake VLA at end of vbva_mouse_pointer_shape
+ with real VLA
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit d92b90f9a54d9300a6e883258e79f36dab53bfae ]
+
+Replace the fake VLA at end of the vbva_mouse_pointer_shape struct with
+a real VLA to fix a "memcpy: detected field-spanning write error" warning:
+
+[   13.319813] memcpy: detected field-spanning write (size 16896) of single field "p->data" at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 (size 4)
+[   13.319841] WARNING: CPU: 0 PID: 1105 at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 hgsmi_update_pointer_shape+0x192/0x1c0 [vboxvideo]
+[   13.320038] Call Trace:
+[   13.320173]  hgsmi_update_pointer_shape [vboxvideo]
+[   13.320184]  vbox_cursor_atomic_update [vboxvideo]
+
+Note, as mentioned in the added comment, it seems the original length
+calculation for the allocated and sent hgsmi buffer is 4 bytes too large.
+Changing this is not the goal of this patch, so this behavior is kept.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240827104523.17442-1-hdegoede@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/vboxvideo/hgsmi_base.c | 10 +++++++++-
+ drivers/gpu/drm/vboxvideo/vboxvideo.h  |  4 +---
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/vboxvideo/hgsmi_base.c b/drivers/gpu/drm/vboxvideo/hgsmi_base.c
+index 8c041d7ce4f1b..87dccaecc3e57 100644
+--- a/drivers/gpu/drm/vboxvideo/hgsmi_base.c
++++ b/drivers/gpu/drm/vboxvideo/hgsmi_base.c
+@@ -139,7 +139,15 @@ int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags,
+               flags |= VBOX_MOUSE_POINTER_VISIBLE;
+       }
+-      p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len, HGSMI_CH_VBVA,
++      /*
++       * The 4 extra bytes come from switching struct vbva_mouse_pointer_shape
++       * from having a 4 bytes fixed array at the end to using a proper VLA
++       * at the end. These 4 extra bytes were not subtracted from sizeof(*p)
++       * before the switch to the VLA, so this way the behavior is unchanged.
++       * Chances are these 4 extra bytes are not necessary but they are kept
++       * to avoid regressions.
++       */
++      p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len + 4, HGSMI_CH_VBVA,
+                              VBVA_MOUSE_POINTER_SHAPE);
+       if (!p)
+               return -ENOMEM;
+diff --git a/drivers/gpu/drm/vboxvideo/vboxvideo.h b/drivers/gpu/drm/vboxvideo/vboxvideo.h
+index f60d82504da02..79ec8481de0e4 100644
+--- a/drivers/gpu/drm/vboxvideo/vboxvideo.h
++++ b/drivers/gpu/drm/vboxvideo/vboxvideo.h
+@@ -351,10 +351,8 @@ struct vbva_mouse_pointer_shape {
+        * Bytes in the gap between the AND and the XOR mask are undefined.
+        * XOR mask scanlines have no gap between them and size of XOR mask is:
+        * xor_len = width * 4 * height.
+-       *
+-       * Preallocate 4 bytes for accessing actual data as p->data.
+        */
+-      u8 data[4];
++      u8 data[];
+ } __packed;
+ /* pointer is visible */
+-- 
+2.43.0
+
diff --git a/queue-6.6/exec-don-t-warn-for-racy-path_noexec-check.patch b/queue-6.6/exec-don-t-warn-for-racy-path_noexec-check.patch
new file mode 100644 (file)
index 0000000..69072e9
--- /dev/null
@@ -0,0 +1,99 @@
+From 0367e756c7f6791575ceef1ce763890054a4b89b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 15:44:58 -0300
+Subject: exec: don't WARN for racy path_noexec check
+
+From: Mateusz Guzik <mjguzik@gmail.com>
+
+[ Upstream commit 0d196e7589cefe207d5d41f37a0a28a1fdeeb7c6 ]
+
+Both i_mode and noexec checks wrapped in WARN_ON stem from an artifact
+of the previous implementation. They used to legitimately check for the
+condition, but that got moved up in two commits:
+633fb6ac3980 ("exec: move S_ISREG() check earlier")
+0fd338b2d2cd ("exec: move path_noexec() check earlier")
+
+Instead of being removed, said checks were turned into WARN_ONs, which
+has some debug value.
+
+However, the spurious path_noexec check is racy, resulting in
+unwarranted warnings should someone race with setting the noexec flag.
+
+One can note there is more to perm-checking whether execve is allowed
+and none of the conditions are guaranteed to still hold after they were
+tested for.
+
+Additionally this does not validate whether the code path did any perm
+checking to begin with -- it will pass if the inode happens to be
+regular.
+
+Keep the redundant path_noexec() check, even though it's mindless
+nonsense checking for a guarantee that isn't given, but drop the WARN.
+
+Reword the commentary and do small tidy ups while here.
+
+Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
+Link: https://lore.kernel.org/r/20240805131721.765484-1-mjguzik@gmail.com
+[brauner: keep redundant path_noexec() check]
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+[cascardo: keep exit label and use it]
+Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exec.c | 21 +++++++++------------
+ 1 file changed, 9 insertions(+), 12 deletions(-)
+
+diff --git a/fs/exec.c b/fs/exec.c
+index f49b352a60323..7776209d98c10 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -143,13 +143,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
+               goto out;
+       /*
+-       * may_open() has already checked for this, so it should be
+-       * impossible to trip now. But we need to be extra cautious
+-       * and check again at the very end too.
++       * Check do_open_execat() for an explanation.
+        */
+       error = -EACCES;
+-      if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
+-                       path_noexec(&file->f_path)))
++      if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
++          path_noexec(&file->f_path))
+               goto exit;
+       error = -ENOEXEC;
+@@ -925,23 +923,22 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
+       file = do_filp_open(fd, name, &open_exec_flags);
+       if (IS_ERR(file))
+-              goto out;
++              return file;
+       /*
+-       * may_open() has already checked for this, so it should be
+-       * impossible to trip now. But we need to be extra cautious
+-       * and check again at the very end too.
++       * In the past the regular type check was here. It moved to may_open() in
++       * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
++       * an invariant that all non-regular files error out before we get here.
+        */
+       err = -EACCES;
+-      if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
+-                       path_noexec(&file->f_path)))
++      if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
++          path_noexec(&file->f_path))
+               goto exit;
+       err = deny_write_access(file);
+       if (err)
+               goto exit;
+-out:
+       return file;
+ exit:
+-- 
+2.43.0
+
diff --git a/queue-6.6/platform-x86-dell-sysman-add-support-for-alienware-p.patch b/queue-6.6/platform-x86-dell-sysman-add-support-for-alienware-p.patch
new file mode 100644 (file)
index 0000000..3d10c3d
--- /dev/null
@@ -0,0 +1,35 @@
+From 29f346e6c227c1097cfcf03d15aaf936797c1415 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 4 Oct 2024 23:27:58 +0800
+Subject: platform/x86: dell-sysman: add support for alienware products
+
+From: Crag Wang <crag_wang@dell.com>
+
+[ Upstream commit a561509b4187a8908eb7fbb2d1bf35bbc20ec74b ]
+
+Alienware supports firmware-attributes and has its own OEM string.
+
+Signed-off-by: Crag Wang <crag_wang@dell.com>
+Link: https://lore.kernel.org/r/20241004152826.93992-1-crag_wang@dell.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/dell/dell-wmi-sysman/sysman.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+index b929b4f824205..af49dd6b31ade 100644
+--- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
++++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
+@@ -521,6 +521,7 @@ static int __init sysman_init(void)
+       int ret = 0;
+       if (!dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL) &&
++          !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Alienware", NULL) &&
+           !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "www.dell.com", NULL)) {
+               pr_err("Unable to run on non-Dell system\n");
+               return -ENODEV;
+-- 
+2.43.0
+
index 97f0cec414cf7968feb8b0517b4eb02130febd91..77b83c6d2a4101c9b13aed8cae5158445ef1c50b 100644 (file)
@@ -104,3 +104,23 @@ usb-gadget-f_uac2-fix-non-newline-terminated-functio.patch
 usb-gadget-f_uac2-fix-return-value-for-uac2_attribut.patch
 xhci-separate-port-and-caps-macros-into-dedicated-fi.patch
 usb-dwc3-core-fix-system-suspend-on-ti-am62-platform.patch
+tracing-fprobe-event-cleanup-fix-a-wrong-comment-in-.patch
+tracing-probes-cleanup-set-trace_probe-nr_args-at-tr.patch
+tracing-probes-support-argn-in-return-probe-kprobe-a.patch
+uprobes-encapsulate-preparation-of-uprobe-args-buffe.patch
+uprobes-prepare-uprobe-args-buffer-lazily.patch
+uprobes-prevent-mutex_lock-under-rcu_read_lock.patch
+uprobe-avoid-out-of-bounds-memory-access-of-fetching.patch
+exec-don-t-warn-for-racy-path_noexec-check.patch
+drm-vboxvideo-replace-fake-vla-at-end-of-vbva_mouse_.patch
+asoc-amd-yc-add-quirk-for-hp-dragonfly-pro-one.patch
+asoc-codecs-lpass-rx-macro-add-missing-cdc_rx_bcl_vb.patch
+asoc-fsl_sai-enable-fifo-continue-on-error-fcont-bit.patch
+arm64-force-position-independent-veneers.patch
+udf-refactor-udf_current_aext-to-handle-error.patch
+udf-refactor-udf_next_aext-to-handle-error.patch
+udf-refactor-inode_bmap-to-handle-error.patch
+udf-fix-uninit-value-use-in-udf_get_fileshortad.patch
+asoc-qcom-sm8250-add-qrb4210-rb2-sndcard-compatible-.patch
+cifs-validate-content-of-nfs-reparse-point-buffer.patch
+platform-x86-dell-sysman-add-support-for-alienware-p.patch
diff --git a/queue-6.6/tracing-fprobe-event-cleanup-fix-a-wrong-comment-in-.patch b/queue-6.6/tracing-fprobe-event-cleanup-fix-a-wrong-comment-in-.patch
new file mode 100644 (file)
index 0000000..79f1a55
--- /dev/null
@@ -0,0 +1,37 @@
+From 9af5ea91417f0e6a24abaa1b0b299d5c893b3006 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Mar 2024 12:40:16 +0900
+Subject: tracing/fprobe-event: cleanup: Fix a wrong comment in fprobe event
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 7e37b6bc3cc096e24709908076807bb9c3cf0d38 ]
+
+Despite this being the fprobe event, the comment said "Kretprobe". So fix it.
+
+Link: https://lore.kernel.org/all/170952361630.229804.10832200172327797860.stgit@devnote2/
+
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_fprobe.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
+index 7d2ddbcfa377c..3ccef4d822358 100644
+--- a/kernel/trace/trace_fprobe.c
++++ b/kernel/trace/trace_fprobe.c
+@@ -210,7 +210,7 @@ fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ }
+ NOKPROBE_SYMBOL(fentry_trace_func);
+-/* Kretprobe handler */
++/* function exit handler */
+ static nokprobe_inline void
+ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+                  unsigned long ret_ip, struct pt_regs *regs,
+-- 
+2.43.0
+
diff --git a/queue-6.6/tracing-probes-cleanup-set-trace_probe-nr_args-at-tr.patch b/queue-6.6/tracing-probes-cleanup-set-trace_probe-nr_args-at-tr.patch
new file mode 100644 (file)
index 0000000..3aee326
--- /dev/null
@@ -0,0 +1,131 @@
+From e9060cac0329d0498a1c761ec3d27ff0efc88ed1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Mar 2024 12:40:36 +0900
+Subject: tracing/probes: cleanup: Set trace_probe::nr_args at trace_probe_init
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 035ba76014c096316fa809a46ce0a1b9af1cde0d ]
+
+Instead of incrementing the trace_probe::nr_args, init it at
+trace_probe_init(). Without this change, there is no way to get the number
+of trace_probe arguments while parsing it.
+This is a cleanup, so the behavior is not changed.
+
+Link: https://lore.kernel.org/all/170952363585.229804.13060759900346411951.stgit@devnote2/
+
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_eprobe.c |  2 +-
+ kernel/trace/trace_fprobe.c |  2 +-
+ kernel/trace/trace_kprobe.c |  2 +-
+ kernel/trace/trace_probe.c  | 10 ++++++----
+ kernel/trace/trace_probe.h  |  2 +-
+ kernel/trace/trace_uprobe.c |  2 +-
+ 6 files changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
+index 72714cbf475c7..42b76f02e57a9 100644
+--- a/kernel/trace/trace_eprobe.c
++++ b/kernel/trace/trace_eprobe.c
+@@ -220,7 +220,7 @@ static struct trace_eprobe *alloc_event_probe(const char *group,
+       if (!ep->event_system)
+               goto error;
+-      ret = trace_probe_init(&ep->tp, this_event, group, false);
++      ret = trace_probe_init(&ep->tp, this_event, group, false, nargs);
+       if (ret < 0)
+               goto error;
+diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
+index 3ccef4d822358..5109650b0d82d 100644
+--- a/kernel/trace/trace_fprobe.c
++++ b/kernel/trace/trace_fprobe.c
+@@ -389,7 +389,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
+       tf->tpoint = tpoint;
+       tf->fp.nr_maxactive = maxactive;
+-      ret = trace_probe_init(&tf->tp, event, group, false);
++      ret = trace_probe_init(&tf->tp, event, group, false, nargs);
+       if (ret < 0)
+               goto error;
+diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
+index 52f8b537dd0a0..d1a7c876e4198 100644
+--- a/kernel/trace/trace_kprobe.c
++++ b/kernel/trace/trace_kprobe.c
+@@ -290,7 +290,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
+       INIT_HLIST_NODE(&tk->rp.kp.hlist);
+       INIT_LIST_HEAD(&tk->rp.kp.list);
+-      ret = trace_probe_init(&tk->tp, event, group, false);
++      ret = trace_probe_init(&tk->tp, event, group, false, nargs);
+       if (ret < 0)
+               goto error;
+diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
+index ae162ba36a480..5d6c6c105f3cd 100644
+--- a/kernel/trace/trace_probe.c
++++ b/kernel/trace/trace_probe.c
+@@ -1383,9 +1383,6 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
+       struct probe_arg *parg = &tp->args[i];
+       const char *body;
+-      /* Increment count for freeing args in error case */
+-      tp->nr_args++;
+-
+       body = strchr(arg, '=');
+       if (body) {
+               if (body - arg > MAX_ARG_NAME_LEN) {
+@@ -1770,7 +1767,7 @@ void trace_probe_cleanup(struct trace_probe *tp)
+ }
+ int trace_probe_init(struct trace_probe *tp, const char *event,
+-                   const char *group, bool alloc_filter)
++                   const char *group, bool alloc_filter, int nargs)
+ {
+       struct trace_event_call *call;
+       size_t size = sizeof(struct trace_probe_event);
+@@ -1806,6 +1803,11 @@ int trace_probe_init(struct trace_probe *tp, const char *event,
+               goto error;
+       }
++      tp->nr_args = nargs;
++      /* Make sure pointers in args[] are NULL */
++      if (nargs)
++              memset(tp->args, 0, sizeof(tp->args[0]) * nargs);
++
+       return 0;
+ error:
+diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
+index c1877d0182691..ed8d1052f8a78 100644
+--- a/kernel/trace/trace_probe.h
++++ b/kernel/trace/trace_probe.h
+@@ -338,7 +338,7 @@ static inline bool trace_probe_has_single_file(struct trace_probe *tp)
+ }
+ int trace_probe_init(struct trace_probe *tp, const char *event,
+-                   const char *group, bool alloc_filter);
++                   const char *group, bool alloc_filter, int nargs);
+ void trace_probe_cleanup(struct trace_probe *tp);
+ int trace_probe_append(struct trace_probe *tp, struct trace_probe *to);
+ void trace_probe_unlink(struct trace_probe *tp);
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 99c051de412af..49d9af6d446e9 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -337,7 +337,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
+       if (!tu)
+               return ERR_PTR(-ENOMEM);
+-      ret = trace_probe_init(&tu->tp, event, group, true);
++      ret = trace_probe_init(&tu->tp, event, group, true, nargs);
+       if (ret < 0)
+               goto error;
+-- 
+2.43.0
+
diff --git a/queue-6.6/tracing-probes-support-argn-in-return-probe-kprobe-a.patch b/queue-6.6/tracing-probes-support-argn-in-return-probe-kprobe-a.patch
new file mode 100644 (file)
index 0000000..49d08a4
--- /dev/null
@@ -0,0 +1,886 @@
+From bd405015ec2e2c9e81811b65fe921dd4f98937e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Mar 2024 12:40:55 +0900
+Subject: tracing/probes: Support $argN in return probe (kprobe and fprobe)
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 25f00e40ce7953db197af3a59233711d154c9d80 ]
+
+Support accessing $argN in the return probe events. This will help users to
+record entry data in function return (exit) event for simplifying the function
+entry/exit information in one event, and record the result values (e.g.
+allocated object/initialized object) at function exit.
+
+For example, if we have a function `int init_foo(struct foo *obj, int param)`
+sometimes we want to check how `obj` is initialized. In such case, we can
+define a new return event like below;
+
+ # echo 'r init_foo retval=$retval param=$arg2 field1=+0($arg1)' >> kprobe_events
+
+Thus it records the function parameter `param` and its result `obj->field1`
+(the dereference will be done in the function exit timing) value at once.
+
+This also supports fprobe, BTF args and '$arg*'. So if CONFIG_DEBUG_INFO_BTF
+is enabled, we can trace both function parameters and the return value
+by following command.
+
+ # echo 'f target_function%return $arg* $retval' >> dynamic_events
+
+Link: https://lore.kernel.org/all/170952365552.229804.224112990211602895.stgit@devnote2/
+
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c                          |   1 +
+ kernel/trace/trace_eprobe.c                   |   6 +-
+ kernel/trace/trace_fprobe.c                   |  55 ++++--
+ kernel/trace/trace_kprobe.c                   |  56 +++++-
+ kernel/trace/trace_probe.c                    | 177 +++++++++++++++---
+ kernel/trace/trace_probe.h                    |  28 ++-
+ kernel/trace/trace_probe_tmpl.h               |  10 +-
+ kernel/trace/trace_uprobe.c                   |  12 +-
+ .../test.d/dynevent/fprobe_syntax_errors.tc   |   4 +
+ .../test.d/kprobe/kprobe_syntax_errors.tc     |   2 +
+ 10 files changed, 289 insertions(+), 62 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 4f93d57cc0299..ecd869ed27670 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -5757,6 +5757,7 @@ static const char readme_msg[] =
+       "\t           $stack<index>, $stack, $retval, $comm,\n"
+ #endif
+       "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
++      "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
+       "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
+       "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
+       "\t           symstr, <type>\\[<array-size>\\]\n"
+diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
+index 42b76f02e57a9..b03bc30f85ee3 100644
+--- a/kernel/trace/trace_eprobe.c
++++ b/kernel/trace/trace_eprobe.c
+@@ -390,8 +390,8 @@ static int get_eprobe_size(struct trace_probe *tp, void *rec)
+ /* Note that we don't verify it, since the code does not come from user space */
+ static int
+-process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+-                 void *base)
++process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
++                 void *dest, void *base)
+ {
+       unsigned long val;
+       int ret;
+@@ -438,7 +438,7 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
+               return;
+       entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+-      store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &edata->ep->tp, rec, NULL, sizeof(*entry), dsize);
+       trace_event_buffer_commit(&fbuffer);
+ }
+diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
+index 5109650b0d82d..4f42808155225 100644
+--- a/kernel/trace/trace_fprobe.c
++++ b/kernel/trace/trace_fprobe.c
+@@ -4,6 +4,7 @@
+  * Copyright (C) 2022 Google LLC.
+  */
+ #define pr_fmt(fmt)   "trace_fprobe: " fmt
++#include <asm/ptrace.h>
+ #include <linux/fprobe.h>
+ #include <linux/module.h>
+@@ -129,8 +130,8 @@ static bool trace_fprobe_is_registered(struct trace_fprobe *tf)
+  * from user space.
+  */
+ static int
+-process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+-                 void *base)
++process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
++                 void *dest, void *base)
+ {
+       struct pt_regs *regs = rec;
+       unsigned long val;
+@@ -152,6 +153,9 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+       case FETCH_OP_ARG:
+               val = regs_get_kernel_argument(regs, code->param);
+               break;
++      case FETCH_OP_EDATA:
++              val = *(unsigned long *)((unsigned long)edata + code->offset);
++              break;
+ #endif
+       case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
+               code++;
+@@ -184,7 +188,7 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+-      dsize = __get_data_size(&tf->tp, regs);
++      dsize = __get_data_size(&tf->tp, regs, NULL);
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+                                          sizeof(*entry) + tf->tp.size + dsize);
+@@ -194,7 +198,7 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       fbuffer.regs = regs;
+       entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+       entry->ip = entry_ip;
+-      store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
+       trace_event_buffer_commit(&fbuffer);
+ }
+@@ -211,10 +215,23 @@ fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+ NOKPROBE_SYMBOL(fentry_trace_func);
+ /* function exit handler */
++static int trace_fprobe_entry_handler(struct fprobe *fp, unsigned long entry_ip,
++                              unsigned long ret_ip, struct pt_regs *regs,
++                              void *entry_data)
++{
++      struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
++
++      if (tf->tp.entry_arg)
++              store_trace_entry_data(entry_data, &tf->tp, regs);
++
++      return 0;
++}
++NOKPROBE_SYMBOL(trace_fprobe_entry_handler)
++
+ static nokprobe_inline void
+ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+                  unsigned long ret_ip, struct pt_regs *regs,
+-                 struct trace_event_file *trace_file)
++                 void *entry_data, struct trace_event_file *trace_file)
+ {
+       struct fexit_trace_entry_head *entry;
+       struct trace_event_buffer fbuffer;
+@@ -227,7 +244,7 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+-      dsize = __get_data_size(&tf->tp, regs);
++      dsize = __get_data_size(&tf->tp, regs, entry_data);
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+                                          sizeof(*entry) + tf->tp.size + dsize);
+@@ -238,19 +255,19 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
+       entry->func = entry_ip;
+       entry->ret_ip = ret_ip;
+-      store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
+       trace_event_buffer_commit(&fbuffer);
+ }
+ static void
+ fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
+-               unsigned long ret_ip, struct pt_regs *regs)
++               unsigned long ret_ip, struct pt_regs *regs, void *entry_data)
+ {
+       struct event_file_link *link;
+       trace_probe_for_each_link_rcu(link, &tf->tp)
+-              __fexit_trace_func(tf, entry_ip, ret_ip, regs, link->file);
++              __fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data, link->file);
+ }
+ NOKPROBE_SYMBOL(fexit_trace_func);
+@@ -269,7 +286,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       if (hlist_empty(head))
+               return 0;
+-      dsize = __get_data_size(&tf->tp, regs);
++      dsize = __get_data_size(&tf->tp, regs, NULL);
+       __size = sizeof(*entry) + tf->tp.size + dsize;
+       size = ALIGN(__size + sizeof(u32), sizeof(u64));
+       size -= sizeof(u32);
+@@ -280,7 +297,7 @@ static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       entry->ip = entry_ip;
+       memset(&entry[1], 0, dsize);
+-      store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
+       perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+                             head, NULL);
+       return 0;
+@@ -289,7 +306,8 @@ NOKPROBE_SYMBOL(fentry_perf_func);
+ static void
+ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+-              unsigned long ret_ip, struct pt_regs *regs)
++              unsigned long ret_ip, struct pt_regs *regs,
++              void *entry_data)
+ {
+       struct trace_event_call *call = trace_probe_event_call(&tf->tp);
+       struct fexit_trace_entry_head *entry;
+@@ -301,7 +319,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       if (hlist_empty(head))
+               return;
+-      dsize = __get_data_size(&tf->tp, regs);
++      dsize = __get_data_size(&tf->tp, regs, entry_data);
+       __size = sizeof(*entry) + tf->tp.size + dsize;
+       size = ALIGN(__size + sizeof(u32), sizeof(u64));
+       size -= sizeof(u32);
+@@ -312,7 +330,7 @@ fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
+       entry->func = entry_ip;
+       entry->ret_ip = ret_ip;
+-      store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
+       perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+                             head, NULL);
+ }
+@@ -343,10 +361,10 @@ static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
+       struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
+       if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
+-              fexit_trace_func(tf, entry_ip, ret_ip, regs);
++              fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data);
+ #ifdef CONFIG_PERF_EVENTS
+       if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
+-              fexit_perf_func(tf, entry_ip, ret_ip, regs);
++              fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data);
+ #endif
+ }
+ NOKPROBE_SYMBOL(fexit_dispatcher);
+@@ -1109,6 +1127,11 @@ static int __trace_fprobe_create(int argc, const char *argv[])
+                       goto error;     /* This can be -ENOMEM */
+       }
++      if (is_return && tf->tp.entry_arg) {
++              tf->fp.entry_handler = trace_fprobe_entry_handler;
++              tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp);
++      }
++
+       ret = traceprobe_set_print_fmt(&tf->tp,
+                       is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL);
+       if (ret < 0)
+diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
+index d1a7c876e4198..32c617123f374 100644
+--- a/kernel/trace/trace_kprobe.c
++++ b/kernel/trace/trace_kprobe.c
+@@ -740,6 +740,9 @@ static unsigned int number_of_same_symbols(char *func_name)
+       return ctx.count;
+ }
++static int trace_kprobe_entry_handler(struct kretprobe_instance *ri,
++                                    struct pt_regs *regs);
++
+ static int __trace_kprobe_create(int argc, const char *argv[])
+ {
+       /*
+@@ -948,6 +951,11 @@ static int __trace_kprobe_create(int argc, const char *argv[])
+               if (ret)
+                       goto error;     /* This can be -ENOMEM */
+       }
++      /* entry handler for kretprobe */
++      if (is_return && tk->tp.entry_arg) {
++              tk->rp.entry_handler = trace_kprobe_entry_handler;
++              tk->rp.data_size = traceprobe_get_entry_data_size(&tk->tp);
++      }
+       ptype = is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
+       ret = traceprobe_set_print_fmt(&tk->tp, ptype);
+@@ -1303,8 +1311,8 @@ static const struct file_operations kprobe_profile_ops = {
+ /* Note that we don't verify it, since the code does not come from user space */
+ static int
+-process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+-                 void *base)
++process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
++                 void *dest, void *base)
+ {
+       struct pt_regs *regs = rec;
+       unsigned long val;
+@@ -1329,6 +1337,9 @@ process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+       case FETCH_OP_ARG:
+               val = regs_get_kernel_argument(regs, code->param);
+               break;
++      case FETCH_OP_EDATA:
++              val = *(unsigned long *)((unsigned long)edata + code->offset);
++              break;
+ #endif
+       case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
+               code++;
+@@ -1359,7 +1370,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+-      dsize = __get_data_size(&tk->tp, regs);
++      dsize = __get_data_size(&tk->tp, regs, NULL);
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+                                          sizeof(*entry) + tk->tp.size + dsize);
+@@ -1368,7 +1379,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
+       fbuffer.regs = regs;
+       entry->ip = (unsigned long)tk->rp.kp.addr;
+-      store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tk->tp, regs, NULL, sizeof(*entry), dsize);
+       trace_event_buffer_commit(&fbuffer);
+ }
+@@ -1384,6 +1395,31 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
+ NOKPROBE_SYMBOL(kprobe_trace_func);
+ /* Kretprobe handler */
++
++static int trace_kprobe_entry_handler(struct kretprobe_instance *ri,
++                                    struct pt_regs *regs)
++{
++      struct kretprobe *rp = get_kretprobe(ri);
++      struct trace_kprobe *tk;
++
++      /*
++       * There is a small chance that get_kretprobe(ri) returns NULL when
++       * the kretprobe is unregister on another CPU between kretprobe's
++       * trampoline_handler and this function.
++       */
++      if (unlikely(!rp))
++              return -ENOENT;
++
++      tk = container_of(rp, struct trace_kprobe, rp);
++
++      /* store argument values into ri->data as entry data */
++      if (tk->tp.entry_arg)
++              store_trace_entry_data(ri->data, &tk->tp, regs);
++
++      return 0;
++}
++
++
+ static nokprobe_inline void
+ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
+                      struct pt_regs *regs,
+@@ -1399,7 +1435,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+-      dsize = __get_data_size(&tk->tp, regs);
++      dsize = __get_data_size(&tk->tp, regs, ri->data);
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+                                          sizeof(*entry) + tk->tp.size + dsize);
+@@ -1409,7 +1445,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
+       fbuffer.regs = regs;
+       entry->func = (unsigned long)tk->rp.kp.addr;
+       entry->ret_ip = get_kretprobe_retaddr(ri);
+-      store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tk->tp, regs, ri->data, sizeof(*entry), dsize);
+       trace_event_buffer_commit(&fbuffer);
+ }
+@@ -1557,7 +1593,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
+       if (hlist_empty(head))
+               return 0;
+-      dsize = __get_data_size(&tk->tp, regs);
++      dsize = __get_data_size(&tk->tp, regs, NULL);
+       __size = sizeof(*entry) + tk->tp.size + dsize;
+       size = ALIGN(__size + sizeof(u32), sizeof(u64));
+       size -= sizeof(u32);
+@@ -1568,7 +1604,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
+       entry->ip = (unsigned long)tk->rp.kp.addr;
+       memset(&entry[1], 0, dsize);
+-      store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tk->tp, regs, NULL, sizeof(*entry), dsize);
+       perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+                             head, NULL);
+       return 0;
+@@ -1593,7 +1629,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
+       if (hlist_empty(head))
+               return;
+-      dsize = __get_data_size(&tk->tp, regs);
++      dsize = __get_data_size(&tk->tp, regs, ri->data);
+       __size = sizeof(*entry) + tk->tp.size + dsize;
+       size = ALIGN(__size + sizeof(u32), sizeof(u64));
+       size -= sizeof(u32);
+@@ -1604,7 +1640,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
+       entry->func = (unsigned long)tk->rp.kp.addr;
+       entry->ret_ip = get_kretprobe_retaddr(ri);
+-      store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
++      store_trace_args(&entry[1], &tk->tp, regs, ri->data, sizeof(*entry), dsize);
+       perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+                             head, NULL);
+ }
+diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
+index 5d6c6c105f3cd..58a6275c7f496 100644
+--- a/kernel/trace/trace_probe.c
++++ b/kernel/trace/trace_probe.c
+@@ -598,6 +598,8 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
+       return 0;
+ }
++static int __store_entry_arg(struct trace_probe *tp, int argnum);
++
+ static int parse_btf_arg(char *varname,
+                        struct fetch_insn **pcode, struct fetch_insn *end,
+                        struct traceprobe_parse_context *ctx)
+@@ -622,11 +624,7 @@ static int parse_btf_arg(char *varname,
+               return -EOPNOTSUPP;
+       }
+-      if (ctx->flags & TPARG_FL_RETURN) {
+-              if (strcmp(varname, "$retval") != 0) {
+-                      trace_probe_log_err(ctx->offset, NO_BTFARG);
+-                      return -ENOENT;
+-              }
++      if (ctx->flags & TPARG_FL_RETURN && !strcmp(varname, "$retval")) {
+               code->op = FETCH_OP_RETVAL;
+               /* Check whether the function return type is not void */
+               if (query_btf_context(ctx) == 0) {
+@@ -658,11 +656,21 @@ static int parse_btf_arg(char *varname,
+               const char *name = btf_name_by_offset(ctx->btf, params[i].name_off);
+               if (name && !strcmp(name, varname)) {
+-                      code->op = FETCH_OP_ARG;
+-                      if (ctx->flags & TPARG_FL_TPOINT)
+-                              code->param = i + 1;
+-                      else
+-                              code->param = i;
++                      if (tparg_is_function_entry(ctx->flags)) {
++                              code->op = FETCH_OP_ARG;
++                              if (ctx->flags & TPARG_FL_TPOINT)
++                                      code->param = i + 1;
++                              else
++                                      code->param = i;
++                      } else if (tparg_is_function_return(ctx->flags)) {
++                              code->op = FETCH_OP_EDATA;
++                              ret = __store_entry_arg(ctx->tp, i);
++                              if (ret < 0) {
++                                      /* internal error */
++                                      return ret;
++                              }
++                              code->offset = ret;
++                      }
+                       tid = params[i].type;
+                       goto found;
+               }
+@@ -759,6 +767,110 @@ static int check_prepare_btf_string_fetch(char *typename,
+ #endif
++#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
++
++static int __store_entry_arg(struct trace_probe *tp, int argnum)
++{
++      struct probe_entry_arg *earg = tp->entry_arg;
++      bool match = false;
++      int i, offset;
++
++      if (!earg) {
++              earg = kzalloc(sizeof(*tp->entry_arg), GFP_KERNEL);
++              if (!earg)
++                      return -ENOMEM;
++              earg->size = 2 * tp->nr_args + 1;
++              earg->code = kcalloc(earg->size, sizeof(struct fetch_insn),
++                                   GFP_KERNEL);
++              if (!earg->code) {
++                      kfree(earg);
++                      return -ENOMEM;
++              }
++              /* Fill the code buffer with 'end' to simplify it */
++              for (i = 0; i < earg->size; i++)
++                      earg->code[i].op = FETCH_OP_END;
++              tp->entry_arg = earg;
++      }
++
++      offset = 0;
++      for (i = 0; i < earg->size - 1; i++) {
++              switch (earg->code[i].op) {
++              case FETCH_OP_END:
++                      earg->code[i].op = FETCH_OP_ARG;
++                      earg->code[i].param = argnum;
++                      earg->code[i + 1].op = FETCH_OP_ST_EDATA;
++                      earg->code[i + 1].offset = offset;
++                      return offset;
++              case FETCH_OP_ARG:
++                      match = (earg->code[i].param == argnum);
++                      break;
++              case FETCH_OP_ST_EDATA:
++                      offset = earg->code[i].offset;
++                      if (match)
++                              return offset;
++                      offset += sizeof(unsigned long);
++                      break;
++              default:
++                      break;
++              }
++      }
++      return -ENOSPC;
++}
++
++int traceprobe_get_entry_data_size(struct trace_probe *tp)
++{
++      struct probe_entry_arg *earg = tp->entry_arg;
++      int i, size = 0;
++
++      if (!earg)
++              return 0;
++
++      for (i = 0; i < earg->size; i++) {
++              switch (earg->code[i].op) {
++              case FETCH_OP_END:
++                      goto out;
++              case FETCH_OP_ST_EDATA:
++                      size = earg->code[i].offset + sizeof(unsigned long);
++                      break;
++              default:
++                      break;
++              }
++      }
++out:
++      return size;
++}
++
++void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs)
++{
++      struct probe_entry_arg *earg = tp->entry_arg;
++      unsigned long val;
++      int i;
++
++      if (!earg)
++              return;
++
++      for (i = 0; i < earg->size; i++) {
++              struct fetch_insn *code = &earg->code[i];
++
++              switch (code->op) {
++              case FETCH_OP_ARG:
++                      val = regs_get_kernel_argument(regs, code->param);
++                      break;
++              case FETCH_OP_ST_EDATA:
++                      *(unsigned long *)((unsigned long)edata + code->offset) = val;
++                      break;
++              case FETCH_OP_END:
++                      goto end;
++              default:
++                      break;
++              }
++      }
++end:
++      return;
++}
++NOKPROBE_SYMBOL(store_trace_entry_data)
++#endif
++
+ #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
+ /* Parse $vars. @orig_arg points '$', which syncs to @ctx->offset */
+@@ -834,7 +946,7 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
+ #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+       len = str_has_prefix(arg, "arg");
+-      if (len && tparg_is_function_entry(ctx->flags)) {
++      if (len) {
+               ret = kstrtoul(arg + len, 10, &param);
+               if (ret)
+                       goto inval;
+@@ -843,15 +955,29 @@ static int parse_probe_vars(char *orig_arg, const struct fetch_type *t,
+                       err = TP_ERR_BAD_ARG_NUM;
+                       goto inval;
+               }
++              param--; /* argN starts from 1, but internal arg[N] starts from 0 */
+-              code->op = FETCH_OP_ARG;
+-              code->param = (unsigned int)param - 1;
+-              /*
+-               * The tracepoint probe will probe a stub function, and the
+-               * first parameter of the stub is a dummy and should be ignored.
+-               */
+-              if (ctx->flags & TPARG_FL_TPOINT)
+-                      code->param++;
++              if (tparg_is_function_entry(ctx->flags)) {
++                      code->op = FETCH_OP_ARG;
++                      code->param = (unsigned int)param;
++                      /*
++                       * The tracepoint probe will probe a stub function, and the
++                       * first parameter of the stub is a dummy and should be ignored.
++                       */
++                      if (ctx->flags & TPARG_FL_TPOINT)
++                              code->param++;
++              } else if (tparg_is_function_return(ctx->flags)) {
++                      /* function entry argument access from return probe */
++                      ret = __store_entry_arg(ctx->tp, param);
++                      if (ret < 0)    /* This error should be an internal error */
++                              return ret;
++
++                      code->op = FETCH_OP_EDATA;
++                      code->offset = ret;
++              } else {
++                      err = TP_ERR_NOFENTRY_ARGS;
++                      goto inval;
++              }
+               return 0;
+       }
+ #endif
+@@ -1041,7 +1167,8 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
+               break;
+       default:
+               if (isalpha(arg[0]) || arg[0] == '_') { /* BTF variable */
+-                      if (!tparg_is_function_entry(ctx->flags)) {
++                      if (!tparg_is_function_entry(ctx->flags) &&
++                          !tparg_is_function_return(ctx->flags)) {
+                               trace_probe_log_err(ctx->offset, NOSUP_BTFARG);
+                               return -EINVAL;
+                       }
+@@ -1383,6 +1510,7 @@ int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, const char *arg,
+       struct probe_arg *parg = &tp->args[i];
+       const char *body;
++      ctx->tp = tp;
+       body = strchr(arg, '=');
+       if (body) {
+               if (body - arg > MAX_ARG_NAME_LEN) {
+@@ -1439,7 +1567,8 @@ static int argv_has_var_arg(int argc, const char *argv[], int *args_idx,
+               if (str_has_prefix(argv[i], "$arg")) {
+                       trace_probe_log_set_index(i + 2);
+-                      if (!tparg_is_function_entry(ctx->flags)) {
++                      if (!tparg_is_function_entry(ctx->flags) &&
++                          !tparg_is_function_return(ctx->flags)) {
+                               trace_probe_log_err(0, NOFENTRY_ARGS);
+                               return -EINVAL;
+                       }
+@@ -1762,6 +1891,12 @@ void trace_probe_cleanup(struct trace_probe *tp)
+       for (i = 0; i < tp->nr_args; i++)
+               traceprobe_free_probe_arg(&tp->args[i]);
++      if (tp->entry_arg) {
++              kfree(tp->entry_arg->code);
++              kfree(tp->entry_arg);
++              tp->entry_arg = NULL;
++      }
++
+       if (tp->event)
+               trace_probe_unlink(tp);
+ }
+diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
+index ed8d1052f8a78..cef3a50628a3e 100644
+--- a/kernel/trace/trace_probe.h
++++ b/kernel/trace/trace_probe.h
+@@ -92,6 +92,7 @@ enum fetch_op {
+       FETCH_OP_ARG,           /* Function argument : .param */
+       FETCH_OP_FOFFS,         /* File offset: .immediate */
+       FETCH_OP_DATA,          /* Allocated data: .data */
++      FETCH_OP_EDATA,         /* Entry data: .offset */
+       // Stage 2 (dereference) op
+       FETCH_OP_DEREF,         /* Dereference: .offset */
+       FETCH_OP_UDEREF,        /* User-space Dereference: .offset */
+@@ -102,6 +103,7 @@ enum fetch_op {
+       FETCH_OP_ST_STRING,     /* String: .offset, .size */
+       FETCH_OP_ST_USTRING,    /* User String: .offset, .size */
+       FETCH_OP_ST_SYMSTR,     /* Kernel Symbol String: .offset, .size */
++      FETCH_OP_ST_EDATA,      /* Store Entry Data: .offset */
+       // Stage 4 (modify) op
+       FETCH_OP_MOD_BF,        /* Bitfield: .basesize, .lshift, .rshift */
+       // Stage 5 (loop) op
+@@ -232,6 +234,11 @@ struct probe_arg {
+       const struct fetch_type *type;  /* Type of this argument */
+ };
++struct probe_entry_arg {
++      struct fetch_insn       *code;
++      unsigned int            size;   /* The entry data size */
++};
++
+ struct trace_uprobe_filter {
+       rwlock_t                rwlock;
+       int                     nr_systemwide;
+@@ -253,6 +260,7 @@ struct trace_probe {
+       struct trace_probe_event        *event;
+       ssize_t                         size;   /* trace entry size */
+       unsigned int                    nr_args;
++      struct probe_entry_arg          *entry_arg;     /* This is only for return probe */
+       struct probe_arg                args[];
+ };
+@@ -355,6 +363,18 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
+ int trace_probe_print_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
+                u8 *data, void *field);
++#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
++int traceprobe_get_entry_data_size(struct trace_probe *tp);
++/* This is a runtime function to store entry data */
++void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs);
++#else /* !CONFIG_HAVE_FUNCTION_ARG_ACCESS_API */
++static inline int traceprobe_get_entry_data_size(struct trace_probe *tp)
++{
++      return 0;
++}
++#define store_trace_entry_data(edata, tp, regs) do { } while (0)
++#endif
++
+ #define trace_probe_for_each_link(pos, tp)    \
+       list_for_each_entry(pos, &(tp)->event->files, list)
+ #define trace_probe_for_each_link_rcu(pos, tp)        \
+@@ -381,6 +401,11 @@ static inline bool tparg_is_function_entry(unsigned int flags)
+       return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_FENTRY);
+ }
++static inline bool tparg_is_function_return(unsigned int flags)
++{
++      return (flags & TPARG_FL_LOC_MASK) == (TPARG_FL_KERNEL | TPARG_FL_RETURN);
++}
++
+ struct traceprobe_parse_context {
+       struct trace_event_call *event;
+       /* BTF related parameters */
+@@ -392,6 +417,7 @@ struct traceprobe_parse_context {
+       const struct btf_type *last_type;       /* Saved type */
+       u32 last_bitoffs;               /* Saved bitoffs */
+       u32 last_bitsize;               /* Saved bitsize */
++      struct trace_probe *tp;
+       unsigned int flags;
+       int offset;
+ };
+@@ -506,7 +532,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
+       C(NO_BTFARG,            "This variable is not found at this probe point"),\
+       C(NO_BTF_ENTRY,         "No BTF entry for this probe point"),   \
+       C(BAD_VAR_ARGS,         "$arg* must be an independent parameter without name etc."),\
+-      C(NOFENTRY_ARGS,        "$arg* can be used only on function entry"),    \
++      C(NOFENTRY_ARGS,        "$arg* can be used only on function entry or exit"),    \
+       C(DOUBLE_ARGS,          "$arg* can be used only once in the parameters"),       \
+       C(ARGS_2LONG,           "$arg* failed because the argument list is too long"),  \
+       C(ARGIDX_2BIG,          "$argN index is too big"),              \
+diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
+index 3935b347f874b..2caf0d2afb322 100644
+--- a/kernel/trace/trace_probe_tmpl.h
++++ b/kernel/trace/trace_probe_tmpl.h
+@@ -54,7 +54,7 @@ fetch_apply_bitfield(struct fetch_insn *code, void *buf)
+  * If dest is NULL, don't store result and return required dynamic data size.
+  */
+ static int
+-process_fetch_insn(struct fetch_insn *code, void *rec,
++process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
+                  void *dest, void *base);
+ static nokprobe_inline int fetch_store_strlen(unsigned long addr);
+ static nokprobe_inline int
+@@ -232,7 +232,7 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
+ /* Sum up total data length for dynamic arrays (strings) */
+ static nokprobe_inline int
+-__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
++__get_data_size(struct trace_probe *tp, struct pt_regs *regs, void *edata)
+ {
+       struct probe_arg *arg;
+       int i, len, ret = 0;
+@@ -240,7 +240,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+       for (i = 0; i < tp->nr_args; i++) {
+               arg = tp->args + i;
+               if (unlikely(arg->dynamic)) {
+-                      len = process_fetch_insn(arg->code, regs, NULL, NULL);
++                      len = process_fetch_insn(arg->code, regs, edata, NULL, NULL);
+                       if (len > 0)
+                               ret += len;
+               }
+@@ -251,7 +251,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+ /* Store the value of each argument */
+ static nokprobe_inline void
+-store_trace_args(void *data, struct trace_probe *tp, void *rec,
++store_trace_args(void *data, struct trace_probe *tp, void *rec, void *edata,
+                int header_size, int maxlen)
+ {
+       struct probe_arg *arg;
+@@ -266,7 +266,7 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec,
+               /* Point the dynamic data area if needed */
+               if (unlikely(arg->dynamic))
+                       *dl = make_data_loc(maxlen, dyndata - base);
+-              ret = process_fetch_insn(arg->code, rec, dl, base);
++              ret = process_fetch_insn(arg->code, rec, edata, dl, base);
+               if (arg->dynamic && likely(ret > 0)) {
+                       dyndata += ret;
+                       maxlen -= ret;
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 49d9af6d446e9..78d76d74f45bc 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -211,8 +211,8 @@ static unsigned long translate_user_vaddr(unsigned long file_offset)
+ /* Note that we don't verify it, since the code does not come from user space */
+ static int
+-process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
+-                 void *base)
++process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
++                 void *dest, void *base)
+ {
+       struct pt_regs *regs = rec;
+       unsigned long val;
+@@ -1490,11 +1490,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+       if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+               return 0;
+-      dsize = __get_data_size(&tu->tp, regs);
++      dsize = __get_data_size(&tu->tp, regs, NULL);
+       esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+       ucb = uprobe_buffer_get();
+-      store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
++      store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+               ret |= uprobe_trace_func(tu, regs, ucb, dsize);
+@@ -1525,11 +1525,11 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
+       if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+               return 0;
+-      dsize = __get_data_size(&tu->tp, regs);
++      dsize = __get_data_size(&tu->tp, regs, NULL);
+       esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+       ucb = uprobe_buffer_get();
+-      store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
++      store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+               uretprobe_trace_func(tu, func, regs, ucb, dsize);
+diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+index 20e42c030095b..61877d1664511 100644
+--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
++++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+@@ -34,7 +34,9 @@ check_error 'f vfs_read ^$stack10000'        # BAD_STACK_NUM
+ check_error 'f vfs_read ^$arg10000'   # BAD_ARG_NUM
++if ! grep -q 'kernel return probes support:' README; then
+ check_error 'f vfs_read $retval ^$arg1' # BAD_VAR
++fi
+ check_error 'f vfs_read ^$none_var'   # BAD_VAR
+ check_error 'f vfs_read ^'$REG                # BAD_VAR
+@@ -99,7 +101,9 @@ if grep -q "<argname>" README; then
+ check_error 'f vfs_read args=^$arg*'          # BAD_VAR_ARGS
+ check_error 'f vfs_read +0(^$arg*)'           # BAD_VAR_ARGS
+ check_error 'f vfs_read $arg* ^$arg*'         # DOUBLE_ARGS
++if ! grep -q 'kernel return probes support:' README; then
+ check_error 'f vfs_read%return ^$arg*'                # NOFENTRY_ARGS
++fi
+ check_error 'f vfs_read ^hoge'                        # NO_BTFARG
+ check_error 'f kfree ^$arg10'                 # NO_BTFARG (exceed the number of parameters)
+ check_error 'f kfree%return ^$retval'         # NO_RETVAL
+diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+index 65fbb26fd58c1..a16c6a6f6055c 100644
+--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
++++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+@@ -108,7 +108,9 @@ if grep -q "<argname>" README; then
+ check_error 'p vfs_read args=^$arg*'          # BAD_VAR_ARGS
+ check_error 'p vfs_read +0(^$arg*)'           # BAD_VAR_ARGS
+ check_error 'p vfs_read $arg* ^$arg*'         # DOUBLE_ARGS
++if ! grep -q 'kernel return probes support:' README; then
+ check_error 'r vfs_read ^$arg*'                       # NOFENTRY_ARGS
++fi
+ check_error 'p vfs_read+8 ^$arg*'             # NOFENTRY_ARGS
+ check_error 'p vfs_read ^hoge'                        # NO_BTFARG
+ check_error 'p kfree ^$arg10'                 # NO_BTFARG (exceed the number of parameters)
+-- 
+2.43.0
+
diff --git a/queue-6.6/udf-fix-uninit-value-use-in-udf_get_fileshortad.patch b/queue-6.6/udf-fix-uninit-value-use-in-udf_get_fileshortad.patch
new file mode 100644 (file)
index 0000000..9454dee
--- /dev/null
@@ -0,0 +1,54 @@
+From f08091f812aa1ced381e509bdf651ed8f16649d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 Sep 2024 09:46:15 +0200
+Subject: udf: fix uninit-value use in udf_get_fileshortad
+
+From: Gianfranco Trad <gianf.trad@gmail.com>
+
+[ Upstream commit 264db9d666ad9a35075cc9ed9ec09d021580fbb1 ]
+
+Check for overflow when computing alen in udf_current_aext to mitigate
+later uninit-value use in udf_get_fileshortad KMSAN bug[1].
+After applying the patch reproducer did not trigger any issue[2].
+
+[1] https://syzkaller.appspot.com/bug?extid=8901c4560b7ab5c2f9df
+[2] https://syzkaller.appspot.com/x/log.txt?x=10242227980000
+
+Reported-by: syzbot+8901c4560b7ab5c2f9df@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=8901c4560b7ab5c2f9df
+Tested-by: syzbot+8901c4560b7ab5c2f9df@syzkaller.appspotmail.com
+Suggested-by: Jan Kara <jack@suse.com>
+Signed-off-by: Gianfranco Trad <gianf.trad@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20240925074613.8475-3-gianf.trad@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index 37fa27136fafb..e98c198f85b96 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -2255,12 +2255,15 @@ int udf_current_aext(struct inode *inode, struct extent_position *epos,
+               alen = udf_file_entry_alloc_offset(inode) +
+                                                       iinfo->i_lenAlloc;
+       } else {
++              struct allocExtDesc *header =
++                      (struct allocExtDesc *)epos->bh->b_data;
++
+               if (!epos->offset)
+                       epos->offset = sizeof(struct allocExtDesc);
+               ptr = epos->bh->b_data + epos->offset;
+-              alen = sizeof(struct allocExtDesc) +
+-                      le32_to_cpu(((struct allocExtDesc *)epos->bh->b_data)->
+-                                                      lengthAllocDescs);
++              if (check_add_overflow(sizeof(struct allocExtDesc),
++                              le32_to_cpu(header->lengthAllocDescs), &alen))
++                      return -1;
+       }
+       switch (iinfo->i_alloc_type) {
+-- 
+2.43.0
+
diff --git a/queue-6.6/udf-refactor-inode_bmap-to-handle-error.patch b/queue-6.6/udf-refactor-inode_bmap-to-handle-error.patch
new file mode 100644 (file)
index 0000000..deb7fcc
--- /dev/null
@@ -0,0 +1,240 @@
+From 55d029512c75c7ed51a9874045257bea8731a67e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 19:54:25 +0800
+Subject: udf: refactor inode_bmap() to handle error
+
+From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
+
+[ Upstream commit c226964ec786f3797ed389a16392ce4357697d24 ]
+
+Refactor inode_bmap() to handle error since udf_next_aext() can return
+error now. On situations like ftruncate, udf_extend_file() can now
+detect errors and bail out early without resorting to checking for
+particular offsets and assuming internal behavior of these functions.
+
+Reported-by: syzbot+7a4842f0b1801230a989@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=7a4842f0b1801230a989
+Tested-by: syzbot+7a4842f0b1801230a989@syzkaller.appspotmail.com
+Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20241001115425.266556-4-zhaomzhao@126.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/directory.c | 13 ++++++++-----
+ fs/udf/inode.c     | 40 +++++++++++++++++++++++++---------------
+ fs/udf/partition.c |  6 ++++--
+ fs/udf/truncate.c  |  6 ++++--
+ fs/udf/udfdecl.h   |  5 +++--
+ 5 files changed, 44 insertions(+), 26 deletions(-)
+
+diff --git a/fs/udf/directory.c b/fs/udf/directory.c
+index c6950050e7aeb..632453aa38934 100644
+--- a/fs/udf/directory.c
++++ b/fs/udf/directory.c
+@@ -246,6 +246,7 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir,
+ {
+       struct udf_inode_info *iinfo = UDF_I(dir);
+       int err = 0;
++      int8_t etype;
+       iter->dir = dir;
+       iter->bh[0] = iter->bh[1] = NULL;
+@@ -265,9 +266,9 @@ int udf_fiiter_init(struct udf_fileident_iter *iter, struct inode *dir,
+               goto out;
+       }
+-      if (inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos,
+-                     &iter->eloc, &iter->elen, &iter->loffset) !=
+-          (EXT_RECORDED_ALLOCATED >> 30)) {
++      err = inode_bmap(dir, iter->pos >> dir->i_blkbits, &iter->epos,
++                       &iter->eloc, &iter->elen, &iter->loffset, &etype);
++      if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) {
+               if (pos == dir->i_size)
+                       return 0;
+               udf_err(dir->i_sb,
+@@ -463,6 +464,7 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter)
+       sector_t block;
+       uint32_t old_elen = iter->elen;
+       int err;
++      int8_t etype;
+       if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB))
+               return -EINVAL;
+@@ -477,8 +479,9 @@ int udf_fiiter_append_blk(struct udf_fileident_iter *iter)
+               udf_fiiter_update_elen(iter, old_elen);
+               return err;
+       }
+-      if (inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen,
+-                     &iter->loffset) != (EXT_RECORDED_ALLOCATED >> 30)) {
++      err = inode_bmap(iter->dir, block, &iter->epos, &iter->eloc, &iter->elen,
++                 &iter->loffset, &etype);
++      if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) {
+               udf_err(iter->dir->i_sb,
+                       "block %llu not allocated in directory (ino %lu)\n",
+                       (unsigned long long)block, iter->dir->i_ino);
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index f7623b49ec349..37fa27136fafb 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -408,7 +408,7 @@ struct udf_map_rq {
+ static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
+ {
+-      int err;
++      int ret;
+       struct udf_inode_info *iinfo = UDF_I(inode);
+       if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB))
+@@ -420,18 +420,24 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
+               uint32_t elen;
+               sector_t offset;
+               struct extent_position epos = {};
++              int8_t etype;
+               down_read(&iinfo->i_data_sem);
+-              if (inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset)
+-                              == (EXT_RECORDED_ALLOCATED >> 30)) {
++              ret = inode_bmap(inode, map->lblk, &epos, &eloc, &elen, &offset,
++                               &etype);
++              if (ret < 0)
++                      goto out_read;
++              if (ret > 0 && etype == (EXT_RECORDED_ALLOCATED >> 30)) {
+                       map->pblk = udf_get_lb_pblock(inode->i_sb, &eloc,
+                                                       offset);
+                       map->oflags |= UDF_BLK_MAPPED;
++                      ret = 0;
+               }
++out_read:
+               up_read(&iinfo->i_data_sem);
+               brelse(epos.bh);
+-              return 0;
++              return ret;
+       }
+       down_write(&iinfo->i_data_sem);
+@@ -442,9 +448,9 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
+       if (((loff_t)map->lblk) << inode->i_blkbits >= iinfo->i_lenExtents)
+               udf_discard_prealloc(inode);
+       udf_clear_extent_cache(inode);
+-      err = inode_getblk(inode, map);
++      ret = inode_getblk(inode, map);
+       up_write(&iinfo->i_data_sem);
+-      return err;
++      return ret;
+ }
+ static int __udf_get_block(struct inode *inode, sector_t block,
+@@ -666,8 +672,10 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
+        */
+       udf_discard_prealloc(inode);
+-      etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
+-      within_last_ext = (etype != -1);
++      err = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset, &etype);
++      if (err < 0)
++              goto out;
++      within_last_ext = (err == 1);
+       /* We don't expect extents past EOF... */
+       WARN_ON_ONCE(within_last_ext &&
+                    elen > ((loff_t)offset + 1) << inode->i_blkbits);
+@@ -2401,13 +2409,15 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
+       return (elen >> 30);
+ }
+-int8_t inode_bmap(struct inode *inode, sector_t block,
+-                struct extent_position *pos, struct kernel_lb_addr *eloc,
+-                uint32_t *elen, sector_t *offset)
++/*
++ * Returns 1 on success, -errno on error, 0 on hit EOF.
++ */
++int inode_bmap(struct inode *inode, sector_t block, struct extent_position *pos,
++             struct kernel_lb_addr *eloc, uint32_t *elen, sector_t *offset,
++             int8_t *etype)
+ {
+       unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
+       loff_t lbcount = 0, bcount = (loff_t) block << blocksize_bits;
+-      int8_t etype;
+       struct udf_inode_info *iinfo;
+       int err = 0;
+@@ -2419,13 +2429,13 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
+       }
+       *elen = 0;
+       do {
+-              err = udf_next_aext(inode, pos, eloc, elen, &etype, 1);
++              err = udf_next_aext(inode, pos, eloc, elen, etype, 1);
+               if (err <= 0) {
+                       if (err == 0) {
+                               *offset = (bcount - lbcount) >> blocksize_bits;
+                               iinfo->i_lenExtents = lbcount;
+                       }
+-                      return -1;
++                      return err;
+               }
+               lbcount += *elen;
+       } while (lbcount <= bcount);
+@@ -2433,5 +2443,5 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
+       udf_update_extent_cache(inode, lbcount - *elen, pos);
+       *offset = (bcount + *elen - lbcount) >> blocksize_bits;
+-      return etype;
++      return 1;
+ }
+diff --git a/fs/udf/partition.c b/fs/udf/partition.c
+index af877991edc13..2b85c9501bed8 100644
+--- a/fs/udf/partition.c
++++ b/fs/udf/partition.c
+@@ -282,9 +282,11 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
+       sector_t ext_offset;
+       struct extent_position epos = {};
+       uint32_t phyblock;
++      int8_t etype;
++      int err = 0;
+-      if (inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset) !=
+-                                              (EXT_RECORDED_ALLOCATED >> 30))
++      err = inode_bmap(inode, block, &epos, &eloc, &elen, &ext_offset, &etype);
++      if (err <= 0 || etype != (EXT_RECORDED_ALLOCATED >> 30))
+               phyblock = 0xFFFFFFFF;
+       else {
+               map = &UDF_SB(sb)->s_partmaps[partition];
+diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
+index 399958f891d14..4f33a4a488861 100644
+--- a/fs/udf/truncate.c
++++ b/fs/udf/truncate.c
+@@ -214,10 +214,12 @@ int udf_truncate_extents(struct inode *inode)
+       else
+               BUG();
+-      etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
++      ret = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset, &etype);
++      if (ret < 0)
++              return ret;
+       byte_offset = (offset << sb->s_blocksize_bits) +
+               (inode->i_size & (sb->s_blocksize - 1));
+-      if (etype == -1) {
++      if (ret == 0) {
+               /* We should extend the file? */
+               WARN_ON(byte_offset);
+               return 0;
+diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
+index 5067ed68a8b45..d159f20d61e89 100644
+--- a/fs/udf/udfdecl.h
++++ b/fs/udf/udfdecl.h
+@@ -157,8 +157,9 @@ extern struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
+ extern int udf_setsize(struct inode *, loff_t);
+ extern void udf_evict_inode(struct inode *);
+ extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
+-extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
+-                       struct kernel_lb_addr *, uint32_t *, sector_t *);
++extern int inode_bmap(struct inode *inode, sector_t block,
++                    struct extent_position *pos, struct kernel_lb_addr *eloc,
++                    uint32_t *elen, sector_t *offset, int8_t *etype);
+ int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
+ extern int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
+                                  struct extent_position *epos);
+-- 
+2.43.0
+
diff --git a/queue-6.6/udf-refactor-udf_current_aext-to-handle-error.patch b/queue-6.6/udf-refactor-udf_current_aext-to-handle-error.patch
new file mode 100644 (file)
index 0000000..f488aa8
--- /dev/null
@@ -0,0 +1,187 @@
+From 64d9065fe2a00eaca50849fb490d3c4e57bd12af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 19:54:23 +0800
+Subject: udf: refactor udf_current_aext() to handle error
+
+From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
+
+[ Upstream commit ee703a7068f95764cfb62b57db1d36e465cb9b26 ]
+
+As Jan suggested in links below, refactor udf_current_aext() to
+differentiate between error, hit EOF and success, it now takes pointer to
+etype to store the extent type, return 1 when getting etype success,
+return 0 when hitting EOF and return -errno when err.
+
+Link: https://lore.kernel.org/all/20240912111235.6nr3wuqvktecy3vh@quack3/
+Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20241001115425.266556-2-zhaomzhao@126.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c    | 40 ++++++++++++++++++++++++++--------------
+ fs/udf/truncate.c | 10 ++++++++--
+ fs/udf/udfdecl.h  |  5 +++--
+ 3 files changed, 37 insertions(+), 18 deletions(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index 8db07d1f56bc9..911be5bcb98e5 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -1953,6 +1953,7 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
+       struct extent_position nepos;
+       struct kernel_lb_addr neloc;
+       int ver, adsize;
++      int err = 0;
+       if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+               adsize = sizeof(struct short_ad);
+@@ -1997,10 +1998,12 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
+       if (epos->offset + adsize > sb->s_blocksize) {
+               struct kernel_lb_addr cp_loc;
+               uint32_t cp_len;
+-              int cp_type;
++              int8_t cp_type;
+               epos->offset -= adsize;
+-              cp_type = udf_current_aext(inode, epos, &cp_loc, &cp_len, 0);
++              err = udf_current_aext(inode, epos, &cp_loc, &cp_len, &cp_type, 0);
++              if (err <= 0)
++                      goto err_out;
+               cp_len |= ((uint32_t)cp_type) << 30;
+               __udf_add_aext(inode, &nepos, &cp_loc, cp_len, 1);
+@@ -2015,6 +2018,9 @@ int udf_setup_indirect_aext(struct inode *inode, udf_pblk_t block,
+       *epos = nepos;
+       return 0;
++err_out:
++      brelse(bh);
++      return err;
+ }
+ /*
+@@ -2165,9 +2171,12 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
+ {
+       int8_t etype;
+       unsigned int indirections = 0;
++      int ret = 0;
+-      while ((etype = udf_current_aext(inode, epos, eloc, elen, inc)) ==
+-             (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) {
++      while ((ret = udf_current_aext(inode, epos, eloc, elen,
++                                     &etype, inc)) > 0) {
++              if (etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30))
++                      break;
+               udf_pblk_t block;
+               if (++indirections > UDF_MAX_INDIR_EXTS) {
+@@ -2188,14 +2197,17 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
+               }
+       }
+-      return etype;
++      return ret > 0 ? etype : -1;
+ }
+-int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
+-                      struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
++/*
++ * Returns 1 on success, -errno on error, 0 on hit EOF.
++ */
++int udf_current_aext(struct inode *inode, struct extent_position *epos,
++                   struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype,
++                   int inc)
+ {
+       int alen;
+-      int8_t etype;
+       uint8_t *ptr;
+       struct short_ad *sad;
+       struct long_ad *lad;
+@@ -2222,8 +2234,8 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
+       case ICBTAG_FLAG_AD_SHORT:
+               sad = udf_get_fileshortad(ptr, alen, &epos->offset, inc);
+               if (!sad)
+-                      return -1;
+-              etype = le32_to_cpu(sad->extLength) >> 30;
++                      return 0;
++              *etype = le32_to_cpu(sad->extLength) >> 30;
+               eloc->logicalBlockNum = le32_to_cpu(sad->extPosition);
+               eloc->partitionReferenceNum =
+                               iinfo->i_location.partitionReferenceNum;
+@@ -2232,17 +2244,17 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
+       case ICBTAG_FLAG_AD_LONG:
+               lad = udf_get_filelongad(ptr, alen, &epos->offset, inc);
+               if (!lad)
+-                      return -1;
+-              etype = le32_to_cpu(lad->extLength) >> 30;
++                      return 0;
++              *etype = le32_to_cpu(lad->extLength) >> 30;
+               *eloc = lelb_to_cpu(lad->extLocation);
+               *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK;
+               break;
+       default:
+               udf_debug("alloc_type = %u unsupported\n", iinfo->i_alloc_type);
+-              return -1;
++              return -EINVAL;
+       }
+-      return etype;
++      return 1;
+ }
+ static int udf_insert_aext(struct inode *inode, struct extent_position epos,
+diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
+index a686c10fd709d..4758ba7b5f51c 100644
+--- a/fs/udf/truncate.c
++++ b/fs/udf/truncate.c
+@@ -188,6 +188,7 @@ int udf_truncate_extents(struct inode *inode)
+       loff_t byte_offset;
+       int adsize;
+       struct udf_inode_info *iinfo = UDF_I(inode);
++      int ret = 0;
+       if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+               adsize = sizeof(struct short_ad);
+@@ -217,8 +218,8 @@ int udf_truncate_extents(struct inode *inode)
+       else
+               lenalloc -= sizeof(struct allocExtDesc);
+-      while ((etype = udf_current_aext(inode, &epos, &eloc,
+-                                       &elen, 0)) != -1) {
++      while ((ret = udf_current_aext(inode, &epos, &eloc,
++                                     &elen, &etype, 0)) > 0) {
+               if (etype == (EXT_NEXT_EXTENT_ALLOCDESCS >> 30)) {
+                       udf_write_aext(inode, &epos, &neloc, nelen, 0);
+                       if (indirect_ext_len) {
+@@ -253,6 +254,11 @@ int udf_truncate_extents(struct inode *inode)
+               }
+       }
++      if (ret < 0) {
++              brelse(epos.bh);
++              return ret;
++      }
++
+       if (indirect_ext_len) {
+               BUG_ON(!epos.bh);
+               udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len);
+diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
+index 88692512a4668..d893db95ac70e 100644
+--- a/fs/udf/udfdecl.h
++++ b/fs/udf/udfdecl.h
+@@ -171,8 +171,9 @@ extern void udf_write_aext(struct inode *, struct extent_position *,
+ extern int8_t udf_delete_aext(struct inode *, struct extent_position);
+ extern int8_t udf_next_aext(struct inode *, struct extent_position *,
+                           struct kernel_lb_addr *, uint32_t *, int);
+-extern int8_t udf_current_aext(struct inode *, struct extent_position *,
+-                             struct kernel_lb_addr *, uint32_t *, int);
++extern int udf_current_aext(struct inode *inode, struct extent_position *epos,
++                          struct kernel_lb_addr *eloc, uint32_t *elen,
++                          int8_t *etype, int inc);
+ extern void udf_update_extra_perms(struct inode *inode, umode_t mode);
+ /* misc.c */
+-- 
+2.43.0
+
diff --git a/queue-6.6/udf-refactor-udf_next_aext-to-handle-error.patch b/queue-6.6/udf-refactor-udf_next_aext-to-handle-error.patch
new file mode 100644 (file)
index 0000000..e9eb730
--- /dev/null
@@ -0,0 +1,598 @@
+From be2ab9dad9a3f29211e4b7ea09245bb18dd98d6b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2024 19:54:24 +0800
+Subject: udf: refactor udf_next_aext() to handle error
+
+From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
+
+[ Upstream commit b405c1e58b73981da0f8df03b00666b22b9397ae ]
+
+Since udf_current_aext() has error handling, udf_next_aext() should have
+error handling too. Besides, when too many indirect extents are found in one
+inode, return -EFSCORRUPTED; when reading a block fails, return -EIO.
+
+Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
+Suggested-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://patch.msgid.link/20241001115425.266556-3-zhaomzhao@126.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/balloc.c    |  38 +++++++++-----
+ fs/udf/directory.c |  10 +++-
+ fs/udf/inode.c     | 125 ++++++++++++++++++++++++++++++---------------
+ fs/udf/super.c     |   3 +-
+ fs/udf/truncate.c  |  27 ++++++++--
+ fs/udf/udfdecl.h   |   5 +-
+ 6 files changed, 143 insertions(+), 65 deletions(-)
+
+diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
+index bb471ec364046..f5de2030e769a 100644
+--- a/fs/udf/balloc.c
++++ b/fs/udf/balloc.c
+@@ -387,6 +387,7 @@ static void udf_table_free_blocks(struct super_block *sb,
+       struct extent_position oepos, epos;
+       int8_t etype;
+       struct udf_inode_info *iinfo;
++      int ret = 0;
+       mutex_lock(&sbi->s_alloc_mutex);
+       iinfo = UDF_I(table);
+@@ -400,8 +401,12 @@ static void udf_table_free_blocks(struct super_block *sb,
+       epos.block = oepos.block = iinfo->i_location;
+       epos.bh = oepos.bh = NULL;
+-      while (count &&
+-             (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) {
++      while (count) {
++              ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1);
++              if (ret < 0)
++                      goto error_return;
++              if (ret == 0)
++                      break;
+               if (((eloc.logicalBlockNum +
+                       (elen >> sb->s_blocksize_bits)) == start)) {
+                       if ((0x3FFFFFFF - elen) <
+@@ -476,11 +481,8 @@ static void udf_table_free_blocks(struct super_block *sb,
+                       adsize = sizeof(struct short_ad);
+               else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
+                       adsize = sizeof(struct long_ad);
+-              else {
+-                      brelse(oepos.bh);
+-                      brelse(epos.bh);
++              else
+                       goto error_return;
+-              }
+               if (epos.offset + (2 * adsize) > sb->s_blocksize) {
+                       /* Steal a block from the extent being free'd */
+@@ -496,10 +498,10 @@ static void udf_table_free_blocks(struct super_block *sb,
+                       __udf_add_aext(table, &epos, &eloc, elen, 1);
+       }
++error_return:
+       brelse(epos.bh);
+       brelse(oepos.bh);
+-error_return:
+       mutex_unlock(&sbi->s_alloc_mutex);
+       return;
+ }
+@@ -515,6 +517,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
+       struct extent_position epos;
+       int8_t etype = -1;
+       struct udf_inode_info *iinfo;
++      int ret = 0;
+       if (first_block >= sbi->s_partmaps[partition].s_partition_len)
+               return 0;
+@@ -533,11 +536,14 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
+       epos.bh = NULL;
+       eloc.logicalBlockNum = 0xFFFFFFFF;
+-      while (first_block != eloc.logicalBlockNum &&
+-             (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) {
++      while (first_block != eloc.logicalBlockNum) {
++              ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1);
++              if (ret < 0)
++                      goto err_out;
++              if (ret == 0)
++                      break;
+               udf_debug("eloc=%u, elen=%u, first_block=%u\n",
+                         eloc.logicalBlockNum, elen, first_block);
+-              ; /* empty loop body */
+       }
+       if (first_block == eloc.logicalBlockNum) {
+@@ -556,6 +562,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
+               alloc_count = 0;
+       }
++err_out:
+       brelse(epos.bh);
+       if (alloc_count)
+@@ -577,6 +584,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
+       struct extent_position epos, goal_epos;
+       int8_t etype;
+       struct udf_inode_info *iinfo = UDF_I(table);
++      int ret = 0;
+       *err = -ENOSPC;
+@@ -600,8 +608,10 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
+       epos.block = iinfo->i_location;
+       epos.bh = goal_epos.bh = NULL;
+-      while (spread &&
+-             (etype = udf_next_aext(table, &epos, &eloc, &elen, 1)) != -1) {
++      while (spread) {
++              ret = udf_next_aext(table, &epos, &eloc, &elen, &etype, 1);
++              if (ret <= 0)
++                      break;
+               if (goal >= eloc.logicalBlockNum) {
+                       if (goal < eloc.logicalBlockNum +
+                                       (elen >> sb->s_blocksize_bits))
+@@ -629,9 +639,11 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
+       brelse(epos.bh);
+-      if (spread == 0xFFFFFFFF) {
++      if (ret < 0 || spread == 0xFFFFFFFF) {
+               brelse(goal_epos.bh);
+               mutex_unlock(&sbi->s_alloc_mutex);
++              if (ret < 0)
++                      *err = ret;
+               return 0;
+       }
+diff --git a/fs/udf/directory.c b/fs/udf/directory.c
+index 93153665eb374..c6950050e7aeb 100644
+--- a/fs/udf/directory.c
++++ b/fs/udf/directory.c
+@@ -166,13 +166,19 @@ static struct buffer_head *udf_fiiter_bread_blk(struct udf_fileident_iter *iter)
+  */
+ static int udf_fiiter_advance_blk(struct udf_fileident_iter *iter)
+ {
++      int8_t etype = -1;
++      int err = 0;
++
+       iter->loffset++;
+       if (iter->loffset < DIV_ROUND_UP(iter->elen, 1<<iter->dir->i_blkbits))
+               return 0;
+       iter->loffset = 0;
+-      if (udf_next_aext(iter->dir, &iter->epos, &iter->eloc, &iter->elen, 1)
+-                      != (EXT_RECORDED_ALLOCATED >> 30)) {
++      err = udf_next_aext(iter->dir, &iter->epos, &iter->eloc,
++                          &iter->elen, &etype, 1);
++      if (err < 0)
++              return err;
++      else if (err == 0 || etype != (EXT_RECORDED_ALLOCATED >> 30)) {
+               if (iter->pos == iter->dir->i_size) {
+                       iter->elen = 0;
+                       return 0;
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index 911be5bcb98e5..f7623b49ec349 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -547,6 +547,7 @@ static int udf_do_extend_file(struct inode *inode,
+       } else {
+               struct kernel_lb_addr tmploc;
+               uint32_t tmplen;
++              int8_t tmptype;
+               udf_write_aext(inode, last_pos, &last_ext->extLocation,
+                               last_ext->extLength, 1);
+@@ -556,8 +557,12 @@ static int udf_do_extend_file(struct inode *inode,
+                * more extents, we may need to enter possible following
+                * empty indirect extent.
+                */
+-              if (new_block_bytes)
+-                      udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
++              if (new_block_bytes) {
++                      err = udf_next_aext(inode, last_pos, &tmploc, &tmplen,
++                                          &tmptype, 0);
++                      if (err < 0)
++                              goto out_err;
++              }
+       }
+       iinfo->i_lenExtents += add;
+@@ -676,8 +681,10 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
+               extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
+       } else {
+               epos.offset -= adsize;
+-              etype = udf_next_aext(inode, &epos, &extent.extLocation,
+-                                    &extent.extLength, 0);
++              err = udf_next_aext(inode, &epos, &extent.extLocation,
++                                  &extent.extLength, &etype, 0);
++              if (err <= 0)
++                      goto out;
+               extent.extLength |= etype << 30;
+       }
+@@ -714,11 +721,11 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
+       loff_t lbcount = 0, b_off = 0;
+       udf_pblk_t newblocknum;
+       sector_t offset = 0;
+-      int8_t etype;
++      int8_t etype, tmpetype;
+       struct udf_inode_info *iinfo = UDF_I(inode);
+       udf_pblk_t goal = 0, pgoal = iinfo->i_location.logicalBlockNum;
+       int lastblock = 0;
+-      bool isBeyondEOF;
++      bool isBeyondEOF = false;
+       int ret = 0;
+       prev_epos.offset = udf_file_entry_alloc_offset(inode);
+@@ -750,9 +757,13 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
+               prev_epos.offset = cur_epos.offset;
+               cur_epos.offset = next_epos.offset;
+-              etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 1);
+-              if (etype == -1)
++              ret = udf_next_aext(inode, &next_epos, &eloc, &elen, &etype, 1);
++              if (ret < 0) {
++                      goto out_free;
++              } else if (ret == 0) {
++                      isBeyondEOF = true;
+                       break;
++              }
+               c = !c;
+@@ -773,13 +784,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
+        * Move prev_epos and cur_epos into indirect extent if we are at
+        * the pointer to it
+        */
+-      udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, 0);
+-      udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, 0);
++      ret = udf_next_aext(inode, &prev_epos, &tmpeloc, &tmpelen, &tmpetype, 0);
++      if (ret < 0)
++              goto out_free;
++      ret = udf_next_aext(inode, &cur_epos, &tmpeloc, &tmpelen, &tmpetype, 0);
++      if (ret < 0)
++              goto out_free;
+       /* if the extent is allocated and recorded, return the block
+          if the extent is not a multiple of the blocksize, round up */
+-      if (etype == (EXT_RECORDED_ALLOCATED >> 30)) {
++      if (!isBeyondEOF && etype == (EXT_RECORDED_ALLOCATED >> 30)) {
+               if (elen & (inode->i_sb->s_blocksize - 1)) {
+                       elen = EXT_RECORDED_ALLOCATED |
+                               ((elen + inode->i_sb->s_blocksize - 1) &
+@@ -795,10 +810,9 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
+       }
+       /* Are we beyond EOF and preallocated extent? */
+-      if (etype == -1) {
++      if (isBeyondEOF) {
+               loff_t hole_len;
+-              isBeyondEOF = true;
+               if (count) {
+                       if (c)
+                               laarr[0] = laarr[1];
+@@ -834,7 +848,6 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
+               endnum = c + 1;
+               lastblock = 1;
+       } else {
+-              isBeyondEOF = false;
+               endnum = startnum = ((count > 2) ? 2 : count);
+               /* if the current extent is in position 0,
+@@ -848,15 +861,17 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
+               /* if the current block is located in an extent,
+                  read the next extent */
+-              etype = udf_next_aext(inode, &next_epos, &eloc, &elen, 0);
+-              if (etype != -1) {
++              ret = udf_next_aext(inode, &next_epos, &eloc, &elen, &etype, 0);
++              if (ret > 0) {
+                       laarr[c + 1].extLength = (etype << 30) | elen;
+                       laarr[c + 1].extLocation = eloc;
+                       count++;
+                       startnum++;
+                       endnum++;
+-              } else
++              } else if (ret == 0)
+                       lastblock = 1;
++              else
++                      goto out_free;
+       }
+       /* if the current extent is not recorded but allocated, get the
+@@ -1174,6 +1189,7 @@ static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
+       int start = 0, i;
+       struct kernel_lb_addr tmploc;
+       uint32_t tmplen;
++      int8_t tmpetype;
+       int err;
+       if (startnum > endnum) {
+@@ -1191,14 +1207,19 @@ static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
+                        */
+                       if (err < 0)
+                               return err;
+-                      udf_next_aext(inode, epos, &laarr[i].extLocation,
+-                                    &laarr[i].extLength, 1);
++                      err = udf_next_aext(inode, epos, &laarr[i].extLocation,
++                                    &laarr[i].extLength, &tmpetype, 1);
++                      if (err < 0)
++                              return err;
+                       start++;
+               }
+       }
+       for (i = start; i < endnum; i++) {
+-              udf_next_aext(inode, epos, &tmploc, &tmplen, 0);
++              err = udf_next_aext(inode, epos, &tmploc, &tmplen, &tmpetype, 0);
++              if (err < 0)
++                      return err;
++
+               udf_write_aext(inode, epos, &laarr[i].extLocation,
+                              laarr[i].extLength, 1);
+       }
+@@ -2166,24 +2187,30 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos,
+  */
+ #define UDF_MAX_INDIR_EXTS 16
+-int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
+-                   struct kernel_lb_addr *eloc, uint32_t *elen, int inc)
++/*
++ * Returns 1 on success, -errno on error, 0 on hit EOF.
++ */
++int udf_next_aext(struct inode *inode, struct extent_position *epos,
++                struct kernel_lb_addr *eloc, uint32_t *elen, int8_t *etype,
++                int inc)
+ {
+-      int8_t etype;
+       unsigned int indirections = 0;
+       int ret = 0;
++      udf_pblk_t block;
+-      while ((ret = udf_current_aext(inode, epos, eloc, elen,
+-                                     &etype, inc)) > 0) {
+-              if (etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30))
+-                      break;
+-              udf_pblk_t block;
++      while (1) {
++              ret = udf_current_aext(inode, epos, eloc, elen,
++                                     etype, inc);
++              if (ret <= 0)
++                      return ret;
++              if (*etype != (EXT_NEXT_EXTENT_ALLOCDESCS >> 30))
++                      return ret;
+               if (++indirections > UDF_MAX_INDIR_EXTS) {
+                       udf_err(inode->i_sb,
+                               "too many indirect extents in inode %lu\n",
+                               inode->i_ino);
+-                      return -1;
++                      return -EFSCORRUPTED;
+               }
+               epos->block = *eloc;
+@@ -2193,11 +2220,9 @@ int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
+               epos->bh = sb_bread(inode->i_sb, block);
+               if (!epos->bh) {
+                       udf_debug("reading block %u failed!\n", block);
+-                      return -1;
++                      return -EIO;
+               }
+       }
+-
+-      return ret > 0 ? etype : -1;
+ }
+ /*
+@@ -2263,20 +2288,24 @@ static int udf_insert_aext(struct inode *inode, struct extent_position epos,
+       struct kernel_lb_addr oeloc;
+       uint32_t oelen;
+       int8_t etype;
+-      int err;
++      int ret;
+       if (epos.bh)
+               get_bh(epos.bh);
+-      while ((etype = udf_next_aext(inode, &epos, &oeloc, &oelen, 0)) != -1) {
++      while (1) {
++              ret = udf_next_aext(inode, &epos, &oeloc, &oelen, &etype, 0);
++              if (ret <= 0)
++                      break;
+               udf_write_aext(inode, &epos, &neloc, nelen, 1);
+               neloc = oeloc;
+               nelen = (etype << 30) | oelen;
+       }
+-      err = udf_add_aext(inode, &epos, &neloc, nelen, 1);
++      if (ret == 0)
++              ret = udf_add_aext(inode, &epos, &neloc, nelen, 1);
+       brelse(epos.bh);
+-      return err;
++      return ret;
+ }
+ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
+@@ -2288,6 +2317,7 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
+       struct udf_inode_info *iinfo;
+       struct kernel_lb_addr eloc;
+       uint32_t elen;
++      int ret;
+       if (epos.bh) {
+               get_bh(epos.bh);
+@@ -2303,10 +2333,18 @@ int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
+               adsize = 0;
+       oepos = epos;
+-      if (udf_next_aext(inode, &epos, &eloc, &elen, 1) == -1)
++      if (udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1) <= 0)
+               return -1;
+-      while ((etype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
++      while (1) {
++              ret = udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1);
++              if (ret < 0) {
++                      brelse(epos.bh);
++                      brelse(oepos.bh);
++                      return -1;
++              }
++              if (ret == 0)
++                      break;
+               udf_write_aext(inode, &oepos, &eloc, (etype << 30) | elen, 1);
+               if (oepos.bh != epos.bh) {
+                       oepos.block = epos.block;
+@@ -2371,6 +2409,7 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
+       loff_t lbcount = 0, bcount = (loff_t) block << blocksize_bits;
+       int8_t etype;
+       struct udf_inode_info *iinfo;
++      int err = 0;
+       iinfo = UDF_I(inode);
+       if (!udf_read_extent_cache(inode, bcount, &lbcount, pos)) {
+@@ -2380,10 +2419,12 @@ int8_t inode_bmap(struct inode *inode, sector_t block,
+       }
+       *elen = 0;
+       do {
+-              etype = udf_next_aext(inode, pos, eloc, elen, 1);
+-              if (etype == -1) {
+-                      *offset = (bcount - lbcount) >> blocksize_bits;
+-                      iinfo->i_lenExtents = lbcount;
++              err = udf_next_aext(inode, pos, eloc, elen, &etype, 1);
++              if (err <= 0) {
++                      if (err == 0) {
++                              *offset = (bcount - lbcount) >> blocksize_bits;
++                              iinfo->i_lenExtents = lbcount;
++                      }
+                       return -1;
+               }
+               lbcount += *elen;
+diff --git a/fs/udf/super.c b/fs/udf/super.c
+index 3c78535f406b0..20dff9ed2471d 100644
+--- a/fs/udf/super.c
++++ b/fs/udf/super.c
+@@ -2454,13 +2454,14 @@ static unsigned int udf_count_free_table(struct super_block *sb,
+       uint32_t elen;
+       struct kernel_lb_addr eloc;
+       struct extent_position epos;
++      int8_t etype;
+       mutex_lock(&UDF_SB(sb)->s_alloc_mutex);
+       epos.block = UDF_I(table)->i_location;
+       epos.offset = sizeof(struct unallocSpaceEntry);
+       epos.bh = NULL;
+-      while (udf_next_aext(table, &epos, &eloc, &elen, 1) != -1)
++      while (udf_next_aext(table, &epos, &eloc, &elen, &etype, 1) > 0)
+               accum += (elen >> table->i_sb->s_blocksize_bits);
+       brelse(epos.bh);
+diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
+index 4758ba7b5f51c..399958f891d14 100644
+--- a/fs/udf/truncate.c
++++ b/fs/udf/truncate.c
+@@ -69,6 +69,7 @@ void udf_truncate_tail_extent(struct inode *inode)
+       int8_t etype = -1, netype;
+       int adsize;
+       struct udf_inode_info *iinfo = UDF_I(inode);
++      int ret;
+       if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ||
+           inode->i_size == iinfo->i_lenExtents)
+@@ -85,7 +86,10 @@ void udf_truncate_tail_extent(struct inode *inode)
+               BUG();
+       /* Find the last extent in the file */
+-      while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
++      while (1) {
++              ret = udf_next_aext(inode, &epos, &eloc, &elen, &netype, 1);
++              if (ret <= 0)
++                      break;
+               etype = netype;
+               lbcount += elen;
+               if (lbcount > inode->i_size) {
+@@ -101,7 +105,8 @@ void udf_truncate_tail_extent(struct inode *inode)
+                       epos.offset -= adsize;
+                       extent_trunc(inode, &epos, &eloc, etype, elen, nelen);
+                       epos.offset += adsize;
+-                      if (udf_next_aext(inode, &epos, &eloc, &elen, 1) != -1)
++                      if (udf_next_aext(inode, &epos, &eloc, &elen,
++                                        &netype, 1) > 0)
+                               udf_err(inode->i_sb,
+                                       "Extent after EOF in inode %u\n",
+                                       (unsigned)inode->i_ino);
+@@ -110,7 +115,8 @@ void udf_truncate_tail_extent(struct inode *inode)
+       }
+       /* This inode entry is in-memory only and thus we don't have to mark
+        * the inode dirty */
+-      iinfo->i_lenExtents = inode->i_size;
++      if (ret == 0)
++              iinfo->i_lenExtents = inode->i_size;
+       brelse(epos.bh);
+ }
+@@ -124,6 +130,8 @@ void udf_discard_prealloc(struct inode *inode)
+       int8_t etype = -1;
+       struct udf_inode_info *iinfo = UDF_I(inode);
+       int bsize = i_blocksize(inode);
++      int8_t tmpetype = -1;
++      int ret;
+       if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ||
+           ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize))
+@@ -132,15 +140,23 @@ void udf_discard_prealloc(struct inode *inode)
+       epos.block = iinfo->i_location;
+       /* Find the last extent in the file */
+-      while (udf_next_aext(inode, &epos, &eloc, &elen, 0) != -1) {
++      while (1) {
++              ret = udf_next_aext(inode, &epos, &eloc, &elen, &tmpetype, 0);
++              if (ret < 0)
++                      goto out;
++              if (ret == 0)
++                      break;
+               brelse(prev_epos.bh);
+               prev_epos = epos;
+               if (prev_epos.bh)
+                       get_bh(prev_epos.bh);
+-              etype = udf_next_aext(inode, &epos, &eloc, &elen, 1);
++              ret = udf_next_aext(inode, &epos, &eloc, &elen, &etype, 1);
++              if (ret < 0)
++                      goto out;
+               lbcount += elen;
+       }
++
+       if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
+               lbcount -= elen;
+               udf_delete_aext(inode, prev_epos);
+@@ -150,6 +166,7 @@ void udf_discard_prealloc(struct inode *inode)
+       /* This inode entry is in-memory only and thus we don't have to mark
+        * the inode dirty */
+       iinfo->i_lenExtents = lbcount;
++out:
+       brelse(epos.bh);
+       brelse(prev_epos.bh);
+ }
+diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
+index d893db95ac70e..5067ed68a8b45 100644
+--- a/fs/udf/udfdecl.h
++++ b/fs/udf/udfdecl.h
+@@ -169,8 +169,9 @@ extern int udf_add_aext(struct inode *, struct extent_position *,
+ extern void udf_write_aext(struct inode *, struct extent_position *,
+                          struct kernel_lb_addr *, uint32_t, int);
+ extern int8_t udf_delete_aext(struct inode *, struct extent_position);
+-extern int8_t udf_next_aext(struct inode *, struct extent_position *,
+-                          struct kernel_lb_addr *, uint32_t *, int);
++extern int udf_next_aext(struct inode *inode, struct extent_position *epos,
++                       struct kernel_lb_addr *eloc, uint32_t *elen,
++                       int8_t *etype, int inc);
+ extern int udf_current_aext(struct inode *inode, struct extent_position *epos,
+                           struct kernel_lb_addr *eloc, uint32_t *elen,
+                           int8_t *etype, int inc);
+-- 
+2.43.0
+
diff --git a/queue-6.6/uprobe-avoid-out-of-bounds-memory-access-of-fetching.patch b/queue-6.6/uprobe-avoid-out-of-bounds-memory-access-of-fetching.patch
new file mode 100644 (file)
index 0000000..38c3a16
--- /dev/null
@@ -0,0 +1,183 @@
+From d5cdfe6cb49be8b0733677a3a09c96fe30a5fa48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 14:01:48 +0800
+Subject: uprobe: avoid out-of-bounds memory access of fetching args
+
+From: Qiao Ma <mqaio@linux.alibaba.com>
+
+[ Upstream commit 373b9338c9722a368925d83bc622c596896b328e ]
+
+Uprobe needs to fetch args into a percpu buffer, and then copy to ring
+buffer to avoid non-atomic context problem.
+
+Sometimes user-space strings and arrays can be very large, but the size of
+the percpu buffer is only one page. store_trace_args() does not check
+whether this data exceeds a single page, which causes an out-of-bounds
+memory access.
+
+It can be reproduced with the following steps:
+1. build the kernel with CONFIG_KASAN enabled
+2. save the following program as test.c
+
+```
+\#include <stdio.h>
+\#include <stdlib.h>
+\#include <string.h>
+
+// If the string length is larger than MAX_STRING_SIZE, fetch_store_strlen()
+// will return 0, causing __get_data_size() to return a shorter size, and
+// store_trace_args() will not trigger the out-of-bounds access.
+// So make the string length less than 4096.
+\#define STRLEN 4093
+
+void generate_string(char *str, int n)
+{
+    int i;
+    for (i = 0; i < n; ++i)
+    {
+        char c = i % 26 + 'a';
+        str[i] = c;
+    }
+    str[n-1] = '\0';
+}
+
+void print_string(char *str)
+{
+    printf("%s\n", str);
+}
+
+int main()
+{
+    char tmp[STRLEN];
+
+    generate_string(tmp, STRLEN);
+    print_string(tmp);
+
+    return 0;
+}
+```
+3. compile program
+`gcc -o test test.c`
+
+4. get the offset of `print_string()`
+```
+objdump -t test | grep -w print_string
+0000000000401199 g     F .text  000000000000001b              print_string
+```
+
+5. configure uprobe with offset 0x1199
+```
+off=0x1199
+
+cd /sys/kernel/debug/tracing/
+echo "p /root/test:${off} arg1=+0(%di):ustring arg2=\$comm arg3=+0(%di):ustring"
+ > uprobe_events
+echo 1 > events/uprobes/enable
+echo 1 > tracing_on
+```
+
+6. run `test`, and KASAN will report an error.
+==================================================================
+BUG: KASAN: use-after-free in strncpy_from_user+0x1d6/0x1f0
+Write of size 8 at addr ffff88812311c004 by task test/499CPU: 0 UID: 0 PID: 499 Comm: test Not tainted 6.12.0-rc3+ #18
+Hardware name: Red Hat KVM, BIOS 1.16.0-4.al8 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x55/0x70
+ print_address_description.constprop.0+0x27/0x310
+ kasan_report+0x10f/0x120
+ ? strncpy_from_user+0x1d6/0x1f0
+ strncpy_from_user+0x1d6/0x1f0
+ ? rmqueue.constprop.0+0x70d/0x2ad0
+ process_fetch_insn+0xb26/0x1470
+ ? __pfx_process_fetch_insn+0x10/0x10
+ ? _raw_spin_lock+0x85/0xe0
+ ? __pfx__raw_spin_lock+0x10/0x10
+ ? __pte_offset_map+0x1f/0x2d0
+ ? unwind_next_frame+0xc5f/0x1f80
+ ? arch_stack_walk+0x68/0xf0
+ ? is_bpf_text_address+0x23/0x30
+ ? kernel_text_address.part.0+0xbb/0xd0
+ ? __kernel_text_address+0x66/0xb0
+ ? unwind_get_return_address+0x5e/0xa0
+ ? __pfx_stack_trace_consume_entry+0x10/0x10
+ ? arch_stack_walk+0xa2/0xf0
+ ? _raw_spin_lock_irqsave+0x8b/0xf0
+ ? __pfx__raw_spin_lock_irqsave+0x10/0x10
+ ? depot_alloc_stack+0x4c/0x1f0
+ ? _raw_spin_unlock_irqrestore+0xe/0x30
+ ? stack_depot_save_flags+0x35d/0x4f0
+ ? kasan_save_stack+0x34/0x50
+ ? kasan_save_stack+0x24/0x50
+ ? mutex_lock+0x91/0xe0
+ ? __pfx_mutex_lock+0x10/0x10
+ prepare_uprobe_buffer.part.0+0x2cd/0x500
+ uprobe_dispatcher+0x2c3/0x6a0
+ ? __pfx_uprobe_dispatcher+0x10/0x10
+ ? __kasan_slab_alloc+0x4d/0x90
+ handler_chain+0xdd/0x3e0
+ handle_swbp+0x26e/0x3d0
+ ? __pfx_handle_swbp+0x10/0x10
+ ? uprobe_pre_sstep_notifier+0x151/0x1b0
+ irqentry_exit_to_user_mode+0xe2/0x1b0
+ asm_exc_int3+0x39/0x40
+RIP: 0033:0x401199
+Code: 01 c2 0f b6 45 fb 88 02 83 45 fc 01 8b 45 fc 3b 45 e4 7c b7 8b 45 e4 48 98 48 8d 50 ff 48 8b 45 e8 48 01 d0 ce
+RSP: 002b:00007ffdf00576a8 EFLAGS: 00000206
+RAX: 00007ffdf00576b0 RBX: 0000000000000000 RCX: 0000000000000ff2
+RDX: 0000000000000ffc RSI: 0000000000000ffd RDI: 00007ffdf00576b0
+RBP: 00007ffdf00586b0 R08: 00007feb2f9c0d20 R09: 00007feb2f9c0d20
+R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000401040
+R13: 00007ffdf0058780 R14: 0000000000000000 R15: 0000000000000000
+ </TASK>
+
+This commit enforces that the buffer's maxlen is at most one page to avoid
+out-of-bounds memory access in store_trace_args().
+
+Link: https://lore.kernel.org/all/20241015060148.1108331-1-mqaio@linux.alibaba.com/
+
+Fixes: dcad1a204f72 ("tracing/uprobes: Fetch args before reserving a ring buffer")
+Signed-off-by: Qiao Ma <mqaio@linux.alibaba.com>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_uprobe.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 0d52588329b29..d72ac9dde5321 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -858,6 +858,7 @@ struct uprobe_cpu_buffer {
+ };
+ static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
+ static int uprobe_buffer_refcnt;
++#define MAX_UCB_BUFFER_SIZE PAGE_SIZE
+ static int uprobe_buffer_init(void)
+ {
+@@ -962,6 +963,11 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
+       ucb = uprobe_buffer_get();
+       ucb->dsize = tu->tp.size + dsize;
++      if (WARN_ON_ONCE(ucb->dsize > MAX_UCB_BUFFER_SIZE)) {
++              ucb->dsize = MAX_UCB_BUFFER_SIZE;
++              dsize = MAX_UCB_BUFFER_SIZE - tu->tp.size;
++      }
++
+       store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
+       *ucbp = ucb;
+@@ -981,9 +987,6 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
+       WARN_ON(call != trace_file->event_call);
+-      if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
+-              return;
+-
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+-- 
+2.43.0
+
diff --git a/queue-6.6/uprobes-encapsulate-preparation-of-uprobe-args-buffe.patch b/queue-6.6/uprobes-encapsulate-preparation-of-uprobe-args-buffe.patch
new file mode 100644 (file)
index 0000000..e97b035
--- /dev/null
@@ -0,0 +1,257 @@
+From 715741b9e585de285369902049c9139629e681cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Mar 2024 11:17:26 -0700
+Subject: uprobes: encapsulate preparation of uprobe args buffer
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 3eaea21b4d27cff0017c20549aeb53034c58fc23 ]
+
+Move the logic of fetching temporary per-CPU uprobe buffer and storing
+uprobes args into it to a new helper function. Store data size as part
+of this buffer, simplifying interfaces a bit, as now we only pass single
+uprobe_cpu_buffer reference around, instead of pointer + dsize.
+
+This logic was duplicated across uprobe_dispatcher and uretprobe_dispatcher,
+and now will be centralized. All this is also in preparation to make
+this uprobe_cpu_buffer handling logic optional in the next patch.
+
+Link: https://lore.kernel.org/all/20240318181728.2795838-2-andrii@kernel.org/
+[Masami: update for v6.9-rc3 kernel]
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Reviewed-by: Jiri Olsa <jolsa@kernel.org>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_uprobe.c | 78 +++++++++++++++++++------------------
+ 1 file changed, 41 insertions(+), 37 deletions(-)
+
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 78d76d74f45bc..58506c9632eae 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -854,6 +854,7 @@ static const struct file_operations uprobe_profile_ops = {
+ struct uprobe_cpu_buffer {
+       struct mutex mutex;
+       void *buf;
++      int dsize;
+ };
+ static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
+ static int uprobe_buffer_refcnt;
+@@ -943,9 +944,26 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
+       mutex_unlock(&ucb->mutex);
+ }
++static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
++                                                     struct pt_regs *regs)
++{
++      struct uprobe_cpu_buffer *ucb;
++      int dsize, esize;
++
++      esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
++      dsize = __get_data_size(&tu->tp, regs, NULL);
++
++      ucb = uprobe_buffer_get();
++      ucb->dsize = tu->tp.size + dsize;
++
++      store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
++
++      return ucb;
++}
++
+ static void __uprobe_trace_func(struct trace_uprobe *tu,
+                               unsigned long func, struct pt_regs *regs,
+-                              struct uprobe_cpu_buffer *ucb, int dsize,
++                              struct uprobe_cpu_buffer *ucb,
+                               struct trace_event_file *trace_file)
+ {
+       struct uprobe_trace_entry_head *entry;
+@@ -956,14 +974,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
+       WARN_ON(call != trace_file->event_call);
+-      if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE))
++      if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
+               return;
+       if (trace_trigger_soft_disabled(trace_file))
+               return;
+       esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+-      size = esize + tu->tp.size + dsize;
++      size = esize + ucb->dsize;
+       entry = trace_event_buffer_reserve(&fbuffer, trace_file, size);
+       if (!entry)
+               return;
+@@ -977,14 +995,14 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
+               data = DATAOF_TRACE_ENTRY(entry, false);
+       }
+-      memcpy(data, ucb->buf, tu->tp.size + dsize);
++      memcpy(data, ucb->buf, ucb->dsize);
+       trace_event_buffer_commit(&fbuffer);
+ }
+ /* uprobe handler */
+ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+-                           struct uprobe_cpu_buffer *ucb, int dsize)
++                           struct uprobe_cpu_buffer *ucb)
+ {
+       struct event_file_link *link;
+@@ -993,7 +1011,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+       rcu_read_lock();
+       trace_probe_for_each_link_rcu(link, &tu->tp)
+-              __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
++              __uprobe_trace_func(tu, 0, regs, ucb, link->file);
+       rcu_read_unlock();
+       return 0;
+@@ -1001,13 +1019,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+                                struct pt_regs *regs,
+-                               struct uprobe_cpu_buffer *ucb, int dsize)
++                               struct uprobe_cpu_buffer *ucb)
+ {
+       struct event_file_link *link;
+       rcu_read_lock();
+       trace_probe_for_each_link_rcu(link, &tu->tp)
+-              __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
++              __uprobe_trace_func(tu, func, regs, ucb, link->file);
+       rcu_read_unlock();
+ }
+@@ -1335,7 +1353,7 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+ static void __uprobe_perf_func(struct trace_uprobe *tu,
+                              unsigned long func, struct pt_regs *regs,
+-                             struct uprobe_cpu_buffer *ucb, int dsize)
++                             struct uprobe_cpu_buffer *ucb)
+ {
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+       struct uprobe_trace_entry_head *entry;
+@@ -1356,7 +1374,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
+       esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+-      size = esize + tu->tp.size + dsize;
++      size = esize + ucb->dsize;
+       size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
+               return;
+@@ -1379,13 +1397,10 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
+               data = DATAOF_TRACE_ENTRY(entry, false);
+       }
+-      memcpy(data, ucb->buf, tu->tp.size + dsize);
++      memcpy(data, ucb->buf, ucb->dsize);
+-      if (size - esize > tu->tp.size + dsize) {
+-              int len = tu->tp.size + dsize;
+-
+-              memset(data + len, 0, size - esize - len);
+-      }
++      if (size - esize > ucb->dsize)
++              memset(data + ucb->dsize, 0, size - esize - ucb->dsize);
+       perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
+                             head, NULL);
+@@ -1395,21 +1410,21 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
+ /* uprobe profile handler */
+ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs,
+-                          struct uprobe_cpu_buffer *ucb, int dsize)
++                          struct uprobe_cpu_buffer *ucb)
+ {
+       if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+               return UPROBE_HANDLER_REMOVE;
+       if (!is_ret_probe(tu))
+-              __uprobe_perf_func(tu, 0, regs, ucb, dsize);
++              __uprobe_perf_func(tu, 0, regs, ucb);
+       return 0;
+ }
+ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+                               struct pt_regs *regs,
+-                              struct uprobe_cpu_buffer *ucb, int dsize)
++                              struct uprobe_cpu_buffer *ucb)
+ {
+-      __uprobe_perf_func(tu, func, regs, ucb, dsize);
++      __uprobe_perf_func(tu, func, regs, ucb);
+ }
+ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+@@ -1475,10 +1490,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+       struct trace_uprobe *tu;
+       struct uprobe_dispatch_data udd;
+       struct uprobe_cpu_buffer *ucb;
+-      int dsize, esize;
+       int ret = 0;
+-
+       tu = container_of(con, struct trace_uprobe, consumer);
+       tu->nhit++;
+@@ -1490,18 +1503,14 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+       if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+               return 0;
+-      dsize = __get_data_size(&tu->tp, regs, NULL);
+-      esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+-
+-      ucb = uprobe_buffer_get();
+-      store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
++      ucb = prepare_uprobe_buffer(tu, regs);
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+-              ret |= uprobe_trace_func(tu, regs, ucb, dsize);
++              ret |= uprobe_trace_func(tu, regs, ucb);
+ #ifdef CONFIG_PERF_EVENTS
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
+-              ret |= uprobe_perf_func(tu, regs, ucb, dsize);
++              ret |= uprobe_perf_func(tu, regs, ucb);
+ #endif
+       uprobe_buffer_put(ucb);
+       return ret;
+@@ -1513,7 +1522,6 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
+       struct trace_uprobe *tu;
+       struct uprobe_dispatch_data udd;
+       struct uprobe_cpu_buffer *ucb;
+-      int dsize, esize;
+       tu = container_of(con, struct trace_uprobe, consumer);
+@@ -1525,18 +1533,14 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
+       if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+               return 0;
+-      dsize = __get_data_size(&tu->tp, regs, NULL);
+-      esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+-
+-      ucb = uprobe_buffer_get();
+-      store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
++      ucb = prepare_uprobe_buffer(tu, regs);
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+-              uretprobe_trace_func(tu, func, regs, ucb, dsize);
++              uretprobe_trace_func(tu, func, regs, ucb);
+ #ifdef CONFIG_PERF_EVENTS
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
+-              uretprobe_perf_func(tu, func, regs, ucb, dsize);
++              uretprobe_perf_func(tu, func, regs, ucb);
+ #endif
+       uprobe_buffer_put(ucb);
+       return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.6/uprobes-prepare-uprobe-args-buffer-lazily.patch b/queue-6.6/uprobes-prepare-uprobe-args-buffer-lazily.patch
new file mode 100644 (file)
index 0000000..4754b2d
--- /dev/null
@@ -0,0 +1,265 @@
+From 703f087efdd94cd0e987efed2649e6fe7262356f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Mar 2024 11:17:27 -0700
+Subject: uprobes: prepare uprobe args buffer lazily
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 1b8f85defbc82e2eb8f27c5f6060ea507ad4d5a3 ]
+
+uprobe_cpu_buffer and corresponding logic to store uprobe args into it
+are used for uprobes/uretprobes that are created through tracefs or
+perf events.
+
+BPF is yet another user of uprobe/uretprobe infrastructure, but doesn't
+need uprobe_cpu_buffer and associated data. For BPF-only use cases this
+buffer handling and preparation is a pure overhead. At the same time,
+BPF-only uprobe/uretprobe usage is very common in practice. Also, for
+a lot of cases applications are very sensitive to performance overheads,
+as they might be tracing very high frequency functions like
+malloc()/free(), so every bit of performance improvement matters.
+
+All that is to say that this uprobe_cpu_buffer preparation is an
+unnecessary overhead that each BPF user of uprobes/uretprobe has to pay.
+This patch is changing this by making uprobe_cpu_buffer preparation
+optional. It will happen only if either tracefs-based or perf event-based
+uprobe/uretprobe consumer is registered for given uprobe/uretprobe. For
+BPF-only use cases this step will be skipped.
+
+We used uprobe/uretprobe benchmark which is part of BPF selftests (see [0])
+to estimate the improvements. We have 3 uprobe and 3 uretprobe
+scenarios, which vary an instruction that is replaced by uprobe: nop
+(fastest uprobe case), `push rbp` (typical case), and non-simulated
+`ret` instruction (slowest case). Benchmark thread is constantly calling
+user space function in a tight loop. User space function has attached
+BPF uprobe or uretprobe program doing nothing but atomic counter
+increments to count number of triggering calls. Benchmark emits
+throughput in millions of executions per second.
+
+BEFORE these changes
+====================
+uprobe-nop     :    2.657 ± 0.024M/s
+uprobe-push    :    2.499 ± 0.018M/s
+uprobe-ret     :    1.100 ± 0.006M/s
+uretprobe-nop  :    1.356 ± 0.004M/s
+uretprobe-push :    1.317 ± 0.019M/s
+uretprobe-ret  :    0.785 ± 0.007M/s
+
+AFTER these changes
+===================
+uprobe-nop     :    2.732 ± 0.022M/s (+2.8%)
+uprobe-push    :    2.621 ± 0.016M/s (+4.9%)
+uprobe-ret     :    1.105 ± 0.007M/s (+0.5%)
+uretprobe-nop  :    1.396 ± 0.007M/s (+2.9%)
+uretprobe-push :    1.347 ± 0.008M/s (+2.3%)
+uretprobe-ret  :    0.800 ± 0.006M/s (+1.9%)
+
+So the improvements on this particular machine seem to be between 2% and 5%.
+
+  [0] https://github.com/torvalds/linux/blob/master/tools/testing/selftests/bpf/benchs/bench_trigger.c
+
+Reviewed-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/all/20240318181728.2795838-3-andrii@kernel.org/
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_uprobe.c | 49 +++++++++++++++++++++----------------
+ 1 file changed, 28 insertions(+), 21 deletions(-)
+
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 58506c9632eae..6c2ab0e316d6a 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -941,15 +941,21 @@ static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
+ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
+ {
++      if (!ucb)
++              return;
+       mutex_unlock(&ucb->mutex);
+ }
+ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
+-                                                     struct pt_regs *regs)
++                                                     struct pt_regs *regs,
++                                                     struct uprobe_cpu_buffer **ucbp)
+ {
+       struct uprobe_cpu_buffer *ucb;
+       int dsize, esize;
++      if (*ucbp)
++              return *ucbp;
++
+       esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+       dsize = __get_data_size(&tu->tp, regs, NULL);
+@@ -958,22 +964,25 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
+       store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize);
++      *ucbp = ucb;
+       return ucb;
+ }
+ static void __uprobe_trace_func(struct trace_uprobe *tu,
+                               unsigned long func, struct pt_regs *regs,
+-                              struct uprobe_cpu_buffer *ucb,
++                              struct uprobe_cpu_buffer **ucbp,
+                               struct trace_event_file *trace_file)
+ {
+       struct uprobe_trace_entry_head *entry;
+       struct trace_event_buffer fbuffer;
++      struct uprobe_cpu_buffer *ucb;
+       void *data;
+       int size, esize;
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+       WARN_ON(call != trace_file->event_call);
++      ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+       if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
+               return;
+@@ -1002,7 +1011,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
+ /* uprobe handler */
+ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+-                           struct uprobe_cpu_buffer *ucb)
++                           struct uprobe_cpu_buffer **ucbp)
+ {
+       struct event_file_link *link;
+@@ -1011,7 +1020,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+       rcu_read_lock();
+       trace_probe_for_each_link_rcu(link, &tu->tp)
+-              __uprobe_trace_func(tu, 0, regs, ucb, link->file);
++              __uprobe_trace_func(tu, 0, regs, ucbp, link->file);
+       rcu_read_unlock();
+       return 0;
+@@ -1019,13 +1028,13 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+                                struct pt_regs *regs,
+-                               struct uprobe_cpu_buffer *ucb)
++                               struct uprobe_cpu_buffer **ucbp)
+ {
+       struct event_file_link *link;
+       rcu_read_lock();
+       trace_probe_for_each_link_rcu(link, &tu->tp)
+-              __uprobe_trace_func(tu, func, regs, ucb, link->file);
++              __uprobe_trace_func(tu, func, regs, ucbp, link->file);
+       rcu_read_unlock();
+ }
+@@ -1353,10 +1362,11 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+ static void __uprobe_perf_func(struct trace_uprobe *tu,
+                              unsigned long func, struct pt_regs *regs,
+-                             struct uprobe_cpu_buffer *ucb)
++                             struct uprobe_cpu_buffer **ucbp)
+ {
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+       struct uprobe_trace_entry_head *entry;
++      struct uprobe_cpu_buffer *ucb;
+       struct hlist_head *head;
+       void *data;
+       int size, esize;
+@@ -1374,6 +1384,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
+       esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
++      ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+       size = esize + ucb->dsize;
+       size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+       if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
+@@ -1410,21 +1421,21 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
+ /* uprobe profile handler */
+ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs,
+-                          struct uprobe_cpu_buffer *ucb)
++                          struct uprobe_cpu_buffer **ucbp)
+ {
+       if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+               return UPROBE_HANDLER_REMOVE;
+       if (!is_ret_probe(tu))
+-              __uprobe_perf_func(tu, 0, regs, ucb);
++              __uprobe_perf_func(tu, 0, regs, ucbp);
+       return 0;
+ }
+ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
+                               struct pt_regs *regs,
+-                              struct uprobe_cpu_buffer *ucb)
++                              struct uprobe_cpu_buffer **ucbp)
+ {
+-      __uprobe_perf_func(tu, func, regs, ucb);
++      __uprobe_perf_func(tu, func, regs, ucbp);
+ }
+ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+@@ -1489,7 +1500,7 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+ {
+       struct trace_uprobe *tu;
+       struct uprobe_dispatch_data udd;
+-      struct uprobe_cpu_buffer *ucb;
++      struct uprobe_cpu_buffer *ucb = NULL;
+       int ret = 0;
+       tu = container_of(con, struct trace_uprobe, consumer);
+@@ -1503,14 +1514,12 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
+       if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+               return 0;
+-      ucb = prepare_uprobe_buffer(tu, regs);
+-
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+-              ret |= uprobe_trace_func(tu, regs, ucb);
++              ret |= uprobe_trace_func(tu, regs, &ucb);
+ #ifdef CONFIG_PERF_EVENTS
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
+-              ret |= uprobe_perf_func(tu, regs, ucb);
++              ret |= uprobe_perf_func(tu, regs, &ucb);
+ #endif
+       uprobe_buffer_put(ucb);
+       return ret;
+@@ -1521,7 +1530,7 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
+ {
+       struct trace_uprobe *tu;
+       struct uprobe_dispatch_data udd;
+-      struct uprobe_cpu_buffer *ucb;
++      struct uprobe_cpu_buffer *ucb = NULL;
+       tu = container_of(con, struct trace_uprobe, consumer);
+@@ -1533,14 +1542,12 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
+       if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+               return 0;
+-      ucb = prepare_uprobe_buffer(tu, regs);
+-
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
+-              uretprobe_trace_func(tu, func, regs, ucb);
++              uretprobe_trace_func(tu, func, regs, &ucb);
+ #ifdef CONFIG_PERF_EVENTS
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
+-              uretprobe_perf_func(tu, func, regs, ucb);
++              uretprobe_perf_func(tu, func, regs, &ucb);
+ #endif
+       uprobe_buffer_put(ucb);
+       return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.6/uprobes-prevent-mutex_lock-under-rcu_read_lock.patch b/queue-6.6/uprobes-prevent-mutex_lock-under-rcu_read_lock.patch
new file mode 100644 (file)
index 0000000..ce04c46
--- /dev/null
@@ -0,0 +1,131 @@
+From c1d6e576464468471dee4c839ac22f4dbd9af8b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 May 2024 22:30:17 -0700
+Subject: uprobes: prevent mutex_lock() under rcu_read_lock()
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 699646734ab51bf5b1cd4a7a30c20074f6e74f6e ]
+
+Recent changes made uprobe_cpu_buffer preparation lazy, and moved it
+deeper into __uprobe_trace_func(). This is problematic because
+__uprobe_trace_func() is called inside rcu_read_lock()/rcu_read_unlock()
+block, which then calls prepare_uprobe_buffer() -> uprobe_buffer_get() ->
+mutex_lock(&ucb->mutex), leading to a splat about using mutex under
+non-sleepable RCU:
+
+  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585
+   in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 98231, name: stress-ng-sigq
+   preempt_count: 0, expected: 0
+   RCU nest depth: 1, expected: 0
+   ...
+   Call Trace:
+    <TASK>
+    dump_stack_lvl+0x3d/0xe0
+    __might_resched+0x24c/0x270
+    ? prepare_uprobe_buffer+0xd5/0x1d0
+    __mutex_lock+0x41/0x820
+    ? ___perf_sw_event+0x206/0x290
+    ? __perf_event_task_sched_in+0x54/0x660
+    ? __perf_event_task_sched_in+0x54/0x660
+    prepare_uprobe_buffer+0xd5/0x1d0
+    __uprobe_trace_func+0x4a/0x140
+    uprobe_dispatcher+0x135/0x280
+    ? uprobe_dispatcher+0x94/0x280
+    uprobe_notify_resume+0x650/0xec0
+    ? atomic_notifier_call_chain+0x21/0x110
+    ? atomic_notifier_call_chain+0xf8/0x110
+    irqentry_exit_to_user_mode+0xe2/0x1e0
+    asm_exc_int3+0x35/0x40
+   RIP: 0033:0x7f7e1d4da390
+   Code: 33 04 00 0f 1f 80 00 00 00 00 f3 0f 1e fa b9 01 00 00 00 e9 b2 fc ff ff 66 90 f3 0f 1e fa 31 c9 e9 a5 fc ff ff 0f 1f 44 00 00 <cc> 0f 1e fa b8 27 00 00 00 0f 05 c3 0f 1f 40 00 f3 0f 1e fa b8 6e
+   RSP: 002b:00007ffd2abc3608 EFLAGS: 00000246
+   RAX: 0000000000000000 RBX: 0000000076d325f1 RCX: 0000000000000000
+   RDX: 0000000076d325f1 RSI: 000000000000000a RDI: 00007ffd2abc3690
+   RBP: 000000000000000a R08: 00017fb700000000 R09: 00017fb700000000
+   R10: 00017fb700000000 R11: 0000000000000246 R12: 0000000000017ff2
+   R13: 00007ffd2abc3610 R14: 0000000000000000 R15: 00007ffd2abc3780
+    </TASK>
+
+Luckily, it's easy to fix by moving prepare_uprobe_buffer() to be called
+slightly earlier: into uprobe_trace_func() and uretprobe_trace_func(), outside
+of RCU locked section. This still keeps this buffer preparation lazy and helps
+avoid the overhead when it's not needed. E.g., if there is only a BPF uprobe
+handler installed on a given uprobe, the buffer won't be initialized.
+
+Note, the other user of prepare_uprobe_buffer(), __uprobe_perf_func(), is not
+affected, as it doesn't prepare buffer under RCU read lock.
+
+Link: https://lore.kernel.org/all/20240521053017.3708530-1-andrii@kernel.org/
+
+Fixes: 1b8f85defbc8 ("uprobes: prepare uprobe args buffer lazily")
+Reported-by: Breno Leitao <leitao@debian.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Stable-dep-of: 373b9338c972 ("uprobe: avoid out-of-bounds memory access of fetching args")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_uprobe.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
+index 6c2ab0e316d6a..0d52588329b29 100644
+--- a/kernel/trace/trace_uprobe.c
++++ b/kernel/trace/trace_uprobe.c
+@@ -970,19 +970,17 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
+ static void __uprobe_trace_func(struct trace_uprobe *tu,
+                               unsigned long func, struct pt_regs *regs,
+-                              struct uprobe_cpu_buffer **ucbp,
++                              struct uprobe_cpu_buffer *ucb,
+                               struct trace_event_file *trace_file)
+ {
+       struct uprobe_trace_entry_head *entry;
+       struct trace_event_buffer fbuffer;
+-      struct uprobe_cpu_buffer *ucb;
+       void *data;
+       int size, esize;
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+       WARN_ON(call != trace_file->event_call);
+-      ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+       if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
+               return;
+@@ -1014,13 +1012,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
+                            struct uprobe_cpu_buffer **ucbp)
+ {
+       struct event_file_link *link;
++      struct uprobe_cpu_buffer *ucb;
+       if (is_ret_probe(tu))
+               return 0;
++      ucb = prepare_uprobe_buffer(tu, regs, ucbp);
++
+       rcu_read_lock();
+       trace_probe_for_each_link_rcu(link, &tu->tp)
+-              __uprobe_trace_func(tu, 0, regs, ucbp, link->file);
++              __uprobe_trace_func(tu, 0, regs, ucb, link->file);
+       rcu_read_unlock();
+       return 0;
+@@ -1031,10 +1032,13 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
+                                struct uprobe_cpu_buffer **ucbp)
+ {
+       struct event_file_link *link;
++      struct uprobe_cpu_buffer *ucb;
++
++      ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+       rcu_read_lock();
+       trace_probe_for_each_link_rcu(link, &tu->tp)
+-              __uprobe_trace_func(tu, func, regs, ucbp, link->file);
++              __uprobe_trace_func(tu, func, regs, ucb, link->file);
+       rcu_read_unlock();
+ }
+-- 
+2.43.0
+