]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Thu, 3 Mar 2022 21:33:05 +0000 (16:33 -0500)
committerSasha Levin <sashal@kernel.org>
Thu, 3 Mar 2022 21:33:05 +0000 (16:33 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.10/ata-pata_hpt37x-fix-pci-clock-detection.patch [new file with mode: 0644]
queue-5.10/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch [new file with mode: 0644]
queue-5.10/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch [new file with mode: 0644]
queue-5.10/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch [new file with mode: 0644]
queue-5.10/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/tracing-add-test-for-user-space-strings-when-filteri.patch [new file with mode: 0644]
queue-5.10/tracing-add-ustring-operation-to-filtering-string-po.patch [new file with mode: 0644]

diff --git a/queue-5.10/ata-pata_hpt37x-fix-pci-clock-detection.patch b/queue-5.10/ata-pata_hpt37x-fix-pci-clock-detection.patch
new file mode 100644 (file)
index 0000000..e208240
--- /dev/null
@@ -0,0 +1,45 @@
+From 6264ac59d8e7772085817151fda5ca98ce01448f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Feb 2022 23:04:29 +0300
+Subject: ata: pata_hpt37x: fix PCI clock detection
+
+From: Sergey Shtylyov <s.shtylyov@omp.ru>
+
+[ Upstream commit 5f6b0f2d037c8864f20ff15311c695f65eb09db5 ]
+
+The f_CNT register (at the PCI config. address 0x78) is 16-bit, not
+8-bit! The bug was there from the very start... :-(
+
+Signed-off-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/pata_hpt37x.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
+index 499a947d56ddb..fef46de2f6b23 100644
+--- a/drivers/ata/pata_hpt37x.c
++++ b/drivers/ata/pata_hpt37x.c
+@@ -962,14 +962,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+       if ((freq >> 12) != 0xABCDE) {
+               int i;
+-              u8 sr;
++              u16 sr;
+               u32 total = 0;
+               pr_warn("BIOS has not set timing clocks\n");
+               /* This is the process the HPT371 BIOS is reported to use */
+               for (i = 0; i < 128; i++) {
+-                      pci_read_config_byte(dev, 0x78, &sr);
++                      pci_read_config_word(dev, 0x78, &sr);
+                       total += sr & 0x1FF;
+                       udelay(15);
+               }
+-- 
+2.34.1
+
diff --git a/queue-5.10/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch b/queue-5.10/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch
new file mode 100644 (file)
index 0000000..8e6daec
--- /dev/null
@@ -0,0 +1,83 @@
+From a6d60d77200808719ffb6765923ba8e909ae847e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Feb 2022 17:53:56 +0800
+Subject: drm/amdgpu: check vm ready by amdgpu_vm->evicting flag
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qiang Yu <qiang.yu@amd.com>
+
+[ Upstream commit c1a66c3bc425ff93774fb2f6eefa67b83170dd7e ]
+
+Workstation application ANSA/META v21.1.4 gets this error in dmesg when
+running the CI test suite provided by ANSA/META:
+[drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-16)
+
+This is caused by:
+1. create a 256MB buffer in invisible VRAM
+2. CPU map the buffer and access it causes vm_fault and try to move
+   it to visible VRAM
+3. force visible VRAM space and traverse all VRAM bos to check if
+   evicting this bo is valuable
+4. when checking a VM bo (in invisible VRAM), amdgpu_vm_evictable()
+   will set amdgpu_vm->evicting, but later, since the bo is not in
+   visible VRAM, it is not really evicted nor added to amdgpu_vm->evicted
+5. before next CS to clear the amdgpu_vm->evicting, user VM ops
+   ioctl will pass amdgpu_vm_ready() (check amdgpu_vm->evicted)
+   but fail in amdgpu_vm_bo_update_mapping() (check
+   amdgpu_vm->evicting) and get this error log
+
+This error won't affect functionality as next CS will finish the
+waiting VM ops. But we'd better clear the error log by checking
+the amdgpu_vm->evicting flag in amdgpu_vm_ready() to stop calling
+amdgpu_vm_bo_update_mapping() later.
+
+Another reason is amdgpu_vm->evicted list holds all BOs (both
+user buffer and page table), but only page table BOs' eviction
+prevents VM ops. The amdgpu_vm->evicting flag is set only for page
+table BOs, so we should use evicting flag instead of evicted list
+in amdgpu_vm_ready().
+
+The side effect of this change is: previously blocked VM op (user
+buffer in "evicted" list but no page table in it) gets done
+immediately.
+
+v2: update commit comments.
+
+Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Qiang Yu <qiang.yu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index b47829ff30af7..47cc038d7d506 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -715,11 +715,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+  * Check if all VM PDs/PTs are ready for updates
+  *
+  * Returns:
+- * True if eviction list is empty.
++ * True if VM is not evicting.
+  */
+ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+ {
+-      return list_empty(&vm->evicted);
++      bool ret;
++
++      amdgpu_vm_eviction_lock(vm);
++      ret = !vm->evicting;
++      amdgpu_vm_eviction_unlock(vm);
++      return ret;
+ }
+ /**
+-- 
+2.34.1
+
diff --git a/queue-5.10/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch b/queue-5.10/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch
new file mode 100644 (file)
index 0000000..b458929
--- /dev/null
@@ -0,0 +1,81 @@
+From 469b7cc76f58da21387f6f87be427d7da78b1e65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Nov 2021 22:02:37 +0900
+Subject: exfat: fix i_blocks for files truncated over 4 GiB
+
+From: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+
+[ Upstream commit 92fba084b79e6bc7b12fc118209f1922c1a2df56 ]
+
+In exfat_truncate(), the computation of inode->i_blocks is wrong if
+the file is larger than 4 GiB because a 32-bit variable is used as a
+mask. This is fixed and simplified by using round_up().
+
+Also fix the same buggy computation in exfat_read_root() and another
+(correct) one in exfat_fill_inode(). The latter was fixed another way
+last month but can be simplified by using round_up() as well. See:
+
+  commit 0c336d6e33f4 ("exfat: fix incorrect loading of i_blocks for
+                        large files")
+
+Fixes: 98d917047e8b ("exfat: add file operations")
+Cc: stable@vger.kernel.org # v5.7+
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exfat/file.c  | 4 ++--
+ fs/exfat/inode.c | 4 ++--
+ fs/exfat/super.c | 4 ++--
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/fs/exfat/file.c b/fs/exfat/file.c
+index 6258c5da3060b..c819e8427ea57 100644
+--- a/fs/exfat/file.c
++++ b/fs/exfat/file.c
+@@ -250,8 +250,8 @@ void exfat_truncate(struct inode *inode, loff_t size)
+       else
+               mark_inode_dirty(inode);
+-      inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
+-                      ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
++      inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
++                              inode->i_blkbits;
+ write_size:
+       aligned_size = i_size_read(inode);
+       if (aligned_size & (blocksize - 1)) {
+diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
+index d7f11b7ab46c5..2a9f6a80584ee 100644
+--- a/fs/exfat/inode.c
++++ b/fs/exfat/inode.c
+@@ -600,8 +600,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
+       exfat_save_attr(inode, info->attr);
+-      inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
+-              ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits;
++      inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
++                              inode->i_blkbits;
+       inode->i_mtime = info->mtime;
+       inode->i_ctime = info->mtime;
+       ei->i_crtime = info->crtime;
+diff --git a/fs/exfat/super.c b/fs/exfat/super.c
+index 7b91214a4110e..cd04c912f02e0 100644
+--- a/fs/exfat/super.c
++++ b/fs/exfat/super.c
+@@ -364,8 +364,8 @@ static int exfat_read_root(struct inode *inode)
+       inode->i_op = &exfat_dir_inode_operations;
+       inode->i_fop = &exfat_dir_operations;
+-      inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
+-                      & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
++      inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
++                              inode->i_blkbits;
+       ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+       ei->i_size_aligned = i_size_read(inode);
+       ei->i_size_ondisk = i_size_read(inode);
+-- 
+2.34.1
+
diff --git a/queue-5.10/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch b/queue-5.10/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch
new file mode 100644 (file)
index 0000000..70a3899
--- /dev/null
@@ -0,0 +1,134 @@
+From 763c459f8926db81943c34d6868f71990823ab93 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Nov 2021 22:23:58 +0100
+Subject: exfat: reuse exfat_inode_info variable instead of calling EXFAT_I()
+
+From: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+
+[ Upstream commit 7dee6f57d7f22a89dd214518c778aec448270d4c ]
+
+Also add a local "struct exfat_inode_info *ei" variable to
+exfat_truncate() to simplify the code.
+
+Signed-off-by: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exfat/file.c  | 14 +++++++-------
+ fs/exfat/inode.c |  9 ++++-----
+ fs/exfat/namei.c |  6 +++---
+ fs/exfat/super.c |  6 +++---
+ 4 files changed, 17 insertions(+), 18 deletions(-)
+
+diff --git a/fs/exfat/file.c b/fs/exfat/file.c
+index a92478eabfa4e..6258c5da3060b 100644
+--- a/fs/exfat/file.c
++++ b/fs/exfat/file.c
+@@ -109,8 +109,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
+       exfat_set_volume_dirty(sb);
+       num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
+-      num_clusters_phys =
+-              EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi);
++      num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+       exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
+@@ -227,12 +226,13 @@ void exfat_truncate(struct inode *inode, loff_t size)
+ {
+       struct super_block *sb = inode->i_sb;
+       struct exfat_sb_info *sbi = EXFAT_SB(sb);
++      struct exfat_inode_info *ei = EXFAT_I(inode);
+       unsigned int blocksize = i_blocksize(inode);
+       loff_t aligned_size;
+       int err;
+       mutex_lock(&sbi->s_lock);
+-      if (EXFAT_I(inode)->start_clu == 0) {
++      if (ei->start_clu == 0) {
+               /*
+                * Empty start_clu != ~0 (not allocated)
+                */
+@@ -259,11 +259,11 @@ void exfat_truncate(struct inode *inode, loff_t size)
+               aligned_size++;
+       }
+-      if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode))
+-              EXFAT_I(inode)->i_size_ondisk = aligned_size;
++      if (ei->i_size_ondisk > i_size_read(inode))
++              ei->i_size_ondisk = aligned_size;
+-      if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode))
+-              EXFAT_I(inode)->i_size_aligned = aligned_size;
++      if (ei->i_size_aligned > i_size_read(inode))
++              ei->i_size_aligned = aligned_size;
+       mutex_unlock(&sbi->s_lock);
+ }
+diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
+index 8b0288f70e93d..d7f11b7ab46c5 100644
+--- a/fs/exfat/inode.c
++++ b/fs/exfat/inode.c
+@@ -114,10 +114,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+       unsigned int local_clu_offset = clu_offset;
+       unsigned int num_to_be_allocated = 0, num_clusters = 0;
+-      if (EXFAT_I(inode)->i_size_ondisk > 0)
++      if (ei->i_size_ondisk > 0)
+               num_clusters =
+-                      EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk,
+-                      sbi);
++                      EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+       if (clu_offset >= num_clusters)
+               num_to_be_allocated = clu_offset - num_clusters + 1;
+@@ -415,10 +414,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
+       err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+-      if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
++      if (ei->i_size_aligned < i_size_read(inode)) {
+               exfat_fs_error(inode->i_sb,
+                       "invalid size(size(%llu) > aligned(%llu)\n",
+-                      i_size_read(inode), EXFAT_I(inode)->i_size_aligned);
++                      i_size_read(inode), ei->i_size_aligned);
+               return -EIO;
+       }
+diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
+index 2932b23a3b6c3..935f600509009 100644
+--- a/fs/exfat/namei.c
++++ b/fs/exfat/namei.c
+@@ -395,9 +395,9 @@ static int exfat_find_empty_entry(struct inode *inode,
+               /* directory inode should be updated in here */
+               i_size_write(inode, size);
+-              EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size;
+-              EXFAT_I(inode)->i_size_aligned += sbi->cluster_size;
+-              EXFAT_I(inode)->flags = p_dir->flags;
++              ei->i_size_ondisk += sbi->cluster_size;
++              ei->i_size_aligned += sbi->cluster_size;
++              ei->flags = p_dir->flags;
+               inode->i_blocks += 1 << sbi->sect_per_clus_bits;
+       }
+diff --git a/fs/exfat/super.c b/fs/exfat/super.c
+index c6d8d2e534865..7b91214a4110e 100644
+--- a/fs/exfat/super.c
++++ b/fs/exfat/super.c
+@@ -366,9 +366,9 @@ static int exfat_read_root(struct inode *inode)
+       inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
+                       & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
+-      EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+-      EXFAT_I(inode)->i_size_aligned = i_size_read(inode);
+-      EXFAT_I(inode)->i_size_ondisk = i_size_read(inode);
++      ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
++      ei->i_size_aligned = i_size_read(inode);
++      ei->i_size_ondisk = i_size_read(inode);
+       exfat_save_attr(inode, ATTR_SUBDIR);
+       inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+-- 
+2.34.1
+
diff --git a/queue-5.10/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch b/queue-5.10/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch
new file mode 100644 (file)
index 0000000..b95a4a2
--- /dev/null
@@ -0,0 +1,57 @@
+From 0bdd10a0970a47105a5f77da57df07d6a26d58e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jan 2022 17:44:40 +0100
+Subject: serial: stm32: prevent TDR register overwrite when sending x_char
+
+From: Valentin Caron <valentin.caron@foss.st.com>
+
+[ Upstream commit d3d079bde07e1b7deaeb57506dc0b86010121d17 ]
+
+When sending x_char in stm32_usart_transmit_chars(), driver can overwrite
+the value of TDR register by the value of x_char. If this happens, the
+previous value that was present in TDR register will not be sent through
+uart.
+
+This code checks if the previous value in TDR register is sent before
+writing the x_char value into register.
+
+Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver")
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Valentin Caron <valentin.caron@foss.st.com>
+Link: https://lore.kernel.org/r/20220111164441.6178-2-valentin.caron@foss.st.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/stm32-usart.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
+index 0eadf0547175c..6afae051ba8d1 100644
+--- a/drivers/tty/serial/stm32-usart.c
++++ b/drivers/tty/serial/stm32-usart.c
+@@ -420,10 +420,22 @@ static void stm32_usart_transmit_chars(struct uart_port *port)
+       struct stm32_port *stm32_port = to_stm32_port(port);
+       const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+       struct circ_buf *xmit = &port->state->xmit;
++      u32 isr;
++      int ret;
+       if (port->x_char) {
+               if (stm32_port->tx_dma_busy)
+                       stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
++
++              /* Check that TDR is empty before filling FIFO */
++              ret =
++              readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
++                                                isr,
++                                                (isr & USART_SR_TXE),
++                                                10, 1000);
++              if (ret)
++                      dev_warn(port->dev, "1 character may be erased\n");
++
+               writel_relaxed(port->x_char, port->membase + ofs->tdr);
+               port->x_char = 0;
+               port->icount.tx++;
+-- 
+2.34.1
+
index 5c25117290cb6c5d587c5bdcd7b7fe0f0b5e66bf..261d4350a853fc3f051d7ddb683c9a26d6ae74bf 100644 (file)
@@ -15,3 +15,10 @@ i2c-qup-allow-compile_test.patch
 net-usb-cdc_mbim-avoid-altsetting-toggling-for-telit.patch
 usb-gadget-don-t-release-an-existing-dev-buf.patch
 usb-gadget-clear-related-members-when-goto-fail.patch
+exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch
+exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch
+tracing-add-test-for-user-space-strings-when-filteri.patch
+serial-stm32-prevent-tdr-register-overwrite-when-sen.patch
+ata-pata_hpt37x-fix-pci-clock-detection.patch
+drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch
+tracing-add-ustring-operation-to-filtering-string-po.patch
diff --git a/queue-5.10/tracing-add-test-for-user-space-strings-when-filteri.patch b/queue-5.10/tracing-add-test-for-user-space-strings-when-filteri.patch
new file mode 100644 (file)
index 0000000..645c89b
--- /dev/null
@@ -0,0 +1,217 @@
+From c8a49e25979081b12e10347d8f0e23d16a1fabd4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jan 2022 11:55:32 -0500
+Subject: tracing: Add test for user space strings when filtering on string
+ pointers
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 77360f9bbc7e5e2ab7a2c8b4c0244fbbfcfc6f62 ]
+
+Pingfan reported that the following causes a fault:
+
+  echo "filename ~ \"cpu\"" > events/syscalls/sys_enter_openat/filter
+  echo 1 > events/syscalls/sys_enter_openat/enable
+
+The reason is that trace event filter treats the user space pointer
+defined by "filename" as a normal pointer to compare against the "cpu"
+string. The following bug happened:
+
+ kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0001) - permissions violation
+ PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867
+ Oops: 0001 [#1] PREEMPT SMP PTI
+ CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1
+ Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+ RIP: 0010:strlen+0x0/0x20
+ Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11
+       48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8
+       48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31
+ RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246
+ RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000
+ RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60
+ RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000
+ R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380
+ R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0
+ FS:  00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ PKRU: 55555554
+ Call Trace:
+  filter_pred_pchar+0x18/0x40
+  filter_match_preds+0x31/0x70
+  ftrace_syscall_enter+0x27a/0x2c0
+  syscall_trace_enter.constprop.0+0x1aa/0x1d0
+  do_syscall_64+0x16/0x90
+  entry_SYSCALL_64_after_hwframe+0x44/0xae
+ RIP: 0033:0x7fa861d88664
+
+The above happened because the kernel tried to access user space directly
+and triggered a "supervisor read access in kernel mode" fault. Worse yet,
+the memory might not even be loaded yet, and a SEGFAULT could happen as
+well. This could be true for kernel space accesses as well.
+
+To be even more robust, test both kernel and user space strings. If the
+string fails to read, then simply have the filter fail.
+
+Note, TASK_SIZE is used to determine if the pointer is user or kernel space
+and the appropriate strncpy_from_kernel/user_nofault() function is used to
+copy the memory. For some architectures, the compare to TASK_SIZE may always
+pick user space or kernel space. If it gets it wrong, the only thing is that
+the filter will fail to match. In the future, this needs to be fixed to have
+the event denote which should be used. But failing a filter is much better
+than panicking the machine, and that can be solved later.
+
+Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@gmail.com/
+Link: https://lkml.kernel.org/r/20220110115532.536088fd@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Tom Zanussi <zanussi@kernel.org>
+Reported-by: Pingfan Liu <kernelfans@gmail.com>
+Tested-by: Pingfan Liu <kernelfans@gmail.com>
+Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings")
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/trace/events.rst     | 10 +++++
+ kernel/trace/trace_events_filter.c | 66 ++++++++++++++++++++++++++++--
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
+index 2a5aa48eff6c7..58a471b690e07 100644
+--- a/Documentation/trace/events.rst
++++ b/Documentation/trace/events.rst
+@@ -230,6 +230,16 @@ Currently the caret ('^') for an error always appears at the beginning of
+ the filter string; the error message should still be useful though
+ even without more accurate position info.
++5.2.1 Filter limitations
++------------------------
++
++If a filter is placed on a string pointer ``(char *)`` that does not point
++to a string on the ring buffer, but instead points to kernel or user space
++memory, then, for safety reasons, at most 1024 bytes of the content is
++copied onto a temporary buffer to do the compare. If the copy of the memory
++faults (the pointer points to memory that should not be accessed), then the
++string compare will be treated as not matching.
++
+ 5.3 Clearing filters
+ --------------------
+diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
+index 78a678eeb1409..7b1fb811cb0ab 100644
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -5,6 +5,7 @@
+  * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+  */
++#include <linux/uaccess.h>
+ #include <linux/module.h>
+ #include <linux/ctype.h>
+ #include <linux/mutex.h>
+@@ -654,6 +655,47 @@ DEFINE_EQUALITY_PRED(32);
+ DEFINE_EQUALITY_PRED(16);
+ DEFINE_EQUALITY_PRED(8);
++/* user space strings temp buffer */
++#define USTRING_BUF_SIZE      1024
++
++struct ustring_buffer {
++      char            buffer[USTRING_BUF_SIZE];
++};
++
++static __percpu struct ustring_buffer *ustring_per_cpu;
++
++static __always_inline char *test_string(char *str)
++{
++      struct ustring_buffer *ubuf;
++      char __user *ustr;
++      char *kstr;
++
++      if (!ustring_per_cpu)
++              return NULL;
++
++      ubuf = this_cpu_ptr(ustring_per_cpu);
++      kstr = ubuf->buffer;
++
++      /*
++       * We use TASK_SIZE to denote user or kernel space, but this will
++       * not work for all architectures. If it picks the wrong one, it may
++       * just fail the filter (but will not bug).
++       *
++       * TODO: Have a way to properly denote which one this is for.
++       */
++      if (likely((unsigned long)str >= TASK_SIZE)) {
++              /* For safety, do not trust the string pointer */
++              if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
++                      return NULL;
++      } else {
++              /* user space address? */
++              ustr = (char __user *)str;
++              if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
++                      return NULL;
++      }
++      return kstr;
++}
++
+ /* Filter predicate for fixed sized arrays of characters */
+ static int filter_pred_string(struct filter_pred *pred, void *event)
+ {
+@@ -671,10 +713,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
+ static int filter_pred_pchar(struct filter_pred *pred, void *event)
+ {
+       char **addr = (char **)(event + pred->offset);
++      char *str;
+       int cmp, match;
+-      int len = strlen(*addr) + 1;    /* including tailing '\0' */
++      int len;
+-      cmp = pred->regex.match(*addr, &pred->regex, len);
++      str = test_string(*addr);
++      if (!str)
++              return 0;
++
++      len = strlen(str) + 1;  /* including tailing '\0' */
++      cmp = pred->regex.match(str, &pred->regex, len);
+       match = cmp ^ pred->not;
+@@ -1320,8 +1368,17 @@ static int parse_pred(const char *str, void *data,
+               } else if (field->filter_type == FILTER_DYN_STRING)
+                       pred->fn = filter_pred_strloc;
+-              else
++              else {
++
++                      if (!ustring_per_cpu) {
++                              /* Once allocated, keep it around for good */
++                              ustring_per_cpu = alloc_percpu(struct ustring_buffer);
++                              if (!ustring_per_cpu)
++                                      goto err_mem;
++                      }
++
+                       pred->fn = filter_pred_pchar;
++              }
+               /* go past the last quote */
+               i++;
+@@ -1387,6 +1444,9 @@ static int parse_pred(const char *str, void *data,
+ err_free:
+       kfree(pred);
+       return -EINVAL;
++err_mem:
++      kfree(pred);
++      return -ENOMEM;
+ }
+ enum {
+-- 
+2.34.1
+
diff --git a/queue-5.10/tracing-add-ustring-operation-to-filtering-string-po.patch b/queue-5.10/tracing-add-ustring-operation-to-filtering-string-po.patch
new file mode 100644 (file)
index 0000000..6f5df8a
--- /dev/null
@@ -0,0 +1,190 @@
+From 27df2542253ffb9db857900699a561eaaa754ad9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jan 2022 20:08:40 -0500
+Subject: tracing: Add ustring operation to filtering string pointers
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit f37c3bbc635994eda203a6da4ba0f9d05165a8d6 ]
+
+Since referencing user space pointers is special, if the user wants to
+filter on a field that is a pointer to user space, then they need to
+specify it.
+
+Add a ".ustring" attribute to the field name for filters to state that the
+field is pointing to user space such that the kernel can take the
+appropriate action to read that pointer.
+
+Link: https://lore.kernel.org/all/yt9d8rvmt2jq.fsf@linux.ibm.com/
+
+Fixes: 77360f9bbc7e ("tracing: Add test for user space strings when filtering on string pointers")
+Tested-by: Sven Schnelle <svens@linux.ibm.com>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/trace/events.rst     |  9 ++++
+ kernel/trace/trace_events_filter.c | 81 +++++++++++++++++++++---------
+ 2 files changed, 66 insertions(+), 24 deletions(-)
+
+diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
+index 58a471b690e07..9df29a935757a 100644
+--- a/Documentation/trace/events.rst
++++ b/Documentation/trace/events.rst
+@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes
+   prev_comm ~ "*sh*"
+   prev_comm ~ "ba*sh"
++If the field is a pointer that points into user space (for example
++"filename" from sys_enter_openat), then you have to append ".ustring" to the
++field name::
++
++  filename.ustring ~ "password"
++
++As the kernel will have to know how to retrieve the memory that the pointer
++is at from user space.
++
+ 5.2 Setting filters
+ -------------------
+diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
+index 7b1fb811cb0ab..a255ffbe342f3 100644
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -665,6 +665,23 @@ struct ustring_buffer {
+ static __percpu struct ustring_buffer *ustring_per_cpu;
+ static __always_inline char *test_string(char *str)
++{
++      struct ustring_buffer *ubuf;
++      char *kstr;
++
++      if (!ustring_per_cpu)
++              return NULL;
++
++      ubuf = this_cpu_ptr(ustring_per_cpu);
++      kstr = ubuf->buffer;
++
++      /* For safety, do not trust the string pointer */
++      if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
++              return NULL;
++      return kstr;
++}
++
++static __always_inline char *test_ustring(char *str)
+ {
+       struct ustring_buffer *ubuf;
+       char __user *ustr;
+@@ -676,23 +693,11 @@ static __always_inline char *test_string(char *str)
+       ubuf = this_cpu_ptr(ustring_per_cpu);
+       kstr = ubuf->buffer;
+-      /*
+-       * We use TASK_SIZE to denote user or kernel space, but this will
+-       * not work for all architectures. If it picks the wrong one, it may
+-       * just fail the filter (but will not bug).
+-       *
+-       * TODO: Have a way to properly denote which one this is for.
+-       */
+-      if (likely((unsigned long)str >= TASK_SIZE)) {
+-              /* For safety, do not trust the string pointer */
+-              if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
+-                      return NULL;
+-      } else {
+-              /* user space address? */
+-              ustr = (char __user *)str;
+-              if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
+-                      return NULL;
+-      }
++      /* user space address? */
++      ustr = (char __user *)str;
++      if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
++              return NULL;
++
+       return kstr;
+ }
+@@ -709,24 +714,42 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
+       return match;
+ }
++static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
++{
++      int cmp, match;
++      int len;
++
++      len = strlen(str) + 1;  /* including tailing '\0' */
++      cmp = pred->regex.match(str, &pred->regex, len);
++
++      match = cmp ^ pred->not;
++
++      return match;
++}
+ /* Filter predicate for char * pointers */
+ static int filter_pred_pchar(struct filter_pred *pred, void *event)
+ {
+       char **addr = (char **)(event + pred->offset);
+       char *str;
+-      int cmp, match;
+-      int len;
+       str = test_string(*addr);
+       if (!str)
+               return 0;
+-      len = strlen(str) + 1;  /* including tailing '\0' */
+-      cmp = pred->regex.match(str, &pred->regex, len);
++      return filter_pchar(pred, str);
++}
+-      match = cmp ^ pred->not;
++/* Filter predicate for char * pointers in user space*/
++static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
++{
++      char **addr = (char **)(event + pred->offset);
++      char *str;
+-      return match;
++      str = test_ustring(*addr);
++      if (!str)
++              return 0;
++
++      return filter_pchar(pred, str);
+ }
+ /*
+@@ -1206,6 +1229,7 @@ static int parse_pred(const char *str, void *data,
+       struct filter_pred *pred = NULL;
+       char num_buf[24];       /* Big enough to hold an address */
+       char *field_name;
++      bool ustring = false;
+       char q;
+       u64 val;
+       int len;
+@@ -1240,6 +1264,12 @@ static int parse_pred(const char *str, void *data,
+               return -EINVAL;
+       }
++      /* See if the field is a user space string */
++      if ((len = str_has_prefix(str + i, ".ustring"))) {
++              ustring = true;
++              i += len;
++      }
++
+       while (isspace(str[i]))
+               i++;
+@@ -1377,7 +1407,10 @@ static int parse_pred(const char *str, void *data,
+                                       goto err_mem;
+                       }
+-                      pred->fn = filter_pred_pchar;
++                      if (ustring)
++                              pred->fn = filter_pred_pchar_user;
++                      else
++                              pred->fn = filter_pred_pchar;
+               }
+               /* go past the last quote */
+               i++;
+-- 
+2.34.1
+