From: Sasha Levin Date: Thu, 3 Mar 2022 21:33:05 +0000 (-0500) Subject: Fixes for 5.10 X-Git-Tag: v4.9.305~97 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7f8ea1fb2db00d02d9db61e88f7939099c0e09f3;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/ata-pata_hpt37x-fix-pci-clock-detection.patch b/queue-5.10/ata-pata_hpt37x-fix-pci-clock-detection.patch new file mode 100644 index 00000000000..e2082408160 --- /dev/null +++ b/queue-5.10/ata-pata_hpt37x-fix-pci-clock-detection.patch @@ -0,0 +1,45 @@ +From 6264ac59d8e7772085817151fda5ca98ce01448f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Feb 2022 23:04:29 +0300 +Subject: ata: pata_hpt37x: fix PCI clock detection + +From: Sergey Shtylyov + +[ Upstream commit 5f6b0f2d037c8864f20ff15311c695f65eb09db5 ] + +The f_CNT register (at the PCI config. address 0x78) is 16-bit, not +8-bit! The bug was there from the very start... :-( + +Signed-off-by: Sergey Shtylyov +Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Signed-off-by: Sasha Levin +--- + drivers/ata/pata_hpt37x.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c +index 499a947d56ddb..fef46de2f6b23 100644 +--- a/drivers/ata/pata_hpt37x.c ++++ b/drivers/ata/pata_hpt37x.c +@@ -962,14 +962,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) + + if ((freq >> 12) != 0xABCDE) { + int i; +- u8 sr; ++ u16 sr; + u32 total = 0; + + pr_warn("BIOS has not set timing clocks\n"); + + /* This is the process the HPT371 BIOS is reported to use */ + for (i = 0; i < 128; i++) { +- pci_read_config_byte(dev, 0x78, &sr); ++ pci_read_config_word(dev, 0x78, &sr); + total += sr & 0x1FF; + udelay(15); + } +-- +2.34.1 + diff --git a/queue-5.10/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch 
b/queue-5.10/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch new file mode 100644 index 00000000000..8e6daecd614 --- /dev/null +++ b/queue-5.10/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch @@ -0,0 +1,83 @@ +From a6d60d77200808719ffb6765923ba8e909ae847e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Feb 2022 17:53:56 +0800 +Subject: drm/amdgpu: check vm ready by amdgpu_vm->evicting flag +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Qiang Yu + +[ Upstream commit c1a66c3bc425ff93774fb2f6eefa67b83170dd7e ] + +Workstation application ANSA/META v21.1.4 get this error dmesg when +running CI test suite provided by ANSA/META: +[drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-16) + +This is caused by: +1. create a 256MB buffer in invisible VRAM +2. CPU map the buffer and access it causes vm_fault and try to move + it to visible VRAM +3. force visible VRAM space and traverse all VRAM bos to check if + evicting this bo is valuable +4. when checking a VM bo (in invisible VRAM), amdgpu_vm_evictable() + will set amdgpu_vm->evicting, but later due to not in visible + VRAM, won't really evict it so not add it to amdgpu_vm->evicted +5. before next CS to clear the amdgpu_vm->evicting, user VM ops + ioctl will pass amdgpu_vm_ready() (check amdgpu_vm->evicted) + but fail in amdgpu_vm_bo_update_mapping() (check + amdgpu_vm->evicting) and get this error log + +This error won't affect functionality as next CS will finish the +waiting VM ops. But we'd better clear the error log by checking +the amdgpu_vm->evicting flag in amdgpu_vm_ready() to stop calling +amdgpu_vm_bo_update_mapping() later. + +Another reason is amdgpu_vm->evicted list holds all BOs (both +user buffer and page table), but only page table BOs' eviction +prevent VM ops. amdgpu_vm->evicting flag is set only for page +table BOs, so we should use evicting flag instead of evicted list +in amdgpu_vm_ready(). 
+ +The side effect of this change is: previously blocked VM op (user +buffer in "evicted" list but no page table in it) gets done +immediately. + +v2: update commit comments. + +Acked-by: Paul Menzel +Reviewed-by: Christian König +Signed-off-by: Qiang Yu +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index b47829ff30af7..47cc038d7d506 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -715,11 +715,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, + * Check if all VM PDs/PTs are ready for updates + * + * Returns: +- * True if eviction list is empty. ++ * True if VM is not evicting. + */ + bool amdgpu_vm_ready(struct amdgpu_vm *vm) + { +- return list_empty(&vm->evicted); ++ bool ret; ++ ++ amdgpu_vm_eviction_lock(vm); ++ ret = !vm->evicting; ++ amdgpu_vm_eviction_unlock(vm); ++ return ret; + } + + /** +-- +2.34.1 + diff --git a/queue-5.10/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch b/queue-5.10/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch new file mode 100644 index 00000000000..b4589291a90 --- /dev/null +++ b/queue-5.10/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch @@ -0,0 +1,81 @@ +From 469b7cc76f58da21387f6f87be427d7da78b1e65 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Nov 2021 22:02:37 +0900 +Subject: exfat: fix i_blocks for files truncated over 4 GiB + +From: Christophe Vu-Brugier + +[ Upstream commit 92fba084b79e6bc7b12fc118209f1922c1a2df56 ] + +In exfat_truncate(), the computation of inode->i_blocks is wrong if +the file is larger than 4 GiB because a 32-bit variable is used as a +mask. This is fixed and simplified by using round_up(). 
+ +Also fix the same buggy computation in exfat_read_root() and another +(correct) one in exfat_fill_inode(). The latter was fixed another way +last month but can be simplified by using round_up() as well. See: + + commit 0c336d6e33f4 ("exfat: fix incorrect loading of i_blocks for + large files") + +Fixes: 98d917047e8b ("exfat: add file operations") +Cc: stable@vger.kernel.org # v5.7+ +Suggested-by: Matthew Wilcox +Reviewed-by: Sungjong Seo +Signed-off-by: Christophe Vu-Brugier +Signed-off-by: Namjae Jeon +Signed-off-by: Sasha Levin +--- + fs/exfat/file.c | 4 ++-- + fs/exfat/inode.c | 4 ++-- + fs/exfat/super.c | 4 ++-- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/fs/exfat/file.c b/fs/exfat/file.c +index 6258c5da3060b..c819e8427ea57 100644 +--- a/fs/exfat/file.c ++++ b/fs/exfat/file.c +@@ -250,8 +250,8 @@ void exfat_truncate(struct inode *inode, loff_t size) + else + mark_inode_dirty(inode); + +- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & +- ~(sbi->cluster_size - 1)) >> inode->i_blkbits; ++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> ++ inode->i_blkbits; + write_size: + aligned_size = i_size_read(inode); + if (aligned_size & (blocksize - 1)) { +diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c +index d7f11b7ab46c5..2a9f6a80584ee 100644 +--- a/fs/exfat/inode.c ++++ b/fs/exfat/inode.c +@@ -600,8 +600,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) + + exfat_save_attr(inode, info->attr); + +- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & +- ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits; ++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> ++ inode->i_blkbits; + inode->i_mtime = info->mtime; + inode->i_ctime = info->mtime; + ei->i_crtime = info->crtime; +diff --git a/fs/exfat/super.c b/fs/exfat/super.c +index 7b91214a4110e..cd04c912f02e0 100644 +--- a/fs/exfat/super.c ++++ b/fs/exfat/super.c +@@ -364,8 +364,8 @@ 
static int exfat_read_root(struct inode *inode) + inode->i_op = &exfat_dir_inode_operations; + inode->i_fop = &exfat_dir_operations; + +- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) +- & ~(sbi->cluster_size - 1)) >> inode->i_blkbits; ++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> ++ inode->i_blkbits; + ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; + ei->i_size_aligned = i_size_read(inode); + ei->i_size_ondisk = i_size_read(inode); +-- +2.34.1 + diff --git a/queue-5.10/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch b/queue-5.10/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch new file mode 100644 index 00000000000..70a389994e1 --- /dev/null +++ b/queue-5.10/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch @@ -0,0 +1,134 @@ +From 763c459f8926db81943c34d6868f71990823ab93 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Nov 2021 22:23:58 +0100 +Subject: exfat: reuse exfat_inode_info variable instead of calling EXFAT_I() + +From: Christophe Vu-Brugier + +[ Upstream commit 7dee6f57d7f22a89dd214518c778aec448270d4c ] + +Also add a local "struct exfat_inode_info *ei" variable to +exfat_truncate() to simplify the code. 
+ +Signed-off-by: Christophe Vu-Brugier +Signed-off-by: Namjae Jeon +Signed-off-by: Sasha Levin +--- + fs/exfat/file.c | 14 +++++++------- + fs/exfat/inode.c | 9 ++++----- + fs/exfat/namei.c | 6 +++--- + fs/exfat/super.c | 6 +++--- + 4 files changed, 17 insertions(+), 18 deletions(-) + +diff --git a/fs/exfat/file.c b/fs/exfat/file.c +index a92478eabfa4e..6258c5da3060b 100644 +--- a/fs/exfat/file.c ++++ b/fs/exfat/file.c +@@ -109,8 +109,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) + exfat_set_volume_dirty(sb); + + num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); +- num_clusters_phys = +- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi); ++ num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + + exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); + +@@ -227,12 +226,13 @@ void exfat_truncate(struct inode *inode, loff_t size) + { + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); ++ struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned int blocksize = i_blocksize(inode); + loff_t aligned_size; + int err; + + mutex_lock(&sbi->s_lock); +- if (EXFAT_I(inode)->start_clu == 0) { ++ if (ei->start_clu == 0) { + /* + * Empty start_clu != ~0 (not allocated) + */ +@@ -259,11 +259,11 @@ void exfat_truncate(struct inode *inode, loff_t size) + aligned_size++; + } + +- if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode)) +- EXFAT_I(inode)->i_size_ondisk = aligned_size; ++ if (ei->i_size_ondisk > i_size_read(inode)) ++ ei->i_size_ondisk = aligned_size; + +- if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode)) +- EXFAT_I(inode)->i_size_aligned = aligned_size; ++ if (ei->i_size_aligned > i_size_read(inode)) ++ ei->i_size_aligned = aligned_size; + mutex_unlock(&sbi->s_lock); + } + +diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c +index 8b0288f70e93d..d7f11b7ab46c5 100644 +--- a/fs/exfat/inode.c ++++ b/fs/exfat/inode.c +@@ -114,10 +114,9 @@ static int 
exfat_map_cluster(struct inode *inode, unsigned int clu_offset, + unsigned int local_clu_offset = clu_offset; + unsigned int num_to_be_allocated = 0, num_clusters = 0; + +- if (EXFAT_I(inode)->i_size_ondisk > 0) ++ if (ei->i_size_ondisk > 0) + num_clusters = +- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, +- sbi); ++ EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + + if (clu_offset >= num_clusters) + num_to_be_allocated = clu_offset - num_clusters + 1; +@@ -415,10 +414,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping, + + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + +- if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) { ++ if (ei->i_size_aligned < i_size_read(inode)) { + exfat_fs_error(inode->i_sb, + "invalid size(size(%llu) > aligned(%llu)\n", +- i_size_read(inode), EXFAT_I(inode)->i_size_aligned); ++ i_size_read(inode), ei->i_size_aligned); + return -EIO; + } + +diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c +index 2932b23a3b6c3..935f600509009 100644 +--- a/fs/exfat/namei.c ++++ b/fs/exfat/namei.c +@@ -395,9 +395,9 @@ static int exfat_find_empty_entry(struct inode *inode, + + /* directory inode should be updated in here */ + i_size_write(inode, size); +- EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size; +- EXFAT_I(inode)->i_size_aligned += sbi->cluster_size; +- EXFAT_I(inode)->flags = p_dir->flags; ++ ei->i_size_ondisk += sbi->cluster_size; ++ ei->i_size_aligned += sbi->cluster_size; ++ ei->flags = p_dir->flags; + inode->i_blocks += 1 << sbi->sect_per_clus_bits; + } + +diff --git a/fs/exfat/super.c b/fs/exfat/super.c +index c6d8d2e534865..7b91214a4110e 100644 +--- a/fs/exfat/super.c ++++ b/fs/exfat/super.c +@@ -366,9 +366,9 @@ static int exfat_read_root(struct inode *inode) + + inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) + & ~(sbi->cluster_size - 1)) >> inode->i_blkbits; +- EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; +- 
EXFAT_I(inode)->i_size_aligned = i_size_read(inode); +- EXFAT_I(inode)->i_size_ondisk = i_size_read(inode); ++ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; ++ ei->i_size_aligned = i_size_read(inode); ++ ei->i_size_ondisk = i_size_read(inode); + + exfat_save_attr(inode, ATTR_SUBDIR); + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = +-- +2.34.1 + diff --git a/queue-5.10/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch b/queue-5.10/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch new file mode 100644 index 00000000000..b95a4a23448 --- /dev/null +++ b/queue-5.10/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch @@ -0,0 +1,57 @@ +From 0bdd10a0970a47105a5f77da57df07d6a26d58e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jan 2022 17:44:40 +0100 +Subject: serial: stm32: prevent TDR register overwrite when sending x_char + +From: Valentin Caron + +[ Upstream commit d3d079bde07e1b7deaeb57506dc0b86010121d17 ] + +When sending x_char in stm32_usart_transmit_chars(), driver can overwrite +the value of TDR register by the value of x_char. If this happens, the +previous value that was present in TDR register will not be sent through +uart. + +This code checks if the previous value in TDR register is sent before +writing the x_char value into register. 
+ +Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") +Cc: stable +Signed-off-by: Valentin Caron +Link: https://lore.kernel.org/r/20220111164441.6178-2-valentin.caron@foss.st.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/stm32-usart.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c +index 0eadf0547175c..6afae051ba8d1 100644 +--- a/drivers/tty/serial/stm32-usart.c ++++ b/drivers/tty/serial/stm32-usart.c +@@ -420,10 +420,22 @@ static void stm32_usart_transmit_chars(struct uart_port *port) + struct stm32_port *stm32_port = to_stm32_port(port); + const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; + struct circ_buf *xmit = &port->state->xmit; ++ u32 isr; ++ int ret; + + if (port->x_char) { + if (stm32_port->tx_dma_busy) + stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT); ++ ++ /* Check that TDR is empty before filling FIFO */ ++ ret = ++ readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, ++ isr, ++ (isr & USART_SR_TXE), ++ 10, 1000); ++ if (ret) ++ dev_warn(port->dev, "1 character may be erased\n"); ++ + writel_relaxed(port->x_char, port->membase + ofs->tdr); + port->x_char = 0; + port->icount.tx++; +-- +2.34.1 + diff --git a/queue-5.10/series b/queue-5.10/series index 5c25117290c..261d4350a85 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -15,3 +15,10 @@ i2c-qup-allow-compile_test.patch net-usb-cdc_mbim-avoid-altsetting-toggling-for-telit.patch usb-gadget-don-t-release-an-existing-dev-buf.patch usb-gadget-clear-related-members-when-goto-fail.patch +exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch +exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch +tracing-add-test-for-user-space-strings-when-filteri.patch +serial-stm32-prevent-tdr-register-overwrite-when-sen.patch +ata-pata_hpt37x-fix-pci-clock-detection.patch 
+drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch +tracing-add-ustring-operation-to-filtering-string-po.patch diff --git a/queue-5.10/tracing-add-test-for-user-space-strings-when-filteri.patch b/queue-5.10/tracing-add-test-for-user-space-strings-when-filteri.patch new file mode 100644 index 00000000000..645c89b746a --- /dev/null +++ b/queue-5.10/tracing-add-test-for-user-space-strings-when-filteri.patch @@ -0,0 +1,217 @@ +From c8a49e25979081b12e10347d8f0e23d16a1fabd4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jan 2022 11:55:32 -0500 +Subject: tracing: Add test for user space strings when filtering on string + pointers + +From: Steven Rostedt + +[ Upstream commit 77360f9bbc7e5e2ab7a2c8b4c0244fbbfcfc6f62 ] + +Pingfan reported that the following causes a fault: + + echo "filename ~ \"cpu\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_at/enable + +The reason is that trace event filter treats the user space pointer +defined by "filename" as a normal pointer to compare against the "cpu" +string. 
The following bug happened: + + kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0001) - permissions violation + PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867 + Oops: 0001 [#1] PREEMPT SMP PTI + CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + RIP: 0010:strlen+0x0/0x20 + Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 + 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 + 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 + RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246 + RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000 + RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60 + RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000 + R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380 + R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0 + FS: 00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + filter_pred_pchar+0x18/0x40 + filter_match_preds+0x31/0x70 + ftrace_syscall_enter+0x27a/0x2c0 + syscall_trace_enter.constprop.0+0x1aa/0x1d0 + do_syscall_64+0x16/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xae + RIP: 0033:0x7fa861d88664 + +The above happened because the kernel tried to access user space directly +and triggered a "supervisor read access in kernel mode" fault. Worse yet, +the memory could not even be loaded yet, and a SEGFAULT could happen as +well. This could be true for kernel space accessing as well. 
+ +To be even more robust, test both kernel and user space strings. If the +string fails to read, then simply have the filter fail. + +Note, TASK_SIZE is used to determine if the pointer is user or kernel space +and the appropriate strncpy_from_kernel/user_nofault() function is used to +copy the memory. For some architectures, the compare to TASK_SIZE may always +pick user space or kernel space. If it gets it wrong, the only thing is that +the filter will fail to match. In the future, this needs to be fixed to have +the event denote which should be used. But failing a filter is much better +than panicking the machine, and that can be solved later. + +Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@gmail.com/ +Link: https://lkml.kernel.org/r/20220110115532.536088fd@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Ingo Molnar +Cc: Andrew Morton +Cc: Masami Hiramatsu +Cc: Tom Zanussi +Reported-by: Pingfan Liu +Tested-by: Pingfan Liu +Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings") +Signed-off-by: Steven Rostedt +Signed-off-by: Sasha Levin +--- + Documentation/trace/events.rst | 10 +++++ + kernel/trace/trace_events_filter.c | 66 ++++++++++++++++++++++++++++-- + 2 files changed, 73 insertions(+), 3 deletions(-) + +diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst +index 2a5aa48eff6c7..58a471b690e07 100644 +--- a/Documentation/trace/events.rst ++++ b/Documentation/trace/events.rst +@@ -230,6 +230,16 @@ Currently the caret ('^') for an error always appears at the beginning of + the filter string; the error message should still be useful though + even without more accurate position info. 
+ ++5.2.1 Filter limitations ++------------------------ ++ ++If a filter is placed on a string pointer ``(char *)`` that does not point ++to a string on the ring buffer, but instead points to kernel or user space ++memory, then, for safety reasons, at most 1024 bytes of the content is ++copied onto a temporary buffer to do the compare. If the copy of the memory ++faults (the pointer points to memory that should not be accessed), then the ++string compare will be treated as not matching. ++ + 5.3 Clearing filters + -------------------- + +diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c +index 78a678eeb1409..7b1fb811cb0ab 100644 +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -5,6 +5,7 @@ + * Copyright (C) 2009 Tom Zanussi + */ + ++#include <linux/uaccess.h> + #include <linux/module.h> + #include <linux/ctype.h> + #include <linux/mutex.h> +@@ -654,6 +655,47 @@ DEFINE_EQUALITY_PRED(32); + DEFINE_EQUALITY_PRED(16); + DEFINE_EQUALITY_PRED(8); + ++/* user space strings temp buffer */ ++#define USTRING_BUF_SIZE 1024 ++ ++struct ustring_buffer { ++ char buffer[USTRING_BUF_SIZE]; ++}; ++ ++static __percpu struct ustring_buffer *ustring_per_cpu; ++ ++static __always_inline char *test_string(char *str) ++{ ++ struct ustring_buffer *ubuf; ++ char __user *ustr; ++ char *kstr; ++ ++ if (!ustring_per_cpu) ++ return NULL; ++ ++ ubuf = this_cpu_ptr(ustring_per_cpu); ++ kstr = ubuf->buffer; ++ ++ /* ++ * We use TASK_SIZE to denote user or kernel space, but this will ++ * not work for all architectures. If it picks the wrong one, it may ++ * just fail the filter (but will not bug). ++ * ++ * TODO: Have a way to properly denote which one this is for. ++ */ ++ if (likely((unsigned long)str >= TASK_SIZE)) { ++ /* For safety, do not trust the string pointer */ ++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) ++ return NULL; ++ } else { ++ /* user space address? 
*/ ++ ustr = (char __user *)str; ++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) ++ return NULL; ++ } ++ return kstr; ++} ++ + /* Filter predicate for fixed sized arrays of characters */ + static int filter_pred_string(struct filter_pred *pred, void *event) + { +@@ -671,10 +713,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event) + static int filter_pred_pchar(struct filter_pred *pred, void *event) + { + char **addr = (char **)(event + pred->offset); ++ char *str; + int cmp, match; +- int len = strlen(*addr) + 1; /* including tailing '\0' */ ++ int len; + +- cmp = pred->regex.match(*addr, &pred->regex, len); ++ str = test_string(*addr); ++ if (!str) ++ return 0; ++ ++ len = strlen(str) + 1; /* including tailing '\0' */ ++ cmp = pred->regex.match(str, &pred->regex, len); + + match = cmp ^ pred->not; + +@@ -1320,8 +1368,17 @@ static int parse_pred(const char *str, void *data, + + } else if (field->filter_type == FILTER_DYN_STRING) + pred->fn = filter_pred_strloc; +- else ++ else { ++ ++ if (!ustring_per_cpu) { ++ /* Once allocated, keep it around for good */ ++ ustring_per_cpu = alloc_percpu(struct ustring_buffer); ++ if (!ustring_per_cpu) ++ goto err_mem; ++ } ++ + pred->fn = filter_pred_pchar; ++ } + /* go past the last quote */ + i++; + +@@ -1387,6 +1444,9 @@ static int parse_pred(const char *str, void *data, + err_free: + kfree(pred); + return -EINVAL; ++err_mem: ++ kfree(pred); ++ return -ENOMEM; + } + + enum { +-- +2.34.1 + diff --git a/queue-5.10/tracing-add-ustring-operation-to-filtering-string-po.patch b/queue-5.10/tracing-add-ustring-operation-to-filtering-string-po.patch new file mode 100644 index 00000000000..6f5df8a5a33 --- /dev/null +++ b/queue-5.10/tracing-add-ustring-operation-to-filtering-string-po.patch @@ -0,0 +1,190 @@ +From 27df2542253ffb9db857900699a561eaaa754ad9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jan 2022 20:08:40 -0500 +Subject: tracing: Add ustring operation to filtering string 
pointers + +From: Steven Rostedt + +[ Upstream commit f37c3bbc635994eda203a6da4ba0f9d05165a8d6 ] + +Since referencing user space pointers is special, if the user wants to +filter on a field that is a pointer to user space, then they need to +specify it. + +Add a ".ustring" attribute to the field name for filters to state that the +field is pointing to user space such that the kernel can take the +appropriate action to read that pointer. + +Link: https://lore.kernel.org/all/yt9d8rvmt2jq.fsf@linux.ibm.com/ + +Fixes: 77360f9bbc7e ("tracing: Add test for user space strings when filtering on string pointers") +Tested-by: Sven Schnelle +Signed-off-by: Steven Rostedt +Signed-off-by: Sasha Levin +--- + Documentation/trace/events.rst | 9 ++++ + kernel/trace/trace_events_filter.c | 81 +++++++++++++++++++++--------- + 2 files changed, 66 insertions(+), 24 deletions(-) + +diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst +index 58a471b690e07..9df29a935757a 100644 +--- a/Documentation/trace/events.rst ++++ b/Documentation/trace/events.rst +@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes + prev_comm ~ "*sh*" + prev_comm ~ "ba*sh" + ++If the field is a pointer that points into user space (for example ++"filename" from sys_enter_openat), then you have to append ".ustring" to the ++field name:: ++ ++ filename.ustring ~ "password" ++ ++As the kernel will have to know how to retrieve the memory that the pointer ++is at from user space. 
++ + 5.2 Setting filters + ------------------- + +diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c +index 7b1fb811cb0ab..a255ffbe342f3 100644 +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -665,6 +665,23 @@ struct ustring_buffer { + static __percpu struct ustring_buffer *ustring_per_cpu; + + static __always_inline char *test_string(char *str) ++{ ++ struct ustring_buffer *ubuf; ++ char *kstr; ++ ++ if (!ustring_per_cpu) ++ return NULL; ++ ++ ubuf = this_cpu_ptr(ustring_per_cpu); ++ kstr = ubuf->buffer; ++ ++ /* For safety, do not trust the string pointer */ ++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) ++ return NULL; ++ return kstr; ++} ++ ++static __always_inline char *test_ustring(char *str) + { + struct ustring_buffer *ubuf; + char __user *ustr; +@@ -676,23 +693,11 @@ static __always_inline char *test_string(char *str) + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + +- /* +- * We use TASK_SIZE to denote user or kernel space, but this will +- * not work for all architectures. If it picks the wrong one, it may +- * just fail the filter (but will not bug). +- * +- * TODO: Have a way to properly denote which one this is for. +- */ +- if (likely((unsigned long)str >= TASK_SIZE)) { +- /* For safety, do not trust the string pointer */ +- if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) +- return NULL; +- } else { +- /* user space address? */ +- ustr = (char __user *)str; +- if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) +- return NULL; +- } ++ /* user space address? 
*/ ++ ustr = (char __user *)str; ++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) ++ return NULL; ++ + return kstr; + } + +@@ -709,24 +714,42 @@ static int filter_pred_string(struct filter_pred *pred, void *event) + return match; + } + ++static __always_inline int filter_pchar(struct filter_pred *pred, char *str) ++{ ++ int cmp, match; ++ int len; ++ ++ len = strlen(str) + 1; /* including tailing '\0' */ ++ cmp = pred->regex.match(str, &pred->regex, len); ++ ++ match = cmp ^ pred->not; ++ ++ return match; ++} + /* Filter predicate for char * pointers */ + static int filter_pred_pchar(struct filter_pred *pred, void *event) + { + char **addr = (char **)(event + pred->offset); + char *str; +- int cmp, match; +- int len; + + str = test_string(*addr); + if (!str) + return 0; + +- len = strlen(str) + 1; /* including tailing '\0' */ +- cmp = pred->regex.match(str, &pred->regex, len); ++ return filter_pchar(pred, str); ++} + +- match = cmp ^ pred->not; ++/* Filter predicate for char * pointers in user space*/ ++static int filter_pred_pchar_user(struct filter_pred *pred, void *event) ++{ ++ char **addr = (char **)(event + pred->offset); ++ char *str; + +- return match; ++ str = test_ustring(*addr); ++ if (!str) ++ return 0; ++ ++ return filter_pchar(pred, str); + } + + /* +@@ -1206,6 +1229,7 @@ static int parse_pred(const char *str, void *data, + struct filter_pred *pred = NULL; + char num_buf[24]; /* Big enough to hold an address */ + char *field_name; ++ bool ustring = false; + char q; + u64 val; + int len; +@@ -1240,6 +1264,12 @@ static int parse_pred(const char *str, void *data, + return -EINVAL; + } + ++ /* See if the field is a user space string */ ++ if ((len = str_has_prefix(str + i, ".ustring"))) { ++ ustring = true; ++ i += len; ++ } ++ + while (isspace(str[i])) + i++; + +@@ -1377,7 +1407,10 @@ static int parse_pred(const char *str, void *data, + goto err_mem; + } + +- pred->fn = filter_pred_pchar; ++ if (ustring) ++ pred->fn = 
filter_pred_pchar_user; ++ else ++ pred->fn = filter_pred_pchar; + } + /* go past the last quote */ + i++; +-- +2.34.1 +