From: Sasha Levin Date: Thu, 3 Mar 2022 21:33:04 +0000 (-0500) Subject: Fixes for 5.16 X-Git-Tag: v4.9.305~99 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5f60ec3de4fe7ceac6524ae1fcd5a516b576e249;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.16 Signed-off-by: Sasha Levin --- diff --git a/queue-5.16/arm64-mark-start_backtrace-notrace-and-nokprobe_symb.patch b/queue-5.16/arm64-mark-start_backtrace-notrace-and-nokprobe_symb.patch new file mode 100644 index 00000000000..a1fc4ad266b --- /dev/null +++ b/queue-5.16/arm64-mark-start_backtrace-notrace-and-nokprobe_symb.patch @@ -0,0 +1,49 @@ +From 9b93f78bc256080274ff351d54eb73fe892aadf6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Jan 2022 17:17:54 +0900 +Subject: arm64: Mark start_backtrace() notrace and NOKPROBE_SYMBOL + +From: Masami Hiramatsu + +[ Upstream commit 1e0924bd09916fab795fc2a21ec1d148f24299fd ] + +Mark the start_backtrace() as notrace and NOKPROBE_SYMBOL +because this function is called from ftrace and lockdep to +get the caller address via return_address(). The lockdep +is used in kprobes, it should also be NOKPROBE_SYMBOL. 
+ +Fixes: b07f3499661c ("arm64: stacktrace: Move start_backtrace() out of the header") +Cc: # 5.13.x +Signed-off-by: Masami Hiramatsu +Reviewed-by: Mark Brown +Link: https://lore.kernel.org/r/164301227374.1433152.12808232644267107415.stgit@devnote2 +Signed-off-by: Catalin Marinas +Signed-off-by: Sasha Levin +--- + arch/arm64/kernel/stacktrace.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c +index 94f83cd44e507..0ee6bd390bd09 100644 +--- a/arch/arm64/kernel/stacktrace.c ++++ b/arch/arm64/kernel/stacktrace.c +@@ -33,7 +33,7 @@ + */ + + +-void start_backtrace(struct stackframe *frame, unsigned long fp, ++notrace void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) + { + frame->fp = fp; +@@ -55,6 +55,7 @@ void start_backtrace(struct stackframe *frame, unsigned long fp, + frame->prev_fp = 0; + frame->prev_type = STACK_TYPE_UNKNOWN; + } ++NOKPROBE_SYMBOL(start_backtrace); + + /* + * Unwind from one frame record (A) to the next frame record (B). +-- +2.34.1 + diff --git a/queue-5.16/ata-pata_hpt37x-fix-pci-clock-detection.patch b/queue-5.16/ata-pata_hpt37x-fix-pci-clock-detection.patch new file mode 100644 index 00000000000..275a435ecc1 --- /dev/null +++ b/queue-5.16/ata-pata_hpt37x-fix-pci-clock-detection.patch @@ -0,0 +1,45 @@ +From da843dd5695cde66ea4bcce7142d469b2d8514d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Feb 2022 23:04:29 +0300 +Subject: ata: pata_hpt37x: fix PCI clock detection + +From: Sergey Shtylyov + +[ Upstream commit 5f6b0f2d037c8864f20ff15311c695f65eb09db5 ] + +The f_CNT register (at the PCI config. address 0x78) is 16-bit, not +8-bit! The bug was there from the very start... 
:-( + +Signed-off-by: Sergey Shtylyov +Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.") +Cc: stable@vger.kernel.org +Signed-off-by: Damien Le Moal +Signed-off-by: Sasha Levin +--- + drivers/ata/pata_hpt37x.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c +index ae8375e9d2681..9d371859e81ed 100644 +--- a/drivers/ata/pata_hpt37x.c ++++ b/drivers/ata/pata_hpt37x.c +@@ -964,14 +964,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) + + if ((freq >> 12) != 0xABCDE) { + int i; +- u8 sr; ++ u16 sr; + u32 total = 0; + + pr_warn("BIOS has not set timing clocks\n"); + + /* This is the process the HPT371 BIOS is reported to use */ + for (i = 0; i < 128; i++) { +- pci_read_config_byte(dev, 0x78, &sr); ++ pci_read_config_word(dev, 0x78, &sr); + total += sr & 0x1FF; + udelay(15); + } +-- +2.34.1 + diff --git a/queue-5.16/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch b/queue-5.16/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch new file mode 100644 index 00000000000..69a22ec5d73 --- /dev/null +++ b/queue-5.16/drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch @@ -0,0 +1,83 @@ +From ba0f28782dfb657a17208d37a05a7187e3335be4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Feb 2022 17:53:56 +0800 +Subject: drm/amdgpu: check vm ready by amdgpu_vm->evicting flag +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Qiang Yu + +[ Upstream commit c1a66c3bc425ff93774fb2f6eefa67b83170dd7e ] + +Workstation application ANSA/META v21.1.4 get this error dmesg when +running CI test suite provided by ANSA/META: +[drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-16) + +This is caused by: +1. create a 256MB buffer in invisible VRAM +2. CPU map the buffer and access it causes vm_fault and try to move + it to visible VRAM +3. 
force visible VRAM space and traverse all VRAM bos to check if + evicting this bo is valuable +4. when checking a VM bo (in invisible VRAM), amdgpu_vm_evictable() + will set amdgpu_vm->evicting, but later due to not in visible + VRAM, won't really evict it so not add it to amdgpu_vm->evicted +5. before next CS to clear the amdgpu_vm->evicting, user VM ops + ioctl will pass amdgpu_vm_ready() (check amdgpu_vm->evicted) + but fail in amdgpu_vm_bo_update_mapping() (check + amdgpu_vm->evicting) and get this error log + +This error won't affect functionality as next CS will finish the +waiting VM ops. But we'd better clear the error log by checking +the amdgpu_vm->evicting flag in amdgpu_vm_ready() to stop calling +amdgpu_vm_bo_update_mapping() later. + +Another reason is amdgpu_vm->evicted list holds all BOs (both +user buffer and page table), but only page table BOs' eviction +prevent VM ops. amdgpu_vm->evicting flag is set only for page +table BOs, so we should use evicting flag instead of evicted list +in amdgpu_vm_ready(). + +The side effect of this change is: previously blocked VM op (user +buffer in "evicted" list but no page table in it) gets done +immediately. + +v2: update commit comments. + +Acked-by: Paul Menzel +Reviewed-by: Christian König +Signed-off-by: Qiang Yu +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 0e7dc23f78e7f..bc8d83698880c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -768,11 +768,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, + * Check if all VM PDs/PTs are ready for updates + * + * Returns: +- * True if eviction list is empty. ++ * True if VM is not evicting. 
+ */ + bool amdgpu_vm_ready(struct amdgpu_vm *vm) + { +- return list_empty(&vm->evicted); ++ bool ret; ++ ++ amdgpu_vm_eviction_lock(vm); ++ ret = !vm->evicting; ++ amdgpu_vm_eviction_unlock(vm); ++ return ret; + } + + /** +-- +2.34.1 + diff --git a/queue-5.16/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch b/queue-5.16/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch new file mode 100644 index 00000000000..4b009767345 --- /dev/null +++ b/queue-5.16/exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch @@ -0,0 +1,81 @@ +From 6d432547abd6645f7cc07a08f4eb84e73ac4c3f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Nov 2021 22:02:37 +0900 +Subject: exfat: fix i_blocks for files truncated over 4 GiB + +From: Christophe Vu-Brugier + +[ Upstream commit 92fba084b79e6bc7b12fc118209f1922c1a2df56 ] + +In exfat_truncate(), the computation of inode->i_blocks is wrong if +the file is larger than 4 GiB because a 32-bit variable is used as a +mask. This is fixed and simplified by using round_up(). + +Also fix the same buggy computation in exfat_read_root() and another +(correct) one in exfat_fill_inode(). The latter was fixed another way +last month but can be simplified by using round_up() as well. 
See: + + commit 0c336d6e33f4 ("exfat: fix incorrect loading of i_blocks for + large files") + +Fixes: 98d917047e8b ("exfat: add file operations") +Cc: stable@vger.kernel.org # v5.7+ +Suggested-by: Matthew Wilcox +Reviewed-by: Sungjong Seo +Signed-off-by: Christophe Vu-Brugier +Signed-off-by: Namjae Jeon +Signed-off-by: Sasha Levin +--- + fs/exfat/file.c | 4 ++-- + fs/exfat/inode.c | 4 ++-- + fs/exfat/super.c | 4 ++-- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/fs/exfat/file.c b/fs/exfat/file.c +index 848166d6d5e9d..d890fd34bb2d0 100644 +--- a/fs/exfat/file.c ++++ b/fs/exfat/file.c +@@ -251,8 +251,8 @@ void exfat_truncate(struct inode *inode, loff_t size) + else + mark_inode_dirty(inode); + +- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & +- ~(sbi->cluster_size - 1)) >> inode->i_blkbits; ++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> ++ inode->i_blkbits; + write_size: + aligned_size = i_size_read(inode); + if (aligned_size & (blocksize - 1)) { +diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c +index aca2e64d045b6..72a0ccfb616c3 100644 +--- a/fs/exfat/inode.c ++++ b/fs/exfat/inode.c +@@ -602,8 +602,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) + + exfat_save_attr(inode, info->attr); + +- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & +- ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits; ++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> ++ inode->i_blkbits; + inode->i_mtime = info->mtime; + inode->i_ctime = info->mtime; + ei->i_crtime = info->crtime; +diff --git a/fs/exfat/super.c b/fs/exfat/super.c +index 1a2115d73a48a..4b5d02b1df585 100644 +--- a/fs/exfat/super.c ++++ b/fs/exfat/super.c +@@ -364,8 +364,8 @@ static int exfat_read_root(struct inode *inode) + inode->i_op = &exfat_dir_inode_operations; + inode->i_fop = &exfat_dir_operations; + +- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) +- & 
~(sbi->cluster_size - 1)) >> inode->i_blkbits; ++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> ++ inode->i_blkbits; + ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; + ei->i_size_aligned = i_size_read(inode); + ei->i_size_ondisk = i_size_read(inode); +-- +2.34.1 + diff --git a/queue-5.16/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch b/queue-5.16/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch new file mode 100644 index 00000000000..5d4f1eb87e1 --- /dev/null +++ b/queue-5.16/exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch @@ -0,0 +1,134 @@ +From 5dcaaecb0895d026a2954b4b5d82e8fe995f610c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Nov 2021 22:23:58 +0100 +Subject: exfat: reuse exfat_inode_info variable instead of calling EXFAT_I() + +From: Christophe Vu-Brugier + +[ Upstream commit 7dee6f57d7f22a89dd214518c778aec448270d4c ] + +Also add a local "struct exfat_inode_info *ei" variable to +exfat_truncate() to simplify the code. 
+ +Signed-off-by: Christophe Vu-Brugier +Signed-off-by: Namjae Jeon +Signed-off-by: Sasha Levin +--- + fs/exfat/file.c | 14 +++++++------- + fs/exfat/inode.c | 9 ++++----- + fs/exfat/namei.c | 6 +++--- + fs/exfat/super.c | 6 +++--- + 4 files changed, 17 insertions(+), 18 deletions(-) + +diff --git a/fs/exfat/file.c b/fs/exfat/file.c +index 6af0191b648f1..848166d6d5e9d 100644 +--- a/fs/exfat/file.c ++++ b/fs/exfat/file.c +@@ -110,8 +110,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) + exfat_set_volume_dirty(sb); + + num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); +- num_clusters_phys = +- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi); ++ num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + + exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); + +@@ -228,12 +227,13 @@ void exfat_truncate(struct inode *inode, loff_t size) + { + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); ++ struct exfat_inode_info *ei = EXFAT_I(inode); + unsigned int blocksize = i_blocksize(inode); + loff_t aligned_size; + int err; + + mutex_lock(&sbi->s_lock); +- if (EXFAT_I(inode)->start_clu == 0) { ++ if (ei->start_clu == 0) { + /* + * Empty start_clu != ~0 (not allocated) + */ +@@ -260,11 +260,11 @@ void exfat_truncate(struct inode *inode, loff_t size) + aligned_size++; + } + +- if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode)) +- EXFAT_I(inode)->i_size_ondisk = aligned_size; ++ if (ei->i_size_ondisk > i_size_read(inode)) ++ ei->i_size_ondisk = aligned_size; + +- if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode)) +- EXFAT_I(inode)->i_size_aligned = aligned_size; ++ if (ei->i_size_aligned > i_size_read(inode)) ++ ei->i_size_aligned = aligned_size; + mutex_unlock(&sbi->s_lock); + } + +diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c +index 1c7aa1ea4724c..aca2e64d045b6 100644 +--- a/fs/exfat/inode.c ++++ b/fs/exfat/inode.c +@@ -114,10 +114,9 @@ static int 
exfat_map_cluster(struct inode *inode, unsigned int clu_offset, + unsigned int local_clu_offset = clu_offset; + unsigned int num_to_be_allocated = 0, num_clusters = 0; + +- if (EXFAT_I(inode)->i_size_ondisk > 0) ++ if (ei->i_size_ondisk > 0) + num_clusters = +- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, +- sbi); ++ EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); + + if (clu_offset >= num_clusters) + num_to_be_allocated = clu_offset - num_clusters + 1; +@@ -416,10 +415,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping, + + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + +- if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) { ++ if (ei->i_size_aligned < i_size_read(inode)) { + exfat_fs_error(inode->i_sb, + "invalid size(size(%llu) > aligned(%llu)\n", +- i_size_read(inode), EXFAT_I(inode)->i_size_aligned); ++ i_size_read(inode), ei->i_size_aligned); + return -EIO; + } + +diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c +index 24b41103d1cc0..9d8ada781250b 100644 +--- a/fs/exfat/namei.c ++++ b/fs/exfat/namei.c +@@ -395,9 +395,9 @@ static int exfat_find_empty_entry(struct inode *inode, + + /* directory inode should be updated in here */ + i_size_write(inode, size); +- EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size; +- EXFAT_I(inode)->i_size_aligned += sbi->cluster_size; +- EXFAT_I(inode)->flags = p_dir->flags; ++ ei->i_size_ondisk += sbi->cluster_size; ++ ei->i_size_aligned += sbi->cluster_size; ++ ei->flags = p_dir->flags; + inode->i_blocks += 1 << sbi->sect_per_clus_bits; + } + +diff --git a/fs/exfat/super.c b/fs/exfat/super.c +index 5539ffc20d164..1a2115d73a48a 100644 +--- a/fs/exfat/super.c ++++ b/fs/exfat/super.c +@@ -366,9 +366,9 @@ static int exfat_read_root(struct inode *inode) + + inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) + & ~(sbi->cluster_size - 1)) >> inode->i_blkbits; +- EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; +- 
EXFAT_I(inode)->i_size_aligned = i_size_read(inode); +- EXFAT_I(inode)->i_size_ondisk = i_size_read(inode); ++ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; ++ ei->i_size_aligned = i_size_read(inode); ++ ei->i_size_ondisk = i_size_read(inode); + + exfat_save_attr(inode, ATTR_SUBDIR); + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = +-- +2.34.1 + diff --git a/queue-5.16/ext4-drop-ineligible-txn-start-stop-apis.patch b/queue-5.16/ext4-drop-ineligible-txn-start-stop-apis.patch new file mode 100644 index 00000000000..9afe2556721 --- /dev/null +++ b/queue-5.16/ext4-drop-ineligible-txn-start-stop-apis.patch @@ -0,0 +1,251 @@ +From f5b14d8a3321344787c8cff251bf27bb0935a0f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Dec 2021 12:21:38 -0800 +Subject: ext4: drop ineligible txn start stop APIs + +From: Harshad Shirwadkar + +[ Upstream commit 7bbbe241ec7ce0def9f71464c878fdbd2b0dcf37 ] + +This patch drops ext4_fc_start_ineligible() and +ext4_fc_stop_ineligible() APIs. Fast commit ineligible transactions +should simply call ext4_fc_mark_ineligible() after starting the +transaction. 
+ +Signed-off-by: Harshad Shirwadkar +Link: https://lore.kernel.org/r/20211223202140.2061101-3-harshads@google.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 6 ++-- + fs/ext4/extents.c | 6 ++-- + fs/ext4/fast_commit.c | 79 ++++++++----------------------------------- + fs/ext4/ioctl.c | 3 +- + fs/ext4/super.c | 1 - + 5 files changed, 20 insertions(+), 75 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index d248a01132c3b..f80e4de726869 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1725,9 +1725,9 @@ struct ext4_sb_info { + */ + struct work_struct s_error_work; + +- /* Ext4 fast commit stuff */ ++ /* Ext4 fast commit sub transaction ID */ + atomic_t s_fc_subtid; +- atomic_t s_fc_ineligible_updates; ++ + /* + * After commit starts, the main queue gets locked, and the further + * updates get added in the staging queue. +@@ -2926,8 +2926,6 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); + void ext4_fc_track_inode(handle_t *handle, struct inode *inode); + void ext4_fc_mark_ineligible(struct super_block *sb, int reason); +-void ext4_fc_start_ineligible(struct super_block *sb, int reason); +-void ext4_fc_stop_ineligible(struct super_block *sb); + void ext4_fc_start_update(struct inode *inode); + void ext4_fc_stop_update(struct inode *inode); + void ext4_fc_del(struct inode *inode); +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index 9b37d16b24ffd..d3a8d704d8b4f 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -5342,7 +5342,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) + ret = PTR_ERR(handle); + goto out_mmap; + } +- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + + down_write(&EXT4_I(inode)->i_data_sem); + ext4_discard_preallocations(inode, 0); +@@ -5381,7 +5381,6 @@ static int 
ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) + + out_stop: + ext4_journal_stop(handle); +- ext4_fc_stop_ineligible(sb); + out_mmap: + filemap_invalidate_unlock(mapping); + out_mutex: +@@ -5483,7 +5482,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) + ret = PTR_ERR(handle); + goto out_mmap; + } +- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + + /* Expand file to avoid data loss if there is error while shifting */ + inode->i_size += len; +@@ -5558,7 +5557,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) + + out_stop: + ext4_journal_stop(handle); +- ext4_fc_stop_ineligible(sb); + out_mmap: + filemap_invalidate_unlock(mapping); + out_mutex: +diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c +index 3b79fb063c07a..48e522bb7bca4 100644 +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -65,21 +65,11 @@ + * + * Fast Commit Ineligibility + * ------------------------- +- * Not all operations are supported by fast commits today (e.g extended +- * attributes). Fast commit ineligibility is marked by calling one of the +- * two following functions: +- * +- * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall +- * back to full commit. This is useful in case of transient errors. + * +- * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all +- * the fast commits happening between ext4_fc_start_ineligible() and +- * ext4_fc_stop_ineligible() and one fast commit after the call to +- * ext4_fc_stop_ineligible() to fall back to full commits. It is important to +- * make one more fast commit to fall back to full commit after stop call so +- * that it guaranteed that the fast commit ineligible operation contained +- * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is +- * followed by at least 1 full commit. 
++ * Not all operations are supported by fast commits today (e.g extended ++ * attributes). Fast commit ineligibility is marked by calling ++ * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back ++ * to full commit. + * + * Atomicity of commits + * -------------------- +@@ -328,44 +318,6 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason) + sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; + } + +-/* +- * Start a fast commit ineligible update. Any commits that happen while +- * such an operation is in progress fall back to full commits. +- */ +-void ext4_fc_start_ineligible(struct super_block *sb, int reason) +-{ +- struct ext4_sb_info *sbi = EXT4_SB(sb); +- +- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || +- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) +- return; +- +- WARN_ON(reason >= EXT4_FC_REASON_MAX); +- sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; +- atomic_inc(&sbi->s_fc_ineligible_updates); +-} +- +-/* +- * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here +- * to ensure that after stopping the ineligible update, at least one full +- * commit takes place. +- */ +-void ext4_fc_stop_ineligible(struct super_block *sb) +-{ +- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || +- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) +- return; +- +- ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); +- atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates); +-} +- +-static inline int ext4_fc_is_ineligible(struct super_block *sb) +-{ +- return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) || +- atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates)); +-} +- + /* + * Generic fast commit tracking function. 
If this is the first time this we are + * called after a full commit, we initialize fast commit fields and then call +@@ -391,7 +343,7 @@ static int ext4_fc_track_template( + (sbi->s_mount_state & EXT4_FC_REPLAY)) + return -EOPNOTSUPP; + +- if (ext4_fc_is_ineligible(inode->i_sb)) ++ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE)) + return -EINVAL; + + tid = handle->h_transaction->t_tid; +@@ -1142,11 +1094,8 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) + + start_time = ktime_get(); + +- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || +- (ext4_fc_is_ineligible(sb))) { +- reason = EXT4_FC_REASON_INELIGIBLE; +- goto out; +- } ++ if (!test_opt2(sb, JOURNAL_FAST_COMMIT)) ++ return jbd2_complete_transaction(journal, commit_tid); + + restart_fc: + ret = jbd2_fc_begin_commit(journal, commit_tid); +@@ -1162,6 +1111,14 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) + reason = EXT4_FC_REASON_FC_START_FAILED; + goto out; + } ++ /* ++ * After establishing journal barrier via jbd2_fc_begin_commit(), check ++ * if we are fast commit ineligible. ++ */ ++ if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { ++ reason = EXT4_FC_REASON_INELIGIBLE; ++ goto out; ++ } + + fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; + ret = ext4_fc_perform_commit(journal); +@@ -1180,12 +1137,6 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) + atomic_inc(&sbi->s_fc_subtid); + jbd2_fc_end_commit(journal); + out: +- /* Has any ineligible update happened since we started? 
*/ +- if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) { +- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; +- reason = EXT4_FC_REASON_INELIGIBLE; +- } +- + spin_lock(&sbi->s_fc_lock); + if (reason != EXT4_FC_REASON_OK && + reason != EXT4_FC_REASON_ALREADY_COMMITTED) { +diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c +index 220a4c8178b5e..fd70bebb14370 100644 +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb, + err = -EINVAL; + goto err_out; + } +- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); + + /* Protect extent tree against block allocations via delalloc */ + ext4_double_down_write_data_sem(inode, inode_bl); +@@ -252,7 +252,6 @@ static long swap_inode_boot_loader(struct super_block *sb, + + err_out1: + ext4_journal_stop(handle); +- ext4_fc_stop_ineligible(sb); + ext4_double_up_write_data_sem(inode, inode_bl); + + err_out: +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 24a7ad80353b5..d304b72593d76 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4620,7 +4620,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + + /* Initialize fast commit stuff */ + atomic_set(&sbi->s_fc_subtid, 0); +- atomic_set(&sbi->s_fc_ineligible_updates, 0); + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); +-- +2.34.1 + diff --git a/queue-5.16/ext4-fast-commit-may-miss-file-actions.patch b/queue-5.16/ext4-fast-commit-may-miss-file-actions.patch new file mode 100644 index 00000000000..1b96a519dcd --- /dev/null +++ b/queue-5.16/ext4-fast-commit-may-miss-file-actions.patch @@ -0,0 +1,117 @@ +From dc0cacc25bc9df3a8521ebf4875cac0c2248bbfb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jan 2022 17:36:55 +0800 +Subject: ext4: fast commit may miss file actions + +From: Xin Yin + 
+[ Upstream commit bdc8a53a6f2f0b1cb5f991440f2100732299eb93 ] + +in the follow scenario: +1. jbd start transaction n +2. task A get new handle for transaction n+1 +3. task A do some actions and add inode to FC_Q_MAIN fc_q +4. jbd complete transaction n and clear FC_Q_MAIN fc_q +5. task A call fsync + +Fast commit will lost the file actions during a full commit. + +we should also add updates to staging queue during a full commit. +and in ext4_fc_cleanup(), when reset a inode's fc track range, check +it's i_sync_tid, if it bigger than current transaction tid, do not +rest it, or we will lost the track range. + +And EXT4_MF_FC_COMMITTING is not needed anymore, so drop it. + +Signed-off-by: Xin Yin +Link: https://lore.kernel.org/r/20220117093655.35160-3-yinxin.x@bytedance.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 5 +---- + fs/ext4/fast_commit.c | 11 ++++++----- + fs/ext4/super.c | 1 - + 3 files changed, 7 insertions(+), 10 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 8b5015ea46199..c2cc9d78915b0 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1793,10 +1793,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) + enum { + EXT4_MF_MNTDIR_SAMPLED, + EXT4_MF_FS_ABORTED, /* Fatal error detected */ +- EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ +- EXT4_MF_FC_COMMITTING /* File system underoing a fast +- * commit. +- */ ++ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ + }; + + static inline void ext4_set_mount_flag(struct super_block *sb, int bit) +diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c +index 0cdfc5003d91a..aca8414706346 100644 +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -377,7 +377,8 @@ static int ext4_fc_track_template( + spin_lock(&sbi->s_fc_lock); + if (list_empty(&EXT4_I(inode)->i_fc_list)) + list_add_tail(&EXT4_I(inode)->i_fc_list, +- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? 
++ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || ++ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? + &sbi->s_fc_q[FC_Q_STAGING] : + &sbi->s_fc_q[FC_Q_MAIN]); + spin_unlock(&sbi->s_fc_lock); +@@ -430,7 +431,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) + node->fcd_name.len = dentry->d_name.len; + + spin_lock(&sbi->s_fc_lock); +- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ++ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || ++ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) + list_add_tail(&node->fcd_list, + &sbi->s_fc_dentry_q[FC_Q_STAGING]); + else +@@ -896,7 +898,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) + int ret = 0; + + spin_lock(&sbi->s_fc_lock); +- ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); + list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { + ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); + while (atomic_read(&ei->i_fc_updates)) { +@@ -1214,7 +1215,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) + list_del_init(&iter->i_fc_list); + ext4_clear_inode_state(&iter->vfs_inode, + EXT4_STATE_FC_COMMITTING); +- ext4_fc_reset_inode(&iter->vfs_inode); ++ if (iter->i_sync_tid <= tid) ++ ext4_fc_reset_inode(&iter->vfs_inode); + /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ + smp_mb(); + #if (BITS_PER_LONG < 64) +@@ -1243,7 +1245,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) + list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], + &sbi->s_fc_q[FC_Q_MAIN]); + +- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); + if (tid >= sbi->s_fc_ineligible_tid) { + sbi->s_fc_ineligible_tid = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 888b2db92924d..32ca34403dcec 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4626,7 +4626,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + 
INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); + sbi->s_fc_bytes = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); +- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); + sbi->s_fc_ineligible_tid = 0; + spin_lock_init(&sbi->s_fc_lock); + memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); +-- +2.34.1 + diff --git a/queue-5.16/ext4-fast-commit-may-not-fallback-for-ineligible-com.patch b/queue-5.16/ext4-fast-commit-may-not-fallback-for-ineligible-com.patch new file mode 100644 index 00000000000..93583702fdd --- /dev/null +++ b/queue-5.16/ext4-fast-commit-may-not-fallback-for-ineligible-com.patch @@ -0,0 +1,329 @@ +From 642f8c30b87603489a33051fd74a51c0a4447cf7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 17 Jan 2022 17:36:54 +0800 +Subject: ext4: fast commit may not fallback for ineligible commit + +From: Xin Yin + +[ Upstream commit e85c81ba8859a4c839bcd69c5d83b32954133a5b ] + +For the follow scenario: +1. jbd start commit transaction n +2. task A get new handle for transaction n+1 +3. task A do some ineligible actions and mark FC_INELIGIBLE +4. jbd complete transaction n and clean FC_INELIGIBLE +5. task A call fsync + +In this case fast commit will not fallback to full commit and +transaction n+1 also not handled by jbd. + +Make ext4_fc_mark_ineligible() also record transaction tid for +latest ineligible case, when call ext4_fc_cleanup() check +current transaction tid, if small than latest ineligible tid +do not clear the EXT4_MF_FC_INELIGIBLE. 
+ +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Reported-by: Ritesh Harjani +Suggested-by: Harshad Shirwadkar +Signed-off-by: Xin Yin +Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 3 ++- + fs/ext4/extents.c | 4 ++-- + fs/ext4/fast_commit.c | 33 +++++++++++++++++++++++++-------- + fs/ext4/inode.c | 4 ++-- + fs/ext4/ioctl.c | 4 ++-- + fs/ext4/namei.c | 4 ++-- + fs/ext4/super.c | 1 + + fs/ext4/xattr.c | 6 +++--- + fs/jbd2/commit.c | 2 +- + fs/jbd2/journal.c | 2 +- + include/linux/jbd2.h | 2 +- + 11 files changed, 42 insertions(+), 23 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 470fd3c2aef54..8b5015ea46199 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1747,6 +1747,7 @@ struct ext4_sb_info { + spinlock_t s_fc_lock; + struct buffer_head *s_fc_bh; + struct ext4_fc_stats s_fc_stats; ++ tid_t s_fc_ineligible_tid; + #ifdef CONFIG_EXT4_DEBUG + int s_fc_debug_max_replay; + #endif +@@ -2924,7 +2925,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry); + void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); + void ext4_fc_track_inode(handle_t *handle, struct inode *inode); +-void ext4_fc_mark_ineligible(struct super_block *sb, int reason); ++void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle); + void ext4_fc_start_update(struct inode *inode); + void ext4_fc_stop_update(struct inode *inode); + void ext4_fc_del(struct inode *inode); +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index d3a8d704d8b4f..d2667189be7e5 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -5342,7 +5342,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) + ret = PTR_ERR(handle); + goto out_mmap; + } +- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); ++ ext4_fc_mark_ineligible(sb, 
EXT4_FC_REASON_FALLOC_RANGE, handle); + + down_write(&EXT4_I(inode)->i_data_sem); + ext4_discard_preallocations(inode, 0); +@@ -5482,7 +5482,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) + ret = PTR_ERR(handle); + goto out_mmap; + } +- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); + + /* Expand file to avoid data loss if there is error while shifting */ + inode->i_size += len; +diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c +index 1b935feec6f6b..0cdfc5003d91a 100644 +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -302,18 +302,32 @@ void ext4_fc_del(struct inode *inode) + } + + /* +- * Mark file system as fast commit ineligible. This means that next commit +- * operation would result in a full jbd2 commit. ++ * Mark file system as fast commit ineligible, and record latest ++ * ineligible transaction tid. This means until the recorded ++ * transaction, commit operation would result in a full jbd2 commit. + */ +-void ext4_fc_mark_ineligible(struct super_block *sb, int reason) ++void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); ++ tid_t tid; + + if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || + (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) + return; + + ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ++ if (handle && !IS_ERR(handle)) ++ tid = handle->h_transaction->t_tid; ++ else { ++ read_lock(&sbi->s_journal->j_state_lock); ++ tid = sbi->s_journal->j_running_transaction ? 
++ sbi->s_journal->j_running_transaction->t_tid : 0; ++ read_unlock(&sbi->s_journal->j_state_lock); ++ } ++ spin_lock(&sbi->s_fc_lock); ++ if (sbi->s_fc_ineligible_tid < tid) ++ sbi->s_fc_ineligible_tid = tid; ++ spin_unlock(&sbi->s_fc_lock); + WARN_ON(reason >= EXT4_FC_REASON_MAX); + sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; + } +@@ -389,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) + mutex_unlock(&ei->i_fc_lock); + node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); + if (!node) { +- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); ++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); + mutex_lock(&ei->i_fc_lock); + return -ENOMEM; + } +@@ -402,7 +416,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) + if (!node->fcd_name.name) { + kmem_cache_free(ext4_fc_dentry_cachep, node); + ext4_fc_mark_ineligible(inode->i_sb, +- EXT4_FC_REASON_NOMEM); ++ EXT4_FC_REASON_NOMEM, NULL); + mutex_lock(&ei->i_fc_lock); + return -ENOMEM; + } +@@ -504,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) + + if (ext4_should_journal_data(inode)) { + ext4_fc_mark_ineligible(inode->i_sb, +- EXT4_FC_REASON_INODE_JOURNAL_DATA); ++ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); + return; + } + +@@ -1182,7 +1196,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) + * Fast commit cleanup routine. This is called after every fast commit and + * full commit. full is true if we are called after a full commit. 
+ */ +-static void ext4_fc_cleanup(journal_t *journal, int full) ++static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) + { + struct super_block *sb = journal->j_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); +@@ -1230,7 +1244,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full) + &sbi->s_fc_q[FC_Q_MAIN]); + + ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); +- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ++ if (tid >= sbi->s_fc_ineligible_tid) { ++ sbi->s_fc_ineligible_tid = 0; ++ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ++ } + + if (full) + sbi->s_fc_bytes = 0; +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 3bdfe010e17f9..2f5686dfa30d5 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode) + return; + no_delete: + if (!list_empty(&EXT4_I(inode)->i_fc_list)) +- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); ++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); + ext4_clear_inode(inode); /* We must guarantee clearing of inode... 
*/ + } + +@@ -5983,7 +5983,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) + return PTR_ERR(handle); + + ext4_fc_mark_ineligible(inode->i_sb, +- EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); ++ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle); + err = ext4_mark_inode_dirty(handle, inode); + ext4_handle_sync(handle); + ext4_journal_stop(handle); +diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c +index fd70bebb14370..f61b59045c6d3 100644 +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb, + err = -EINVAL; + goto err_out; + } +- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle); + + /* Protect extent tree against block allocations via delalloc */ + ext4_double_down_write_data_sem(inode, inode_bl); +@@ -1075,7 +1075,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + + err = ext4_resize_fs(sb, n_blocks_count); + if (EXT4_SB(sb)->s_journal) { +- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE); ++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL); + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 52c9bd154122a..47b9f87dbc6f7 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + * dirents in directories. 
+ */ + ext4_fc_mark_ineligible(old.inode->i_sb, +- EXT4_FC_REASON_RENAME_DIR); ++ EXT4_FC_REASON_RENAME_DIR, handle); + } else { + if (new.inode) + ext4_fc_track_unlink(handle, new.dentry); +@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, + if (unlikely(retval)) + goto end_rename; + ext4_fc_mark_ineligible(new.inode->i_sb, +- EXT4_FC_REASON_CROSS_RENAME); ++ EXT4_FC_REASON_CROSS_RENAME, handle); + if (old.dir_bh) { + retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); + if (retval) +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index d304b72593d76..888b2db92924d 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4627,6 +4627,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + sbi->s_fc_bytes = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ++ sbi->s_fc_ineligible_tid = 0; + spin_lock_init(&sbi->s_fc_lock); + memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); + sbi->s_fc_replay_state.fc_regions = NULL; +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 1e0fc1ed845bf..0423253490986 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + if (IS_SYNC(inode)) + ext4_handle_sync(handle); + } +- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); ++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); + + cleanup: + brelse(is.iloc.bh); +@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, + if (error == 0) + error = error2; + } +- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); ++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); + + return error; + } +@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, + error); + goto cleanup; + } +- ext4_fc_mark_ineligible(inode->i_sb, 
EXT4_FC_REASON_XATTR); ++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); + } + error = 0; + cleanup: +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c +index 3cc4ab2ba7f4f..d188fa913a075 100644 +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + if (journal->j_fc_cleanup_callback) +- journal->j_fc_cleanup_callback(journal, 1); ++ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid); + + trace_jbd2_end_commit(journal, commit_transaction); + jbd_debug(1, "JBD2: commit %d complete, head %d\n", +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index bd9ac98916043..1f8493ef181d6 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -769,7 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit); + static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) + { + if (journal->j_fc_cleanup_callback) +- journal->j_fc_cleanup_callback(journal, 0); ++ journal->j_fc_cleanup_callback(journal, 0, tid); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; + if (fallback) +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index fd933c45281af..d63b8106796e2 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1295,7 +1295,7 @@ struct journal_s + * Clean-up after fast commit or full commit. JBD2 calls this function + * after every commit operation. 
+ */ +- void (*j_fc_cleanup_callback)(struct journal_s *journal, int); ++ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); + + /** + * @j_fc_replay_callback: +-- +2.34.1 + diff --git a/queue-5.16/ext4-simplify-updating-of-fast-commit-stats.patch b/queue-5.16/ext4-simplify-updating-of-fast-commit-stats.patch new file mode 100644 index 00000000000..897fb40dc99 --- /dev/null +++ b/queue-5.16/ext4-simplify-updating-of-fast-commit-stats.patch @@ -0,0 +1,237 @@ +From d29ecbeb7189008f18506200809e12a2f7fe49f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Dec 2021 12:21:39 -0800 +Subject: ext4: simplify updating of fast commit stats + +From: Harshad Shirwadkar + +[ Upstream commit 0915e464cb274648e1ef1663e1356e53ff400983 ] + +Move fast commit stats updating logic to a separate function from +ext4_fc_commit(). This significantly improves readability of +ext4_fc_commit(). + +Signed-off-by: Harshad Shirwadkar +Link: https://lore.kernel.org/r/20211223202140.2061101-4-harshads@google.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 1 - + fs/ext4/fast_commit.c | 99 +++++++++++++++++++++++-------------------- + fs/ext4/fast_commit.h | 27 ++++++------ + 3 files changed, 68 insertions(+), 59 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index f80e4de726869..470fd3c2aef54 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1747,7 +1747,6 @@ struct ext4_sb_info { + spinlock_t s_fc_lock; + struct buffer_head *s_fc_bh; + struct ext4_fc_stats s_fc_stats; +- u64 s_fc_avg_commit_time; + #ifdef CONFIG_EXT4_DEBUG + int s_fc_debug_max_replay; + #endif +diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c +index 48e522bb7bca4..1b935feec6f6b 100644 +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -1075,6 +1075,32 @@ static int ext4_fc_perform_commit(journal_t *journal) + return ret; + } + ++static void ext4_fc_update_stats(struct super_block *sb, int status, ++ u64 commit_time, 
int nblks) ++{ ++ struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats; ++ ++ jbd_debug(1, "Fast commit ended with status = %d", status); ++ if (status == EXT4_FC_STATUS_OK) { ++ stats->fc_num_commits++; ++ stats->fc_numblks += nblks; ++ if (likely(stats->s_fc_avg_commit_time)) ++ stats->s_fc_avg_commit_time = ++ (commit_time + ++ stats->s_fc_avg_commit_time * 3) / 4; ++ else ++ stats->s_fc_avg_commit_time = commit_time; ++ } else if (status == EXT4_FC_STATUS_FAILED || ++ status == EXT4_FC_STATUS_INELIGIBLE) { ++ if (status == EXT4_FC_STATUS_FAILED) ++ stats->fc_failed_commits++; ++ stats->fc_ineligible_commits++; ++ } else { ++ stats->fc_skipped_commits++; ++ } ++ trace_ext4_fc_commit_stop(sb, nblks, status); ++} ++ + /* + * The main commit entry point. Performs a fast commit for transaction + * commit_tid if needed. If it's not possible to perform a fast commit +@@ -1087,7 +1113,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) + struct ext4_sb_info *sbi = EXT4_SB(sb); + int nblks = 0, ret, bsize = journal->j_blocksize; + int subtid = atomic_read(&sbi->s_fc_subtid); +- int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0; ++ int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0; + ktime_t start_time, commit_time; + + trace_ext4_fc_commit_start(sb); +@@ -1104,69 +1130,52 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) + if (atomic_read(&sbi->s_fc_subtid) <= subtid && + commit_tid > journal->j_commit_sequence) + goto restart_fc; +- reason = EXT4_FC_REASON_ALREADY_COMMITTED; +- goto out; ++ ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0); ++ return 0; + } else if (ret) { +- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; +- reason = EXT4_FC_REASON_FC_START_FAILED; +- goto out; ++ /* ++ * Commit couldn't start. Just update stats and perform a ++ * full commit. 
++ */ ++ ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0); ++ return jbd2_complete_transaction(journal, commit_tid); + } ++ + /* + * After establishing journal barrier via jbd2_fc_begin_commit(), check + * if we are fast commit ineligible. + */ + if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) { +- reason = EXT4_FC_REASON_INELIGIBLE; +- goto out; ++ status = EXT4_FC_STATUS_INELIGIBLE; ++ goto fallback; + } + + fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize; + ret = ext4_fc_perform_commit(journal); + if (ret < 0) { +- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; +- reason = EXT4_FC_REASON_FC_FAILED; +- goto out; ++ status = EXT4_FC_STATUS_FAILED; ++ goto fallback; + } + nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before; + ret = jbd2_fc_wait_bufs(journal, nblks); + if (ret < 0) { +- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++; +- reason = EXT4_FC_REASON_FC_FAILED; +- goto out; ++ status = EXT4_FC_STATUS_FAILED; ++ goto fallback; + } + atomic_inc(&sbi->s_fc_subtid); +- jbd2_fc_end_commit(journal); +-out: +- spin_lock(&sbi->s_fc_lock); +- if (reason != EXT4_FC_REASON_OK && +- reason != EXT4_FC_REASON_ALREADY_COMMITTED) { +- sbi->s_fc_stats.fc_ineligible_commits++; +- } else { +- sbi->s_fc_stats.fc_num_commits++; +- sbi->s_fc_stats.fc_numblks += nblks; +- } +- spin_unlock(&sbi->s_fc_lock); +- nblks = (reason == EXT4_FC_REASON_OK) ? 
nblks : 0; +- trace_ext4_fc_commit_stop(sb, nblks, reason); +- commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); ++ ret = jbd2_fc_end_commit(journal); + /* +- * weight the commit time higher than the average time so we don't +- * react too strongly to vast changes in the commit time ++ * weight the commit time higher than the average time so we ++ * don't react too strongly to vast changes in the commit time + */ +- if (likely(sbi->s_fc_avg_commit_time)) +- sbi->s_fc_avg_commit_time = (commit_time + +- sbi->s_fc_avg_commit_time * 3) / 4; +- else +- sbi->s_fc_avg_commit_time = commit_time; +- jbd_debug(1, +- "Fast commit ended with blks = %d, reason = %d, subtid - %d", +- nblks, reason, subtid); +- if (reason == EXT4_FC_REASON_FC_FAILED) +- return jbd2_fc_end_commit_fallback(journal); +- if (reason == EXT4_FC_REASON_FC_START_FAILED || +- reason == EXT4_FC_REASON_INELIGIBLE) +- return jbd2_complete_transaction(journal, commit_tid); +- return 0; ++ commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); ++ ext4_fc_update_stats(sb, status, commit_time, nblks); ++ return ret; ++ ++fallback: ++ ret = jbd2_fc_end_commit_fallback(journal); ++ ext4_fc_update_stats(sb, status, 0, 0); ++ return ret; + } + + /* +@@ -2132,7 +2141,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v) + "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n", + stats->fc_num_commits, stats->fc_ineligible_commits, + stats->fc_numblks, +- div_u64(sbi->s_fc_avg_commit_time, 1000)); ++ div_u64(stats->s_fc_avg_commit_time, 1000)); + seq_puts(seq, "Ineligible reasons:\n"); + for (i = 0; i < EXT4_FC_REASON_MAX; i++) + seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i], +diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h +index 937c381b4c85e..083ad1cb705a7 100644 +--- a/fs/ext4/fast_commit.h ++++ b/fs/ext4/fast_commit.h +@@ -71,21 +71,19 @@ struct ext4_fc_tail { + }; + + /* +- * Fast commit reason codes ++ * Fast commit status codes ++ */ 
++enum { ++ EXT4_FC_STATUS_OK = 0, ++ EXT4_FC_STATUS_INELIGIBLE, ++ EXT4_FC_STATUS_SKIPPED, ++ EXT4_FC_STATUS_FAILED, ++}; ++ ++/* ++ * Fast commit ineligiblity reasons: + */ + enum { +- /* +- * Commit status codes: +- */ +- EXT4_FC_REASON_OK = 0, +- EXT4_FC_REASON_INELIGIBLE, +- EXT4_FC_REASON_ALREADY_COMMITTED, +- EXT4_FC_REASON_FC_START_FAILED, +- EXT4_FC_REASON_FC_FAILED, +- +- /* +- * Fast commit ineligiblity reasons: +- */ + EXT4_FC_REASON_XATTR = 0, + EXT4_FC_REASON_CROSS_RENAME, + EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, +@@ -117,7 +115,10 @@ struct ext4_fc_stats { + unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX]; + unsigned long fc_num_commits; + unsigned long fc_ineligible_commits; ++ unsigned long fc_failed_commits; ++ unsigned long fc_skipped_commits; + unsigned long fc_numblks; ++ u64 s_fc_avg_commit_time; + }; + + #define EXT4_FC_REPLAY_REALLOC_INCREMENT 4 +-- +2.34.1 + diff --git a/queue-5.16/ipv6-fix-skb-drops-in-igmp6_event_query-and-igmp6_ev.patch b/queue-5.16/ipv6-fix-skb-drops-in-igmp6_event_query-and-igmp6_ev.patch new file mode 100644 index 00000000000..2f5820d5647 --- /dev/null +++ b/queue-5.16/ipv6-fix-skb-drops-in-igmp6_event_query-and-igmp6_ev.patch @@ -0,0 +1,120 @@ +From d524916e6a467112345767ffd5187488dfd6277c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Mar 2022 09:37:28 -0800 +Subject: ipv6: fix skb drops in igmp6_event_query() and igmp6_event_report() + +From: Eric Dumazet + +[ Upstream commit 2d3916f3189172d5c69d33065c3c21119fe539fc ] + +While investigating on why a synchronize_net() has been added recently +in ipv6_mc_down(), I found that igmp6_event_query() and igmp6_event_report() +might drop skbs in some cases. + +Discussion about removing synchronize_net() from ipv6_mc_down() +will happen in a different thread. 
+ +Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") +Signed-off-by: Eric Dumazet +Cc: Taehee Yoo +Cc: Cong Wang +Cc: David Ahern +Link: https://lore.kernel.org/r/20220303173728.937869-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/ndisc.h | 4 ++-- + net/ipv6/mcast.c | 32 ++++++++++++-------------------- + 2 files changed, 14 insertions(+), 22 deletions(-) + +diff --git a/include/net/ndisc.h b/include/net/ndisc.h +index 04341d86585de..5e37e58586796 100644 +--- a/include/net/ndisc.h ++++ b/include/net/ndisc.h +@@ -487,9 +487,9 @@ int igmp6_late_init(void); + void igmp6_cleanup(void); + void igmp6_late_cleanup(void); + +-int igmp6_event_query(struct sk_buff *skb); ++void igmp6_event_query(struct sk_buff *skb); + +-int igmp6_event_report(struct sk_buff *skb); ++void igmp6_event_report(struct sk_buff *skb); + + + #ifdef CONFIG_SYSCTL +diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c +index a8861db52c187..909f937befd71 100644 +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, + } + + /* called with rcu_read_lock() */ +-int igmp6_event_query(struct sk_buff *skb) ++void igmp6_event_query(struct sk_buff *skb) + { + struct inet6_dev *idev = __in6_dev_get(skb->dev); + +- if (!idev) +- return -EINVAL; +- +- if (idev->dead) { +- kfree_skb(skb); +- return -ENODEV; +- } ++ if (!idev || idev->dead) ++ goto out; + + spin_lock_bh(&idev->mc_query_lock); + if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) { + __skb_queue_tail(&idev->mc_query_queue, skb); + if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0)) + in6_dev_hold(idev); ++ skb = NULL; + } + spin_unlock_bh(&idev->mc_query_lock); +- +- return 0; ++out: ++ kfree_skb(skb); + } + + static void __mld_query_work(struct sk_buff *skb) +@@ -1542,27 +1538,23 @@ static void mld_query_work(struct work_struct *work) + } + + /* called with rcu_read_lock() */ 
+-int igmp6_event_report(struct sk_buff *skb) ++void igmp6_event_report(struct sk_buff *skb) + { + struct inet6_dev *idev = __in6_dev_get(skb->dev); + +- if (!idev) +- return -EINVAL; +- +- if (idev->dead) { +- kfree_skb(skb); +- return -ENODEV; +- } ++ if (!idev || idev->dead) ++ goto out; + + spin_lock_bh(&idev->mc_report_lock); + if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) { + __skb_queue_tail(&idev->mc_report_queue, skb); + if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0)) + in6_dev_hold(idev); ++ skb = NULL; + } + spin_unlock_bh(&idev->mc_report_lock); +- +- return 0; ++out: ++ kfree_skb(skb); + } + + static void __mld_report_work(struct sk_buff *skb) +-- +2.34.1 + diff --git a/queue-5.16/kvm-arm64-workaround-cortex-a510-s-single-step-and-p.patch b/queue-5.16/kvm-arm64-workaround-cortex-a510-s-single-step-and-p.patch new file mode 100644 index 00000000000..031d19d9304 --- /dev/null +++ b/queue-5.16/kvm-arm64-workaround-cortex-a510-s-single-step-and-p.patch @@ -0,0 +1,152 @@ +From 248f6fc5c8c2cbdfaa4d22847ba276c1ea01f5bb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jan 2022 12:20:52 +0000 +Subject: KVM: arm64: Workaround Cortex-A510's single-step and PAC trap errata + +From: James Morse + +[ Upstream commit 1dd498e5e26ad71e3e9130daf72cfb6a693fee03 ] + +Cortex-A510's erratum #2077057 causes SPSR_EL2 to be corrupted when +single-stepping authenticated ERET instructions. A single step is +expected, but a pointer authentication trap is taken instead. The +erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow +EL1 to cause a return to EL2 with a guest controlled ELR_EL2. + +Because the conditions require an ERET into active-not-pending state, +this is only a problem for the EL2 when EL2 is stepping EL1. In this case +the previous SPSR_EL2 value is preserved in struct kvm_vcpu, and can be +restored. 
+ +Cc: stable@vger.kernel.org # 53960faf2b73: arm64: Add Cortex-A510 CPU part definition +Cc: stable@vger.kernel.org +Signed-off-by: James Morse +[maz: fixup cpucaps ordering] +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20220127122052.1584324-5-james.morse@arm.com +Signed-off-by: Sasha Levin +--- + Documentation/arm64/silicon-errata.rst | 2 ++ + arch/arm64/Kconfig | 16 ++++++++++++++++ + arch/arm64/kernel/cpu_errata.c | 8 ++++++++ + arch/arm64/kvm/hyp/include/hyp/switch.h | 20 +++++++++++++++++++- + arch/arm64/tools/cpucaps | 5 +++-- + 5 files changed, 48 insertions(+), 3 deletions(-) + +diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst +index 0ec7b7f1524b1..ea281dd755171 100644 +--- a/Documentation/arm64/silicon-errata.rst ++++ b/Documentation/arm64/silicon-errata.rst +@@ -100,6 +100,8 @@ stable kernels. + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | + +----------------+-----------------+-----------------+-----------------------------+ ++| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | +++----------------+-----------------+-----------------+-----------------------------+ + | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index ae0e93871ee5f..651bf217465e9 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -681,6 +681,22 @@ config ARM64_ERRATUM_2051678 + + If unsure, say Y. + ++config ARM64_ERRATUM_2077057 ++ bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2" ++ help ++ This option adds the workaround for ARM Cortex-A510 erratum 2077057. 
++ Affected Cortex-A510 may corrupt SPSR_EL2 when a step exception is ++ expected, but a Pointer Authentication trap is taken instead. The ++ erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow ++ EL1 to cause a return to EL2 with a guest controlled ELR_EL2. ++ ++ This can only happen when EL2 is stepping EL1. ++ ++ When these conditions occur, the SPSR_EL2 value is unchanged from the ++ previous guest entry, and can be restored from the in-memory copy. ++ ++ If unsure, say Y. ++ + config ARM64_ERRATUM_2119858 + bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" + default y +diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c +index 066098198c248..b217941713a8d 100644 +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -600,6 +600,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { + CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), + }, + #endif ++#ifdef CONFIG_ARM64_ERRATUM_2077057 ++ { ++ .desc = "ARM erratum 2077057", ++ .capability = ARM64_WORKAROUND_2077057, ++ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, ++ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), ++ }, ++#endif + #ifdef CONFIG_ARM64_ERRATUM_2064142 + { + .desc = "ARM erratum 2064142", +diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h +index adb67f8c9d7d3..3ae9c0b944878 100644 +--- a/arch/arm64/kvm/hyp/include/hyp/switch.h ++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h +@@ -424,6 +424,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) + return false; + } + ++static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code) ++{ ++ /* ++ * Check for the conditions of Cortex-A510's #2077057. When these occur ++ * SPSR_EL2 can't be trusted, but isn't needed either as it is ++ * unchanged from the value in vcpu_gp_regs(vcpu)->pstate. 
++ * Are we single-stepping the guest, and took a PAC exception from the ++ * active-not-pending state? ++ */ ++ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) && ++ vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && ++ *vcpu_cpsr(vcpu) & DBG_SPSR_SS && ++ ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC) ++ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); ++ ++ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); ++} ++ + /* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to the +@@ -435,7 +453,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) + * Save PSTATE early so that we can evaluate the vcpu mode + * early on. + */ +- vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); ++ synchronize_vcpu_pstate(vcpu, exit_code); + + /* + * Check whether we want to repaint the state one way or +diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps +index e7719e8f18def..9c65b1e25a965 100644 +--- a/arch/arm64/tools/cpucaps ++++ b/arch/arm64/tools/cpucaps +@@ -55,9 +55,10 @@ WORKAROUND_1418040 + WORKAROUND_1463225 + WORKAROUND_1508412 + WORKAROUND_1542419 +-WORKAROUND_2064142 +-WORKAROUND_2038923 + WORKAROUND_1902691 ++WORKAROUND_2038923 ++WORKAROUND_2064142 ++WORKAROUND_2077057 + WORKAROUND_TRBE_OVERWRITE_FILL_MODE + WORKAROUND_TSB_FLUSH_FAILURE + WORKAROUND_TRBE_WRITE_OUT_OF_RANGE +-- +2.34.1 + diff --git a/queue-5.16/kvm-x86-add-kvm_cap_enable_cap-to-x86.patch b/queue-5.16/kvm-x86-add-kvm_cap_enable_cap-to-x86.patch new file mode 100644 index 00000000000..f66c792b156 --- /dev/null +++ b/queue-5.16/kvm-x86-add-kvm_cap_enable_cap-to-x86.patch @@ -0,0 +1,53 @@ +From 768f4b6c9d9178cefd0355fde27e1d8aa3d3f89a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Feb 2022 21:29:51 +0000 +Subject: KVM: x86: Add KVM_CAP_ENABLE_CAP to x86 + +From: Aaron Lewis + +[ Upstream commit 127770ac0d043435375ab86434f31a93efa88215 ] + 
+Follow the precedent set by other architectures that support the VCPU +ioctl, KVM_ENABLE_CAP, and advertise the VM extension, KVM_CAP_ENABLE_CAP. +This way, userspace can ensure that KVM_ENABLE_CAP is available on a +vcpu before using it. + +Fixes: 5c919412fe61 ("kvm/x86: Hyper-V synthetic interrupt controller") +Signed-off-by: Aaron Lewis +Message-Id: <20220214212950.1776943-1-aaronlewis@google.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +--- + Documentation/virt/kvm/api.rst | 2 +- + arch/x86/kvm/x86.c | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst +index aeeb071c76881..9df9eadaeb5c2 100644 +--- a/Documentation/virt/kvm/api.rst ++++ b/Documentation/virt/kvm/api.rst +@@ -1391,7 +1391,7 @@ documentation when it pops into existence). + ------------------- + + :Capability: KVM_CAP_ENABLE_CAP +-:Architectures: mips, ppc, s390 ++:Architectures: mips, ppc, s390, x86 + :Type: vcpu ioctl + :Parameters: struct kvm_enable_cap (in) + :Returns: 0 on success; -1 on error +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 0714fa0e7ede0..c6eb3e45e3d80 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -4163,6 +4163,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) + case KVM_CAP_SREGS2: + case KVM_CAP_EXIT_ON_EMULATION_FAILURE: + case KVM_CAP_VCPU_ATTRIBUTES: ++ case KVM_CAP_ENABLE_CAP: + r = 1; + break; + case KVM_CAP_EXIT_HYPERCALL: +-- +2.34.1 + diff --git a/queue-5.16/sched-fair-fix-fault-in-reweight_entity.patch b/queue-5.16/sched-fair-fix-fault-in-reweight_entity.patch new file mode 100644 index 00000000000..9dde504a387 --- /dev/null +++ b/queue-5.16/sched-fair-fix-fault-in-reweight_entity.patch @@ -0,0 +1,105 @@ +From 9f407ba6c655408e85afec84fdd872507b6a2954 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 3 Feb 2022 08:18:46 -0800 +Subject: sched/fair: Fix fault in reweight_entity 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tadeusz Struk + +[ Upstream commit 13765de8148f71fa795e0a6607de37c49ea5915a ] + +Syzbot found a GPF in reweight_entity. This has been bisected to +commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid +sched_task_group") + +There is a race between sched_post_fork() and setpriority(PRIO_PGRP) +within a thread group that causes a null-ptr-deref in +reweight_entity() in CFS. The scenario is that the main process spawns a +number of new threads, which then call setpriority(PRIO_PGRP, 0, -20), +wait, and exit. For each of the new threads the copy_process() gets +invoked, which adds the new task_struct and calls sched_post_fork() +for it. + +In the above scenario there is a possibility that +setpriority(PRIO_PGRP) and set_one_prio() will be called for a thread +in the group that is just being created by copy_process(), and for +which the sched_post_fork() has not been executed yet. This will +trigger a null pointer dereference in reweight_entity(), as it will +try to access the run queue pointer, which hasn't been set. + +Before the mentioned change the cfs_rq pointer for the task has been +set in sched_fork(), which is called much earlier in copy_process(), +before the new task is added to the thread_group. Now it is done in +the sched_post_fork(), which is called after that. To fix the issue, +remove the update_load param from the set_load_weight() function +and call reweight_task() only if the task flag doesn't have the +TASK_NEW flag set. 
+ +Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") +Reported-by: syzbot+af7a719bc92395ee41b3@syzkaller.appspotmail.com +Signed-off-by: Tadeusz Struk +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Dietmar Eggemann +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20220203161846.1160750-1-tadeusz.struk@linaro.org +Signed-off-by: Sasha Levin +--- + kernel/sched/core.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index d24823b3c3f9f..35b256b789680 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1203,8 +1203,9 @@ int tg_nop(struct task_group *tg, void *data) + } + #endif + +-static void set_load_weight(struct task_struct *p, bool update_load) ++static void set_load_weight(struct task_struct *p) + { ++ bool update_load = !(READ_ONCE(p->__state) & TASK_NEW); + int prio = p->static_prio - MAX_RT_PRIO; + struct load_weight *load = &p->se.load; + +@@ -4392,7 +4393,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + p->static_prio = NICE_TO_PRIO(0); + + p->prio = p->normal_prio = p->static_prio; +- set_load_weight(p, false); ++ set_load_weight(p); + + /* + * We don't need the reset flag anymore after the fork. 
It has +@@ -6879,7 +6880,7 @@ void set_user_nice(struct task_struct *p, long nice) + put_prev_task(rq, p); + + p->static_prio = NICE_TO_PRIO(nice); +- set_load_weight(p, true); ++ set_load_weight(p); + old_prio = p->prio; + p->prio = effective_prio(p); + +@@ -7170,7 +7171,7 @@ static void __setscheduler_params(struct task_struct *p, + */ + p->rt_priority = attr->sched_priority; + p->normal_prio = normal_prio(p); +- set_load_weight(p, true); ++ set_load_weight(p); + } + + /* +@@ -9409,7 +9410,7 @@ void __init sched_init(void) + #endif + } + +- set_load_weight(&init_task, false); ++ set_load_weight(&init_task); + + /* + * The boot idle thread does lazy MMU switching as well: +-- +2.34.1 + diff --git a/queue-5.16/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch b/queue-5.16/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch new file mode 100644 index 00000000000..0b5fba48f56 --- /dev/null +++ b/queue-5.16/serial-stm32-prevent-tdr-register-overwrite-when-sen.patch @@ -0,0 +1,57 @@ +From 36807348826799579605d1c23d7af6b7b253d0fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jan 2022 17:44:40 +0100 +Subject: serial: stm32: prevent TDR register overwrite when sending x_char + +From: Valentin Caron + +[ Upstream commit d3d079bde07e1b7deaeb57506dc0b86010121d17 ] + +When sending x_char in stm32_usart_transmit_chars(), driver can overwrite +the value of TDR register by the value of x_char. If this happens, the +previous value that was present in TDR register will not be sent through +uart. + +This code checks if the previous value in TDR register is sent before +writing the x_char value into register. 
+ +Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") +Cc: stable +Signed-off-by: Valentin Caron +Link: https://lore.kernel.org/r/20220111164441.6178-2-valentin.caron@foss.st.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/stm32-usart.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c +index 2d3fbcbfaf108..93c2a5c956540 100644 +--- a/drivers/tty/serial/stm32-usart.c ++++ b/drivers/tty/serial/stm32-usart.c +@@ -520,10 +520,22 @@ static void stm32_usart_transmit_chars(struct uart_port *port) + struct stm32_port *stm32_port = to_stm32_port(port); + const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; + struct circ_buf *xmit = &port->state->xmit; ++ u32 isr; ++ int ret; + + if (port->x_char) { + if (stm32_port->tx_dma_busy) + stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT); ++ ++ /* Check that TDR is empty before filling FIFO */ ++ ret = ++ readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, ++ isr, ++ (isr & USART_SR_TXE), ++ 10, 1000); ++ if (ret) ++ dev_warn(port->dev, "1 character may be erased\n"); ++ + writel_relaxed(port->x_char, port->membase + ofs->tdr); + port->x_char = 0; + port->icount.tx++; +-- +2.34.1 + diff --git a/queue-5.16/series b/queue-5.16/series index efec9e75ddf..c80383fe37a 100644 --- a/queue-5.16/series +++ b/queue-5.16/series @@ -27,3 +27,19 @@ net-usb-cdc_mbim-avoid-altsetting-toggling-for-telit.patch block-map-add-__gfp_zero-flag-for-alloc_page-in-func.patch usb-gadget-don-t-release-an-existing-dev-buf.patch usb-gadget-clear-related-members-when-goto-fail.patch +exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch +exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch +tracing-add-test-for-user-space-strings-when-filteri.patch +arm64-mark-start_backtrace-notrace-and-nokprobe_symb.patch +serial-stm32-prevent-tdr-register-overwrite-when-sen.patch 
+kvm-arm64-workaround-cortex-a510-s-single-step-and-p.patch +ext4-drop-ineligible-txn-start-stop-apis.patch +ext4-simplify-updating-of-fast-commit-stats.patch +ext4-fast-commit-may-not-fallback-for-ineligible-com.patch +ext4-fast-commit-may-miss-file-actions.patch +sched-fair-fix-fault-in-reweight_entity.patch +kvm-x86-add-kvm_cap_enable_cap-to-x86.patch +ata-pata_hpt37x-fix-pci-clock-detection.patch +drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch +tracing-add-ustring-operation-to-filtering-string-po.patch +ipv6-fix-skb-drops-in-igmp6_event_query-and-igmp6_ev.patch diff --git a/queue-5.16/tracing-add-test-for-user-space-strings-when-filteri.patch b/queue-5.16/tracing-add-test-for-user-space-strings-when-filteri.patch new file mode 100644 index 00000000000..8b455695ea4 --- /dev/null +++ b/queue-5.16/tracing-add-test-for-user-space-strings-when-filteri.patch @@ -0,0 +1,217 @@ +From d003cfd3df7bf6ed1560ad3fe2abd2102c0b564e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jan 2022 11:55:32 -0500 +Subject: tracing: Add test for user space strings when filtering on string + pointers + +From: Steven Rostedt + +[ Upstream commit 77360f9bbc7e5e2ab7a2c8b4c0244fbbfcfc6f62 ] + +Pingfan reported that the following causes a fault: + + echo "filename ~ \"cpu\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_at/enable + +The reason is that trace event filter treats the user space pointer +defined by "filename" as a normal pointer to compare against the "cpu" +string. 
The following bug happened: + + kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0001) - permissions violation + PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867 + Oops: 0001 [#1] PREEMPT SMP PTI + CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + RIP: 0010:strlen+0x0/0x20 + Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 + 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 + 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 + RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246 + RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000 + RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60 + RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000 + R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380 + R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0 + FS: 00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + filter_pred_pchar+0x18/0x40 + filter_match_preds+0x31/0x70 + ftrace_syscall_enter+0x27a/0x2c0 + syscall_trace_enter.constprop.0+0x1aa/0x1d0 + do_syscall_64+0x16/0x90 + entry_SYSCALL_64_after_hwframe+0x44/0xae + RIP: 0033:0x7fa861d88664 + +The above happened because the kernel tried to access user space directly +and triggered a "supervisor read access in kernel mode" fault. Worse yet, +the memory could not even be loaded yet, and a SEGFAULT could happen as +well. This could be true for kernel space accessing as well. 
+ +To be even more robust, test both kernel and user space strings. If the +string fails to read, then simply have the filter fail. + +Note, TASK_SIZE is used to determine if the pointer is user or kernel space +and the appropriate strncpy_from_kernel/user_nofault() function is used to +copy the memory. For some architectures, the compare to TASK_SIZE may always +pick user space or kernel space. If it gets it wrong, the only thing is that +the filter will fail to match. In the future, this needs to be fixed to have +the event denote which should be used. But failing a filter is much better +than panicking the machine, and that can be solved later. + +Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@gmail.com/ +Link: https://lkml.kernel.org/r/20220110115532.536088fd@gandalf.local.home + +Cc: stable@vger.kernel.org +Cc: Ingo Molnar +Cc: Andrew Morton +Cc: Masami Hiramatsu +Cc: Tom Zanussi +Reported-by: Pingfan Liu +Tested-by: Pingfan Liu +Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings") +Signed-off-by: Steven Rostedt +Signed-off-by: Sasha Levin +--- + Documentation/trace/events.rst | 10 +++++ + kernel/trace/trace_events_filter.c | 66 ++++++++++++++++++++++++++++-- + 2 files changed, 73 insertions(+), 3 deletions(-) + +diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst +index 8ddb9b09451c8..45e66a60a816a 100644 +--- a/Documentation/trace/events.rst ++++ b/Documentation/trace/events.rst +@@ -230,6 +230,16 @@ Currently the caret ('^') for an error always appears at the beginning of + the filter string; the error message should still be useful though + even without more accurate position info.
+ ++5.2.1 Filter limitations ++------------------------ ++ ++If a filter is placed on a string pointer ``(char *)`` that does not point ++to a string on the ring buffer, but instead points to kernel or user space ++memory, then, for safety reasons, at most 1024 bytes of the content is ++copied onto a temporary buffer to do the compare. If the copy of the memory ++faults (the pointer points to memory that should not be accessed), then the ++string compare will be treated as not matching. ++ + 5.3 Clearing filters + -------------------- + +diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c +index c9124038b140f..d3eb3c630f601 100644 +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -5,6 +5,7 @@ + * Copyright (C) 2009 Tom Zanussi + */ + ++#include + #include + #include + #include +@@ -654,6 +655,47 @@ DEFINE_EQUALITY_PRED(32); + DEFINE_EQUALITY_PRED(16); + DEFINE_EQUALITY_PRED(8); + ++/* user space strings temp buffer */ ++#define USTRING_BUF_SIZE 1024 ++ ++struct ustring_buffer { ++ char buffer[USTRING_BUF_SIZE]; ++}; ++ ++static __percpu struct ustring_buffer *ustring_per_cpu; ++ ++static __always_inline char *test_string(char *str) ++{ ++ struct ustring_buffer *ubuf; ++ char __user *ustr; ++ char *kstr; ++ ++ if (!ustring_per_cpu) ++ return NULL; ++ ++ ubuf = this_cpu_ptr(ustring_per_cpu); ++ kstr = ubuf->buffer; ++ ++ /* ++ * We use TASK_SIZE to denote user or kernel space, but this will ++ * not work for all architectures. If it picks the wrong one, it may ++ * just fail the filter (but will not bug). ++ * ++ * TODO: Have a way to properly denote which one this is for. ++ */ ++ if (likely((unsigned long)str >= TASK_SIZE)) { ++ /* For safety, do not trust the string pointer */ ++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) ++ return NULL; ++ } else { ++ /* user space address? 
*/ ++ ustr = (char __user *)str; ++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) ++ return NULL; ++ } ++ return kstr; ++} ++ + /* Filter predicate for fixed sized arrays of characters */ + static int filter_pred_string(struct filter_pred *pred, void *event) + { +@@ -671,10 +713,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event) + static int filter_pred_pchar(struct filter_pred *pred, void *event) + { + char **addr = (char **)(event + pred->offset); ++ char *str; + int cmp, match; +- int len = strlen(*addr) + 1; /* including tailing '\0' */ ++ int len; + +- cmp = pred->regex.match(*addr, &pred->regex, len); ++ str = test_string(*addr); ++ if (!str) ++ return 0; ++ ++ len = strlen(str) + 1; /* including tailing '\0' */ ++ cmp = pred->regex.match(str, &pred->regex, len); + + match = cmp ^ pred->not; + +@@ -1320,8 +1368,17 @@ static int parse_pred(const char *str, void *data, + + } else if (field->filter_type == FILTER_DYN_STRING) + pred->fn = filter_pred_strloc; +- else ++ else { ++ ++ if (!ustring_per_cpu) { ++ /* Once allocated, keep it around for good */ ++ ustring_per_cpu = alloc_percpu(struct ustring_buffer); ++ if (!ustring_per_cpu) ++ goto err_mem; ++ } ++ + pred->fn = filter_pred_pchar; ++ } + /* go past the last quote */ + i++; + +@@ -1387,6 +1444,9 @@ static int parse_pred(const char *str, void *data, + err_free: + kfree(pred); + return -EINVAL; ++err_mem: ++ kfree(pred); ++ return -ENOMEM; + } + + enum { +-- +2.34.1 + diff --git a/queue-5.16/tracing-add-ustring-operation-to-filtering-string-po.patch b/queue-5.16/tracing-add-ustring-operation-to-filtering-string-po.patch new file mode 100644 index 00000000000..11f23d14581 --- /dev/null +++ b/queue-5.16/tracing-add-ustring-operation-to-filtering-string-po.patch @@ -0,0 +1,190 @@ +From 641b43ce7c85d47bfb4887dc81217b3334be4593 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jan 2022 20:08:40 -0500 +Subject: tracing: Add ustring operation to filtering string 
pointers + +From: Steven Rostedt + +[ Upstream commit f37c3bbc635994eda203a6da4ba0f9d05165a8d6 ] + +Since referencing user space pointers is special, if the user wants to +filter on a field that is a pointer to user space, then they need to +specify it. + +Add a ".ustring" attribute to the field name for filters to state that the +field is pointing to user space such that the kernel can take the +appropriate action to read that pointer. + +Link: https://lore.kernel.org/all/yt9d8rvmt2jq.fsf@linux.ibm.com/ + +Fixes: 77360f9bbc7e ("tracing: Add test for user space strings when filtering on string pointers") +Tested-by: Sven Schnelle +Signed-off-by: Steven Rostedt +Signed-off-by: Sasha Levin +--- + Documentation/trace/events.rst | 9 ++++ + kernel/trace/trace_events_filter.c | 81 +++++++++++++++++++++--------- + 2 files changed, 66 insertions(+), 24 deletions(-) + +diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst +index 45e66a60a816a..c47f381d0c002 100644 +--- a/Documentation/trace/events.rst ++++ b/Documentation/trace/events.rst +@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes + prev_comm ~ "*sh*" + prev_comm ~ "ba*sh" + ++If the field is a pointer that points into user space (for example ++"filename" from sys_enter_openat), then you have to append ".ustring" to the ++field name:: ++ ++ filename.ustring ~ "password" ++ ++As the kernel will have to know how to retrieve the memory that the pointer ++is at from user space. 
++ + 5.2 Setting filters + ------------------- + +diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c +index d3eb3c630f601..06d6318ee5377 100644 +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -665,6 +665,23 @@ struct ustring_buffer { + static __percpu struct ustring_buffer *ustring_per_cpu; + + static __always_inline char *test_string(char *str) ++{ ++ struct ustring_buffer *ubuf; ++ char *kstr; ++ ++ if (!ustring_per_cpu) ++ return NULL; ++ ++ ubuf = this_cpu_ptr(ustring_per_cpu); ++ kstr = ubuf->buffer; ++ ++ /* For safety, do not trust the string pointer */ ++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) ++ return NULL; ++ return kstr; ++} ++ ++static __always_inline char *test_ustring(char *str) + { + struct ustring_buffer *ubuf; + char __user *ustr; +@@ -676,23 +693,11 @@ static __always_inline char *test_string(char *str) + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + +- /* +- * We use TASK_SIZE to denote user or kernel space, but this will +- * not work for all architectures. If it picks the wrong one, it may +- * just fail the filter (but will not bug). +- * +- * TODO: Have a way to properly denote which one this is for. +- */ +- if (likely((unsigned long)str >= TASK_SIZE)) { +- /* For safety, do not trust the string pointer */ +- if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) +- return NULL; +- } else { +- /* user space address? */ +- ustr = (char __user *)str; +- if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) +- return NULL; +- } ++ /* user space address? 
*/ ++ ustr = (char __user *)str; ++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) ++ return NULL; ++ + return kstr; + } + +@@ -709,24 +714,42 @@ static int filter_pred_string(struct filter_pred *pred, void *event) + return match; + } + ++static __always_inline int filter_pchar(struct filter_pred *pred, char *str) ++{ ++ int cmp, match; ++ int len; ++ ++ len = strlen(str) + 1; /* including tailing '\0' */ ++ cmp = pred->regex.match(str, &pred->regex, len); ++ ++ match = cmp ^ pred->not; ++ ++ return match; ++} + /* Filter predicate for char * pointers */ + static int filter_pred_pchar(struct filter_pred *pred, void *event) + { + char **addr = (char **)(event + pred->offset); + char *str; +- int cmp, match; +- int len; + + str = test_string(*addr); + if (!str) + return 0; + +- len = strlen(str) + 1; /* including tailing '\0' */ +- cmp = pred->regex.match(str, &pred->regex, len); ++ return filter_pchar(pred, str); ++} + +- match = cmp ^ pred->not; ++/* Filter predicate for char * pointers in user space*/ ++static int filter_pred_pchar_user(struct filter_pred *pred, void *event) ++{ ++ char **addr = (char **)(event + pred->offset); ++ char *str; + +- return match; ++ str = test_ustring(*addr); ++ if (!str) ++ return 0; ++ ++ return filter_pchar(pred, str); + } + + /* +@@ -1206,6 +1229,7 @@ static int parse_pred(const char *str, void *data, + struct filter_pred *pred = NULL; + char num_buf[24]; /* Big enough to hold an address */ + char *field_name; ++ bool ustring = false; + char q; + u64 val; + int len; +@@ -1240,6 +1264,12 @@ static int parse_pred(const char *str, void *data, + return -EINVAL; + } + ++ /* See if the field is a user space string */ ++ if ((len = str_has_prefix(str + i, ".ustring"))) { ++ ustring = true; ++ i += len; ++ } ++ + while (isspace(str[i])) + i++; + +@@ -1377,7 +1407,10 @@ static int parse_pred(const char *str, void *data, + goto err_mem; + } + +- pred->fn = filter_pred_pchar; ++ if (ustring) ++ pred->fn = 
filter_pred_pchar_user; ++ else ++ pred->fn = filter_pred_pchar; + } + /* go past the last quote */ + i++; +-- +2.34.1 +