--- /dev/null
+From 9b93f78bc256080274ff351d54eb73fe892aadf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jan 2022 17:17:54 +0900
+Subject: arm64: Mark start_backtrace() notrace and NOKPROBE_SYMBOL
+
+From: Masami Hiramatsu <mhiramat@kernel.org>
+
+[ Upstream commit 1e0924bd09916fab795fc2a21ec1d148f24299fd ]
+
+Mark the start_backtrace() as notrace and NOKPROBE_SYMBOL
+because this function is called from ftrace and lockdep to
+get the caller address via return_address(). The lockdep
+is used in kprobes, it should also be NOKPROBE_SYMBOL.
+
+Fixes: b07f3499661c ("arm64: stacktrace: Move start_backtrace() out of the header")
+Cc: <stable@vger.kernel.org> # 5.13.x
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Link: https://lore.kernel.org/r/164301227374.1433152.12808232644267107415.stgit@devnote2
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/kernel/stacktrace.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
+index 94f83cd44e507..0ee6bd390bd09 100644
+--- a/arch/arm64/kernel/stacktrace.c
++++ b/arch/arm64/kernel/stacktrace.c
+@@ -33,7 +33,7 @@
+ */
+
+
+-void start_backtrace(struct stackframe *frame, unsigned long fp,
++notrace void start_backtrace(struct stackframe *frame, unsigned long fp,
+ unsigned long pc)
+ {
+ frame->fp = fp;
+@@ -55,6 +55,7 @@ void start_backtrace(struct stackframe *frame, unsigned long fp,
+ frame->prev_fp = 0;
+ frame->prev_type = STACK_TYPE_UNKNOWN;
+ }
++NOKPROBE_SYMBOL(start_backtrace);
+
+ /*
+ * Unwind from one frame record (A) to the next frame record (B).
+--
+2.34.1
+
--- /dev/null
+From da843dd5695cde66ea4bcce7142d469b2d8514d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Feb 2022 23:04:29 +0300
+Subject: ata: pata_hpt37x: fix PCI clock detection
+
+From: Sergey Shtylyov <s.shtylyov@omp.ru>
+
+[ Upstream commit 5f6b0f2d037c8864f20ff15311c695f65eb09db5 ]
+
+The f_CNT register (at the PCI config. address 0x78) is 16-bit, not
+8-bit! The bug was there from the very start... :-(
+
+Signed-off-by: Sergey Shtylyov <s.shtylyov@omp.ru>
+Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ata/pata_hpt37x.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
+index ae8375e9d2681..9d371859e81ed 100644
+--- a/drivers/ata/pata_hpt37x.c
++++ b/drivers/ata/pata_hpt37x.c
+@@ -964,14 +964,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
+
+ if ((freq >> 12) != 0xABCDE) {
+ int i;
+- u8 sr;
++ u16 sr;
+ u32 total = 0;
+
+ pr_warn("BIOS has not set timing clocks\n");
+
+ /* This is the process the HPT371 BIOS is reported to use */
+ for (i = 0; i < 128; i++) {
+- pci_read_config_byte(dev, 0x78, &sr);
++ pci_read_config_word(dev, 0x78, &sr);
+ total += sr & 0x1FF;
+ udelay(15);
+ }
+--
+2.34.1
+
--- /dev/null
+From ba0f28782dfb657a17208d37a05a7187e3335be4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Feb 2022 17:53:56 +0800
+Subject: drm/amdgpu: check vm ready by amdgpu_vm->evicting flag
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qiang Yu <qiang.yu@amd.com>
+
+[ Upstream commit c1a66c3bc425ff93774fb2f6eefa67b83170dd7e ]
+
+Workstation application ANSA/META v21.1.4 get this error dmesg when
+running CI test suite provided by ANSA/META:
+[drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-16)
+
+This is caused by:
+1. create a 256MB buffer in invisible VRAM
+2. CPU map the buffer and access it causes vm_fault and try to move
+ it to visible VRAM
+3. force visible VRAM space and traverse all VRAM bos to check if
+ evicting this bo is valuable
+4. when checking a VM bo (in invisible VRAM), amdgpu_vm_evictable()
+ will set amdgpu_vm->evicting, but latter due to not in visible
+ VRAM, won't really evict it so not add it to amdgpu_vm->evicted
+5. before next CS to clear the amdgpu_vm->evicting, user VM ops
+ ioctl will pass amdgpu_vm_ready() (check amdgpu_vm->evicted)
+ but fail in amdgpu_vm_bo_update_mapping() (check
+ amdgpu_vm->evicting) and get this error log
+
+This error won't affect functionality as next CS will finish the
+waiting VM ops. But we'd better clear the error log by checking
+the amdgpu_vm->evicting flag in amdgpu_vm_ready() to stop calling
+amdgpu_vm_bo_update_mapping() later.
+
+Another reason is amdgpu_vm->evicted list holds all BOs (both
+user buffer and page table), but only page table BOs' eviction
+prevent VM ops. amdgpu_vm->evicting flag is set only for page
+table BOs, so we should use evicting flag instead of evicted list
+in amdgpu_vm_ready().
+
+The side effect of this change is: previously blocked VM op (user
+buffer in "evicted" list but no page table in it) gets done
+immediately.
+
+v2: update commit comments.
+
+Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Qiang Yu <qiang.yu@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index 0e7dc23f78e7f..bc8d83698880c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -768,11 +768,16 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ * Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+- * True if eviction list is empty.
++ * True if VM is not evicting.
+ */
+ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
+ {
+- return list_empty(&vm->evicted);
++ bool ret;
++
++ amdgpu_vm_eviction_lock(vm);
++ ret = !vm->evicting;
++ amdgpu_vm_eviction_unlock(vm);
++ return ret;
+ }
+
+ /**
+--
+2.34.1
+
--- /dev/null
+From 6d432547abd6645f7cc07a08f4eb84e73ac4c3f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Nov 2021 22:02:37 +0900
+Subject: exfat: fix i_blocks for files truncated over 4 GiB
+
+From: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+
+[ Upstream commit 92fba084b79e6bc7b12fc118209f1922c1a2df56 ]
+
+In exfat_truncate(), the computation of inode->i_blocks is wrong if
+the file is larger than 4 GiB because a 32-bit variable is used as a
+mask. This is fixed and simplified by using round_up().
+
+Also fix the same buggy computation in exfat_read_root() and another
+(correct) one in exfat_fill_inode(). The latter was fixed another way
+last month but can be simplified by using round_up() as well. See:
+
+ commit 0c336d6e33f4 ("exfat: fix incorrect loading of i_blocks for
+ large files")
+
+Fixes: 98d917047e8b ("exfat: add file operations")
+Cc: stable@vger.kernel.org # v5.7+
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exfat/file.c | 4 ++--
+ fs/exfat/inode.c | 4 ++--
+ fs/exfat/super.c | 4 ++--
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/fs/exfat/file.c b/fs/exfat/file.c
+index 848166d6d5e9d..d890fd34bb2d0 100644
+--- a/fs/exfat/file.c
++++ b/fs/exfat/file.c
+@@ -251,8 +251,8 @@ void exfat_truncate(struct inode *inode, loff_t size)
+ else
+ mark_inode_dirty(inode);
+
+- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
+- ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
++ inode->i_blkbits;
+ write_size:
+ aligned_size = i_size_read(inode);
+ if (aligned_size & (blocksize - 1)) {
+diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
+index aca2e64d045b6..72a0ccfb616c3 100644
+--- a/fs/exfat/inode.c
++++ b/fs/exfat/inode.c
+@@ -602,8 +602,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
+
+ exfat_save_attr(inode, info->attr);
+
+- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) &
+- ~((loff_t)sbi->cluster_size - 1)) >> inode->i_blkbits;
++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
++ inode->i_blkbits;
+ inode->i_mtime = info->mtime;
+ inode->i_ctime = info->mtime;
+ ei->i_crtime = info->crtime;
+diff --git a/fs/exfat/super.c b/fs/exfat/super.c
+index 1a2115d73a48a..4b5d02b1df585 100644
+--- a/fs/exfat/super.c
++++ b/fs/exfat/super.c
+@@ -364,8 +364,8 @@ static int exfat_read_root(struct inode *inode)
+ inode->i_op = &exfat_dir_inode_operations;
+ inode->i_fop = &exfat_dir_operations;
+
+- inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
+- & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
++ inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >>
++ inode->i_blkbits;
+ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+ ei->i_size_aligned = i_size_read(inode);
+ ei->i_size_ondisk = i_size_read(inode);
+--
+2.34.1
+
--- /dev/null
+From 5dcaaecb0895d026a2954b4b5d82e8fe995f610c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Nov 2021 22:23:58 +0100
+Subject: exfat: reuse exfat_inode_info variable instead of calling EXFAT_I()
+
+From: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+
+[ Upstream commit 7dee6f57d7f22a89dd214518c778aec448270d4c ]
+
+Also add a local "struct exfat_inode_info *ei" variable to
+exfat_truncate() to simplify the code.
+
+Signed-off-by: Christophe Vu-Brugier <christophe.vu-brugier@seagate.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/exfat/file.c | 14 +++++++-------
+ fs/exfat/inode.c | 9 ++++-----
+ fs/exfat/namei.c | 6 +++---
+ fs/exfat/super.c | 6 +++---
+ 4 files changed, 17 insertions(+), 18 deletions(-)
+
+diff --git a/fs/exfat/file.c b/fs/exfat/file.c
+index 6af0191b648f1..848166d6d5e9d 100644
+--- a/fs/exfat/file.c
++++ b/fs/exfat/file.c
+@@ -110,8 +110,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
+ exfat_set_volume_dirty(sb);
+
+ num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi);
+- num_clusters_phys =
+- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi);
++ num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+
+ exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
+
+@@ -228,12 +227,13 @@ void exfat_truncate(struct inode *inode, loff_t size)
+ {
+ struct super_block *sb = inode->i_sb;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
++ struct exfat_inode_info *ei = EXFAT_I(inode);
+ unsigned int blocksize = i_blocksize(inode);
+ loff_t aligned_size;
+ int err;
+
+ mutex_lock(&sbi->s_lock);
+- if (EXFAT_I(inode)->start_clu == 0) {
++ if (ei->start_clu == 0) {
+ /*
+ * Empty start_clu != ~0 (not allocated)
+ */
+@@ -260,11 +260,11 @@ void exfat_truncate(struct inode *inode, loff_t size)
+ aligned_size++;
+ }
+
+- if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode))
+- EXFAT_I(inode)->i_size_ondisk = aligned_size;
++ if (ei->i_size_ondisk > i_size_read(inode))
++ ei->i_size_ondisk = aligned_size;
+
+- if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode))
+- EXFAT_I(inode)->i_size_aligned = aligned_size;
++ if (ei->i_size_aligned > i_size_read(inode))
++ ei->i_size_aligned = aligned_size;
+ mutex_unlock(&sbi->s_lock);
+ }
+
+diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
+index 1c7aa1ea4724c..aca2e64d045b6 100644
+--- a/fs/exfat/inode.c
++++ b/fs/exfat/inode.c
+@@ -114,10 +114,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset,
+ unsigned int local_clu_offset = clu_offset;
+ unsigned int num_to_be_allocated = 0, num_clusters = 0;
+
+- if (EXFAT_I(inode)->i_size_ondisk > 0)
++ if (ei->i_size_ondisk > 0)
+ num_clusters =
+- EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk,
+- sbi);
++ EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+
+ if (clu_offset >= num_clusters)
+ num_to_be_allocated = clu_offset - num_clusters + 1;
+@@ -416,10 +415,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
+
+ err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+
+- if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) {
++ if (ei->i_size_aligned < i_size_read(inode)) {
+ exfat_fs_error(inode->i_sb,
+ "invalid size(size(%llu) > aligned(%llu)\n",
+- i_size_read(inode), EXFAT_I(inode)->i_size_aligned);
++ i_size_read(inode), ei->i_size_aligned);
+ return -EIO;
+ }
+
+diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
+index 24b41103d1cc0..9d8ada781250b 100644
+--- a/fs/exfat/namei.c
++++ b/fs/exfat/namei.c
+@@ -395,9 +395,9 @@ static int exfat_find_empty_entry(struct inode *inode,
+
+ /* directory inode should be updated in here */
+ i_size_write(inode, size);
+- EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size;
+- EXFAT_I(inode)->i_size_aligned += sbi->cluster_size;
+- EXFAT_I(inode)->flags = p_dir->flags;
++ ei->i_size_ondisk += sbi->cluster_size;
++ ei->i_size_aligned += sbi->cluster_size;
++ ei->flags = p_dir->flags;
+ inode->i_blocks += 1 << sbi->sect_per_clus_bits;
+ }
+
+diff --git a/fs/exfat/super.c b/fs/exfat/super.c
+index 5539ffc20d164..1a2115d73a48a 100644
+--- a/fs/exfat/super.c
++++ b/fs/exfat/super.c
+@@ -366,9 +366,9 @@ static int exfat_read_root(struct inode *inode)
+
+ inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1))
+ & ~(sbi->cluster_size - 1)) >> inode->i_blkbits;
+- EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
+- EXFAT_I(inode)->i_size_aligned = i_size_read(inode);
+- EXFAT_I(inode)->i_size_ondisk = i_size_read(inode);
++ ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff;
++ ei->i_size_aligned = i_size_read(inode);
++ ei->i_size_ondisk = i_size_read(inode);
+
+ exfat_save_attr(inode, ATTR_SUBDIR);
+ inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
+--
+2.34.1
+
--- /dev/null
+From f5b14d8a3321344787c8cff251bf27bb0935a0f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Dec 2021 12:21:38 -0800
+Subject: ext4: drop ineligible txn start stop APIs
+
+From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+
+[ Upstream commit 7bbbe241ec7ce0def9f71464c878fdbd2b0dcf37 ]
+
+This patch drops ext4_fc_start_ineligible() and
+ext4_fc_stop_ineligible() APIs. Fast commit ineligible transactions
+should simply call ext4_fc_mark_ineligible() after starting the
+trasaction.
+
+Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+Link: https://lore.kernel.org/r/20211223202140.2061101-3-harshads@google.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h | 6 ++--
+ fs/ext4/extents.c | 6 ++--
+ fs/ext4/fast_commit.c | 79 ++++++++-----------------------------------
+ fs/ext4/ioctl.c | 3 +-
+ fs/ext4/super.c | 1 -
+ 5 files changed, 20 insertions(+), 75 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index d248a01132c3b..f80e4de726869 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1725,9 +1725,9 @@ struct ext4_sb_info {
+ */
+ struct work_struct s_error_work;
+
+- /* Ext4 fast commit stuff */
++ /* Ext4 fast commit sub transaction ID */
+ atomic_t s_fc_subtid;
+- atomic_t s_fc_ineligible_updates;
++
+ /*
+ * After commit starts, the main queue gets locked, and the further
+ * updates get added in the staging queue.
+@@ -2926,8 +2926,6 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+ void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
+ void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+-void ext4_fc_start_ineligible(struct super_block *sb, int reason);
+-void ext4_fc_stop_ineligible(struct super_block *sb);
+ void ext4_fc_start_update(struct inode *inode);
+ void ext4_fc_stop_update(struct inode *inode);
+ void ext4_fc_del(struct inode *inode);
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 9b37d16b24ffd..d3a8d704d8b4f 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -5342,7 +5342,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ ret = PTR_ERR(handle);
+ goto out_mmap;
+ }
+- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_preallocations(inode, 0);
+@@ -5381,7 +5381,6 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+
+ out_stop:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_ineligible(sb);
+ out_mmap:
+ filemap_invalidate_unlock(mapping);
+ out_mutex:
+@@ -5483,7 +5482,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ ret = PTR_ERR(handle);
+ goto out_mmap;
+ }
+- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+
+ /* Expand file to avoid data loss if there is error while shifting */
+ inode->i_size += len;
+@@ -5558,7 +5557,6 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+
+ out_stop:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_ineligible(sb);
+ out_mmap:
+ filemap_invalidate_unlock(mapping);
+ out_mutex:
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 3b79fb063c07a..48e522bb7bca4 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -65,21 +65,11 @@
+ *
+ * Fast Commit Ineligibility
+ * -------------------------
+- * Not all operations are supported by fast commits today (e.g extended
+- * attributes). Fast commit ineligibility is marked by calling one of the
+- * two following functions:
+- *
+- * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
+- * back to full commit. This is useful in case of transient errors.
+ *
+- * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
+- * the fast commits happening between ext4_fc_start_ineligible() and
+- * ext4_fc_stop_ineligible() and one fast commit after the call to
+- * ext4_fc_stop_ineligible() to fall back to full commits. It is important to
+- * make one more fast commit to fall back to full commit after stop call so
+- * that it guaranteed that the fast commit ineligible operation contained
+- * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
+- * followed by at least 1 full commit.
++ * Not all operations are supported by fast commits today (e.g extended
++ * attributes). Fast commit ineligibility is marked by calling
++ * ext4_fc_mark_ineligible(): This makes next fast commit operation to fall back
++ * to full commit.
+ *
+ * Atomicity of commits
+ * --------------------
+@@ -328,44 +318,6 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+ sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
+ }
+
+-/*
+- * Start a fast commit ineligible update. Any commits that happen while
+- * such an operation is in progress fall back to full commits.
+- */
+-void ext4_fc_start_ineligible(struct super_block *sb, int reason)
+-{
+- struct ext4_sb_info *sbi = EXT4_SB(sb);
+-
+- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+- return;
+-
+- WARN_ON(reason >= EXT4_FC_REASON_MAX);
+- sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
+- atomic_inc(&sbi->s_fc_ineligible_updates);
+-}
+-
+-/*
+- * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
+- * to ensure that after stopping the ineligible update, at least one full
+- * commit takes place.
+- */
+-void ext4_fc_stop_ineligible(struct super_block *sb)
+-{
+- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+- (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+- return;
+-
+- ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+- atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
+-}
+-
+-static inline int ext4_fc_is_ineligible(struct super_block *sb)
+-{
+- return (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE) ||
+- atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates));
+-}
+-
+ /*
+ * Generic fast commit tracking function. If this is the first time this we are
+ * called after a full commit, we initialize fast commit fields and then call
+@@ -391,7 +343,7 @@ static int ext4_fc_track_template(
+ (sbi->s_mount_state & EXT4_FC_REPLAY))
+ return -EOPNOTSUPP;
+
+- if (ext4_fc_is_ineligible(inode->i_sb))
++ if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
+ return -EINVAL;
+
+ tid = handle->h_transaction->t_tid;
+@@ -1142,11 +1094,8 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+
+ start_time = ktime_get();
+
+- if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+- (ext4_fc_is_ineligible(sb))) {
+- reason = EXT4_FC_REASON_INELIGIBLE;
+- goto out;
+- }
++ if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
++ return jbd2_complete_transaction(journal, commit_tid);
+
+ restart_fc:
+ ret = jbd2_fc_begin_commit(journal, commit_tid);
+@@ -1162,6 +1111,14 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+ reason = EXT4_FC_REASON_FC_START_FAILED;
+ goto out;
+ }
++ /*
++ * After establishing journal barrier via jbd2_fc_begin_commit(), check
++ * if we are fast commit ineligible.
++ */
++ if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
++ reason = EXT4_FC_REASON_INELIGIBLE;
++ goto out;
++ }
+
+ fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
+ ret = ext4_fc_perform_commit(journal);
+@@ -1180,12 +1137,6 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+ atomic_inc(&sbi->s_fc_subtid);
+ jbd2_fc_end_commit(journal);
+ out:
+- /* Has any ineligible update happened since we started? */
+- if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_INELIGIBLE;
+- }
+-
+ spin_lock(&sbi->s_fc_lock);
+ if (reason != EXT4_FC_REASON_OK &&
+ reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
+diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
+index 220a4c8178b5e..fd70bebb14370 100644
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
+ err = -EINVAL;
+ goto err_out;
+ }
+- ext4_fc_start_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+
+ /* Protect extent tree against block allocations via delalloc */
+ ext4_double_down_write_data_sem(inode, inode_bl);
+@@ -252,7 +252,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
+
+ err_out1:
+ ext4_journal_stop(handle);
+- ext4_fc_stop_ineligible(sb);
+ ext4_double_up_write_data_sem(inode, inode_bl);
+
+ err_out:
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 24a7ad80353b5..d304b72593d76 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4620,7 +4620,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+
+ /* Initialize fast commit stuff */
+ atomic_set(&sbi->s_fc_subtid, 0);
+- atomic_set(&sbi->s_fc_ineligible_updates, 0);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
+--
+2.34.1
+
--- /dev/null
+From dc0cacc25bc9df3a8521ebf4875cac0c2248bbfb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jan 2022 17:36:55 +0800
+Subject: ext4: fast commit may miss file actions
+
+From: Xin Yin <yinxin.x@bytedance.com>
+
+[ Upstream commit bdc8a53a6f2f0b1cb5f991440f2100732299eb93 ]
+
+in the follow scenario:
+1. jbd start transaction n
+2. task A get new handle for transaction n+1
+3. task A do some actions and add inode to FC_Q_MAIN fc_q
+4. jbd complete transaction n and clear FC_Q_MAIN fc_q
+5. task A call fsync
+
+Fast commit will lost the file actions during a full commit.
+
+we should also add updates to staging queue during a full commit.
+and in ext4_fc_cleanup(), when reset a inode's fc track range, check
+it's i_sync_tid, if it bigger than current transaction tid, do not
+rest it, or we will lost the track range.
+
+And EXT4_MF_FC_COMMITTING is not needed anymore, so drop it.
+
+Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
+Link: https://lore.kernel.org/r/20220117093655.35160-3-yinxin.x@bytedance.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h | 5 +----
+ fs/ext4/fast_commit.c | 11 ++++++-----
+ fs/ext4/super.c | 1 -
+ 3 files changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 8b5015ea46199..c2cc9d78915b0 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1793,10 +1793,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
+ enum {
+ EXT4_MF_MNTDIR_SAMPLED,
+ EXT4_MF_FS_ABORTED, /* Fatal error detected */
+- EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */
+- EXT4_MF_FC_COMMITTING /* File system underoing a fast
+- * commit.
+- */
++ EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
+ };
+
+ static inline void ext4_set_mount_flag(struct super_block *sb, int bit)
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 0cdfc5003d91a..aca8414706346 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -377,7 +377,8 @@ static int ext4_fc_track_template(
+ spin_lock(&sbi->s_fc_lock);
+ if (list_empty(&EXT4_I(inode)->i_fc_list))
+ list_add_tail(&EXT4_I(inode)->i_fc_list,
+- (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ?
++ (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
++ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
+ &sbi->s_fc_q[FC_Q_STAGING] :
+ &sbi->s_fc_q[FC_Q_MAIN]);
+ spin_unlock(&sbi->s_fc_lock);
+@@ -430,7 +431,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
+ node->fcd_name.len = dentry->d_name.len;
+
+ spin_lock(&sbi->s_fc_lock);
+- if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING))
++ if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
++ sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
+ list_add_tail(&node->fcd_list,
+ &sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ else
+@@ -896,7 +898,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
+ int ret = 0;
+
+ spin_lock(&sbi->s_fc_lock);
+- ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
+ ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
+ while (atomic_read(&ei->i_fc_updates)) {
+@@ -1214,7 +1215,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
+ list_del_init(&iter->i_fc_list);
+ ext4_clear_inode_state(&iter->vfs_inode,
+ EXT4_STATE_FC_COMMITTING);
+- ext4_fc_reset_inode(&iter->vfs_inode);
++ if (iter->i_sync_tid <= tid)
++ ext4_fc_reset_inode(&iter->vfs_inode);
+ /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
+ smp_mb();
+ #if (BITS_PER_LONG < 64)
+@@ -1243,7 +1245,6 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
+ list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
+ &sbi->s_fc_q[FC_Q_MAIN]);
+
+- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ if (tid >= sbi->s_fc_ineligible_tid) {
+ sbi->s_fc_ineligible_tid = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 888b2db92924d..32ca34403dcec 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4626,7 +4626,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ sbi->s_fc_bytes = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+- ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+ sbi->s_fc_ineligible_tid = 0;
+ spin_lock_init(&sbi->s_fc_lock);
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
+--
+2.34.1
+
--- /dev/null
+From 642f8c30b87603489a33051fd74a51c0a4447cf7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jan 2022 17:36:54 +0800
+Subject: ext4: fast commit may not fallback for ineligible commit
+
+From: Xin Yin <yinxin.x@bytedance.com>
+
+[ Upstream commit e85c81ba8859a4c839bcd69c5d83b32954133a5b ]
+
+For the follow scenario:
+1. jbd start commit transaction n
+2. task A get new handle for transaction n+1
+3. task A do some ineligible actions and mark FC_INELIGIBLE
+4. jbd complete transaction n and clean FC_INELIGIBLE
+5. task A call fsync
+
+In this case fast commit will not fallback to full commit and
+transaction n+1 also not handled by jbd.
+
+Make ext4_fc_mark_ineligible() also record transaction tid for
+latest ineligible case, when call ext4_fc_cleanup() check
+current transaction tid, if small than latest ineligible tid
+do not clear the EXT4_MF_FC_INELIGIBLE.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reported-by: Ritesh Harjani <riteshh@linux.ibm.com>
+Suggested-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
+Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h | 3 ++-
+ fs/ext4/extents.c | 4 ++--
+ fs/ext4/fast_commit.c | 33 +++++++++++++++++++++++++--------
+ fs/ext4/inode.c | 4 ++--
+ fs/ext4/ioctl.c | 4 ++--
+ fs/ext4/namei.c | 4 ++--
+ fs/ext4/super.c | 1 +
+ fs/ext4/xattr.c | 6 +++---
+ fs/jbd2/commit.c | 2 +-
+ fs/jbd2/journal.c | 2 +-
+ include/linux/jbd2.h | 2 +-
+ 11 files changed, 42 insertions(+), 23 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 470fd3c2aef54..8b5015ea46199 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1747,6 +1747,7 @@ struct ext4_sb_info {
+ spinlock_t s_fc_lock;
+ struct buffer_head *s_fc_bh;
+ struct ext4_fc_stats s_fc_stats;
++ tid_t s_fc_ineligible_tid;
+ #ifdef CONFIG_EXT4_DEBUG
+ int s_fc_debug_max_replay;
+ #endif
+@@ -2924,7 +2925,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+ struct dentry *dentry);
+ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
+ void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
+-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
++void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
+ void ext4_fc_start_update(struct inode *inode);
+ void ext4_fc_stop_update(struct inode *inode);
+ void ext4_fc_del(struct inode *inode);
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index d3a8d704d8b4f..d2667189be7e5 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -5342,7 +5342,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+ ret = PTR_ERR(handle);
+ goto out_mmap;
+ }
+- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
+
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_preallocations(inode, 0);
+@@ -5482,7 +5482,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+ ret = PTR_ERR(handle);
+ goto out_mmap;
+ }
+- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
+
+ /* Expand file to avoid data loss if there is error while shifting */
+ inode->i_size += len;
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 1b935feec6f6b..0cdfc5003d91a 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -302,18 +302,32 @@ void ext4_fc_del(struct inode *inode)
+ }
+
+ /*
+- * Mark file system as fast commit ineligible. This means that next commit
+- * operation would result in a full jbd2 commit.
++ * Mark file system as fast commit ineligible, and record latest
++ * ineligible transaction tid. This means until the recorded
++ * transaction, commit operation would result in a full jbd2 commit.
+ */
+-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
++void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
++ tid_t tid;
+
+ if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
+ (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
+ return;
+
+ ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
++ if (handle && !IS_ERR(handle))
++ tid = handle->h_transaction->t_tid;
++ else {
++ read_lock(&sbi->s_journal->j_state_lock);
++ tid = sbi->s_journal->j_running_transaction ?
++ sbi->s_journal->j_running_transaction->t_tid : 0;
++ read_unlock(&sbi->s_journal->j_state_lock);
++ }
++ spin_lock(&sbi->s_fc_lock);
++ if (sbi->s_fc_ineligible_tid < tid)
++ sbi->s_fc_ineligible_tid = tid;
++ spin_unlock(&sbi->s_fc_lock);
+ WARN_ON(reason >= EXT4_FC_REASON_MAX);
+ sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
+ }
+@@ -389,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
+ mutex_unlock(&ei->i_fc_lock);
+ node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
+ if (!node) {
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
+ mutex_lock(&ei->i_fc_lock);
+ return -ENOMEM;
+ }
+@@ -402,7 +416,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
+ if (!node->fcd_name.name) {
+ kmem_cache_free(ext4_fc_dentry_cachep, node);
+ ext4_fc_mark_ineligible(inode->i_sb,
+- EXT4_FC_REASON_NOMEM);
++ EXT4_FC_REASON_NOMEM, NULL);
+ mutex_lock(&ei->i_fc_lock);
+ return -ENOMEM;
+ }
+@@ -504,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
+
+ if (ext4_should_journal_data(inode)) {
+ ext4_fc_mark_ineligible(inode->i_sb,
+- EXT4_FC_REASON_INODE_JOURNAL_DATA);
++ EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
+ return;
+ }
+
+@@ -1182,7 +1196,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+ * Fast commit cleanup routine. This is called after every fast commit and
+ * full commit. full is true if we are called after a full commit.
+ */
+-static void ext4_fc_cleanup(journal_t *journal, int full)
++static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
+ {
+ struct super_block *sb = journal->j_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+@@ -1230,7 +1244,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
+ &sbi->s_fc_q[FC_Q_MAIN]);
+
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+- ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
++ if (tid >= sbi->s_fc_ineligible_tid) {
++ sbi->s_fc_ineligible_tid = 0;
++ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
++ }
+
+ if (full)
+ sbi->s_fc_bytes = 0;
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 3bdfe010e17f9..2f5686dfa30d5 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
+ return;
+ no_delete:
+ if (!list_empty(&EXT4_I(inode)->i_fc_list))
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
+ ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
+ }
+
+@@ -5983,7 +5983,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
+ return PTR_ERR(handle);
+
+ ext4_fc_mark_ineligible(inode->i_sb,
+- EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
++ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
+ err = ext4_mark_inode_dirty(handle, inode);
+ ext4_handle_sync(handle);
+ ext4_journal_stop(handle);
+diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
+index fd70bebb14370..f61b59045c6d3 100644
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
+ err = -EINVAL;
+ goto err_out;
+ }
+- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
+
+ /* Protect extent tree against block allocations via delalloc */
+ ext4_double_down_write_data_sem(inode, inode_bl);
+@@ -1075,7 +1075,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+
+ err = ext4_resize_fs(sb, n_blocks_count);
+ if (EXT4_SB(sb)->s_journal) {
+- ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
++ ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 52c9bd154122a..47b9f87dbc6f7 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ * dirents in directories.
+ */
+ ext4_fc_mark_ineligible(old.inode->i_sb,
+- EXT4_FC_REASON_RENAME_DIR);
++ EXT4_FC_REASON_RENAME_DIR, handle);
+ } else {
+ if (new.inode)
+ ext4_fc_track_unlink(handle, new.dentry);
+@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+ if (unlikely(retval))
+ goto end_rename;
+ ext4_fc_mark_ineligible(new.inode->i_sb,
+- EXT4_FC_REASON_CROSS_RENAME);
++ EXT4_FC_REASON_CROSS_RENAME, handle);
+ if (old.dir_bh) {
+ retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
+ if (retval)
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index d304b72593d76..888b2db92924d 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4627,6 +4627,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+ sbi->s_fc_bytes = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
++ sbi->s_fc_ineligible_tid = 0;
+ spin_lock_init(&sbi->s_fc_lock);
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
+ sbi->s_fc_replay_state.fc_regions = NULL;
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 1e0fc1ed845bf..0423253490986 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ }
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
+
+ cleanup:
+ brelse(is.iloc.bh);
+@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
+ if (error == 0)
+ error = error2;
+ }
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
+
+ return error;
+ }
+@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
+ error);
+ goto cleanup;
+ }
+- ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
++ ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
+ }
+ error = 0;
+ cleanup:
+diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
+index 3cc4ab2ba7f4f..d188fa913a075 100644
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
+ if (journal->j_commit_callback)
+ journal->j_commit_callback(journal, commit_transaction);
+ if (journal->j_fc_cleanup_callback)
+- journal->j_fc_cleanup_callback(journal, 1);
++ journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
+
+ trace_jbd2_end_commit(journal, commit_transaction);
+ jbd_debug(1, "JBD2: commit %d complete, head %d\n",
+diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
+index bd9ac98916043..1f8493ef181d6 100644
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -769,7 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
+ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
+ {
+ if (journal->j_fc_cleanup_callback)
+- journal->j_fc_cleanup_callback(journal, 0);
++ journal->j_fc_cleanup_callback(journal, 0, tid);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
+ if (fallback)
+diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
+index fd933c45281af..d63b8106796e2 100644
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -1295,7 +1295,7 @@ struct journal_s
+ * Clean-up after fast commit or full commit. JBD2 calls this function
+ * after every commit operation.
+ */
+- void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
++ void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
+
+ /**
+ * @j_fc_replay_callback:
+--
+2.34.1
+
--- /dev/null
+From d29ecbeb7189008f18506200809e12a2f7fe49f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Dec 2021 12:21:39 -0800
+Subject: ext4: simplify updating of fast commit stats
+
+From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+
+[ Upstream commit 0915e464cb274648e1ef1663e1356e53ff400983 ]
+
+Move fast commit stats updating logic to a separate function from
+ext4_fc_commit(). This significantly improves readability of
+ext4_fc_commit().
+
+Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+Link: https://lore.kernel.org/r/20211223202140.2061101-4-harshads@google.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h | 1 -
+ fs/ext4/fast_commit.c | 99 +++++++++++++++++++++++--------------------
+ fs/ext4/fast_commit.h | 27 ++++++------
+ 3 files changed, 68 insertions(+), 59 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index f80e4de726869..470fd3c2aef54 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1747,7 +1747,6 @@ struct ext4_sb_info {
+ spinlock_t s_fc_lock;
+ struct buffer_head *s_fc_bh;
+ struct ext4_fc_stats s_fc_stats;
+- u64 s_fc_avg_commit_time;
+ #ifdef CONFIG_EXT4_DEBUG
+ int s_fc_debug_max_replay;
+ #endif
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 48e522bb7bca4..1b935feec6f6b 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -1075,6 +1075,32 @@ static int ext4_fc_perform_commit(journal_t *journal)
+ return ret;
+ }
+
++static void ext4_fc_update_stats(struct super_block *sb, int status,
++ u64 commit_time, int nblks)
++{
++ struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
++
++ jbd_debug(1, "Fast commit ended with status = %d", status);
++ if (status == EXT4_FC_STATUS_OK) {
++ stats->fc_num_commits++;
++ stats->fc_numblks += nblks;
++ if (likely(stats->s_fc_avg_commit_time))
++ stats->s_fc_avg_commit_time =
++ (commit_time +
++ stats->s_fc_avg_commit_time * 3) / 4;
++ else
++ stats->s_fc_avg_commit_time = commit_time;
++ } else if (status == EXT4_FC_STATUS_FAILED ||
++ status == EXT4_FC_STATUS_INELIGIBLE) {
++ if (status == EXT4_FC_STATUS_FAILED)
++ stats->fc_failed_commits++;
++ stats->fc_ineligible_commits++;
++ } else {
++ stats->fc_skipped_commits++;
++ }
++ trace_ext4_fc_commit_stop(sb, nblks, status);
++}
++
+ /*
+ * The main commit entry point. Performs a fast commit for transaction
+ * commit_tid if needed. If it's not possible to perform a fast commit
+@@ -1087,7 +1113,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int nblks = 0, ret, bsize = journal->j_blocksize;
+ int subtid = atomic_read(&sbi->s_fc_subtid);
+- int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
++ int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
+ ktime_t start_time, commit_time;
+
+ trace_ext4_fc_commit_start(sb);
+@@ -1104,69 +1130,52 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
+ if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
+ commit_tid > journal->j_commit_sequence)
+ goto restart_fc;
+- reason = EXT4_FC_REASON_ALREADY_COMMITTED;
+- goto out;
++ ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0);
++ return 0;
+ } else if (ret) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_FC_START_FAILED;
+- goto out;
++ /*
++ * Commit couldn't start. Just update stats and perform a
++ * full commit.
++ */
++ ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0);
++ return jbd2_complete_transaction(journal, commit_tid);
+ }
++
+ /*
+ * After establishing journal barrier via jbd2_fc_begin_commit(), check
+ * if we are fast commit ineligible.
+ */
+ if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
+- reason = EXT4_FC_REASON_INELIGIBLE;
+- goto out;
++ status = EXT4_FC_STATUS_INELIGIBLE;
++ goto fallback;
+ }
+
+ fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
+ ret = ext4_fc_perform_commit(journal);
+ if (ret < 0) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_FC_FAILED;
+- goto out;
++ status = EXT4_FC_STATUS_FAILED;
++ goto fallback;
+ }
+ nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
+ ret = jbd2_fc_wait_bufs(journal, nblks);
+ if (ret < 0) {
+- sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
+- reason = EXT4_FC_REASON_FC_FAILED;
+- goto out;
++ status = EXT4_FC_STATUS_FAILED;
++ goto fallback;
+ }
+ atomic_inc(&sbi->s_fc_subtid);
+- jbd2_fc_end_commit(journal);
+-out:
+- spin_lock(&sbi->s_fc_lock);
+- if (reason != EXT4_FC_REASON_OK &&
+- reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
+- sbi->s_fc_stats.fc_ineligible_commits++;
+- } else {
+- sbi->s_fc_stats.fc_num_commits++;
+- sbi->s_fc_stats.fc_numblks += nblks;
+- }
+- spin_unlock(&sbi->s_fc_lock);
+- nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
+- trace_ext4_fc_commit_stop(sb, nblks, reason);
+- commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
++ ret = jbd2_fc_end_commit(journal);
+ /*
+- * weight the commit time higher than the average time so we don't
+- * react too strongly to vast changes in the commit time
++ * weight the commit time higher than the average time so we
++ * don't react too strongly to vast changes in the commit time
+ */
+- if (likely(sbi->s_fc_avg_commit_time))
+- sbi->s_fc_avg_commit_time = (commit_time +
+- sbi->s_fc_avg_commit_time * 3) / 4;
+- else
+- sbi->s_fc_avg_commit_time = commit_time;
+- jbd_debug(1,
+- "Fast commit ended with blks = %d, reason = %d, subtid - %d",
+- nblks, reason, subtid);
+- if (reason == EXT4_FC_REASON_FC_FAILED)
+- return jbd2_fc_end_commit_fallback(journal);
+- if (reason == EXT4_FC_REASON_FC_START_FAILED ||
+- reason == EXT4_FC_REASON_INELIGIBLE)
+- return jbd2_complete_transaction(journal, commit_tid);
+- return 0;
++ commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
++ ext4_fc_update_stats(sb, status, commit_time, nblks);
++ return ret;
++
++fallback:
++ ret = jbd2_fc_end_commit_fallback(journal);
++ ext4_fc_update_stats(sb, status, 0, 0);
++ return ret;
+ }
+
+ /*
+@@ -2132,7 +2141,7 @@ int ext4_fc_info_show(struct seq_file *seq, void *v)
+ "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
+ stats->fc_num_commits, stats->fc_ineligible_commits,
+ stats->fc_numblks,
+- div_u64(sbi->s_fc_avg_commit_time, 1000));
++ div_u64(stats->s_fc_avg_commit_time, 1000));
+ seq_puts(seq, "Ineligible reasons:\n");
+ for (i = 0; i < EXT4_FC_REASON_MAX; i++)
+ seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
+diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
+index 937c381b4c85e..083ad1cb705a7 100644
+--- a/fs/ext4/fast_commit.h
++++ b/fs/ext4/fast_commit.h
+@@ -71,21 +71,19 @@ struct ext4_fc_tail {
+ };
+
+ /*
+- * Fast commit reason codes
++ * Fast commit status codes
++ */
++enum {
++ EXT4_FC_STATUS_OK = 0,
++ EXT4_FC_STATUS_INELIGIBLE,
++ EXT4_FC_STATUS_SKIPPED,
++ EXT4_FC_STATUS_FAILED,
++};
++
++/*
++ * Fast commit ineligiblity reasons:
+ */
+ enum {
+- /*
+- * Commit status codes:
+- */
+- EXT4_FC_REASON_OK = 0,
+- EXT4_FC_REASON_INELIGIBLE,
+- EXT4_FC_REASON_ALREADY_COMMITTED,
+- EXT4_FC_REASON_FC_START_FAILED,
+- EXT4_FC_REASON_FC_FAILED,
+-
+- /*
+- * Fast commit ineligiblity reasons:
+- */
+ EXT4_FC_REASON_XATTR = 0,
+ EXT4_FC_REASON_CROSS_RENAME,
+ EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
+@@ -117,7 +115,10 @@ struct ext4_fc_stats {
+ unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
+ unsigned long fc_num_commits;
+ unsigned long fc_ineligible_commits;
++ unsigned long fc_failed_commits;
++ unsigned long fc_skipped_commits;
+ unsigned long fc_numblks;
++ u64 s_fc_avg_commit_time;
+ };
+
+ #define EXT4_FC_REPLAY_REALLOC_INCREMENT 4
+--
+2.34.1
+
--- /dev/null
+From d524916e6a467112345767ffd5187488dfd6277c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Mar 2022 09:37:28 -0800
+Subject: ipv6: fix skb drops in igmp6_event_query() and igmp6_event_report()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 2d3916f3189172d5c69d33065c3c21119fe539fc ]
+
+While investigating on why a synchronize_net() has been added recently
+in ipv6_mc_down(), I found that igmp6_event_query() and igmp6_event_report()
+might drop skbs in some cases.
+
+Discussion about removing synchronize_net() from ipv6_mc_down()
+will happen in a different thread.
+
+Fixes: f185de28d9ae ("mld: add new workqueues for process mld events")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Taehee Yoo <ap420073@gmail.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20220303173728.937869-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ndisc.h | 4 ++--
+ net/ipv6/mcast.c | 32 ++++++++++++--------------------
+ 2 files changed, 14 insertions(+), 22 deletions(-)
+
+diff --git a/include/net/ndisc.h b/include/net/ndisc.h
+index 04341d86585de..5e37e58586796 100644
+--- a/include/net/ndisc.h
++++ b/include/net/ndisc.h
+@@ -487,9 +487,9 @@ int igmp6_late_init(void);
+ void igmp6_cleanup(void);
+ void igmp6_late_cleanup(void);
+
+-int igmp6_event_query(struct sk_buff *skb);
++void igmp6_event_query(struct sk_buff *skb);
+
+-int igmp6_event_report(struct sk_buff *skb);
++void igmp6_event_report(struct sk_buff *skb);
+
+
+ #ifdef CONFIG_SYSCTL
+diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
+index a8861db52c187..909f937befd71 100644
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -1371,27 +1371,23 @@ static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ }
+
+ /* called with rcu_read_lock() */
+-int igmp6_event_query(struct sk_buff *skb)
++void igmp6_event_query(struct sk_buff *skb)
+ {
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+- if (!idev)
+- return -EINVAL;
+-
+- if (idev->dead) {
+- kfree_skb(skb);
+- return -ENODEV;
+- }
++ if (!idev || idev->dead)
++ goto out;
+
+ spin_lock_bh(&idev->mc_query_lock);
+ if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
+ __skb_queue_tail(&idev->mc_query_queue, skb);
+ if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
+ in6_dev_hold(idev);
++ skb = NULL;
+ }
+ spin_unlock_bh(&idev->mc_query_lock);
+-
+- return 0;
++out:
++ kfree_skb(skb);
+ }
+
+ static void __mld_query_work(struct sk_buff *skb)
+@@ -1542,27 +1538,23 @@ static void mld_query_work(struct work_struct *work)
+ }
+
+ /* called with rcu_read_lock() */
+-int igmp6_event_report(struct sk_buff *skb)
++void igmp6_event_report(struct sk_buff *skb)
+ {
+ struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+- if (!idev)
+- return -EINVAL;
+-
+- if (idev->dead) {
+- kfree_skb(skb);
+- return -ENODEV;
+- }
++ if (!idev || idev->dead)
++ goto out;
+
+ spin_lock_bh(&idev->mc_report_lock);
+ if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
+ __skb_queue_tail(&idev->mc_report_queue, skb);
+ if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
+ in6_dev_hold(idev);
++ skb = NULL;
+ }
+ spin_unlock_bh(&idev->mc_report_lock);
+-
+- return 0;
++out:
++ kfree_skb(skb);
+ }
+
+ static void __mld_report_work(struct sk_buff *skb)
+--
+2.34.1
+
--- /dev/null
+From 248f6fc5c8c2cbdfaa4d22847ba276c1ea01f5bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jan 2022 12:20:52 +0000
+Subject: KVM: arm64: Workaround Cortex-A510's single-step and PAC trap errata
+
+From: James Morse <james.morse@arm.com>
+
+[ Upstream commit 1dd498e5e26ad71e3e9130daf72cfb6a693fee03 ]
+
+Cortex-A510's erratum #2077057 causes SPSR_EL2 to be corrupted when
+single-stepping authenticated ERET instructions. A single step is
+expected, but a pointer authentication trap is taken instead. The
+erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
+EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
+
+Because the conditions require an ERET into active-not-pending state,
+this is only a problem for the EL2 when EL2 is stepping EL1. In this case
+the previous SPSR_EL2 value is preserved in struct kvm_vcpu, and can be
+restored.
+
+Cc: stable@vger.kernel.org # 53960faf2b73: arm64: Add Cortex-A510 CPU part definition
+Cc: stable@vger.kernel.org
+Signed-off-by: James Morse <james.morse@arm.com>
+[maz: fixup cpucaps ordering]
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20220127122052.1584324-5-james.morse@arm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/arm64/silicon-errata.rst | 2 ++
+ arch/arm64/Kconfig | 16 ++++++++++++++++
+ arch/arm64/kernel/cpu_errata.c | 8 ++++++++
+ arch/arm64/kvm/hyp/include/hyp/switch.h | 20 +++++++++++++++++++-
+ arch/arm64/tools/cpucaps | 5 +++--
+ 5 files changed, 48 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
+index 0ec7b7f1524b1..ea281dd755171 100644
+--- a/Documentation/arm64/silicon-errata.rst
++++ b/Documentation/arm64/silicon-errata.rst
+@@ -100,6 +100,8 @@ stable kernels.
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index ae0e93871ee5f..651bf217465e9 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -681,6 +681,22 @@ config ARM64_ERRATUM_2051678
+
+ If unsure, say Y.
+
++config ARM64_ERRATUM_2077057
++ bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2"
++ help
++ This option adds the workaround for ARM Cortex-A510 erratum 2077057.
++ Affected Cortex-A510 may corrupt SPSR_EL2 when the a step exception is
++ expected, but a Pointer Authentication trap is taken instead. The
++ erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow
++ EL1 to cause a return to EL2 with a guest controlled ELR_EL2.
++
++ This can only happen when EL2 is stepping EL1.
++
++ When these conditions occur, the SPSR_EL2 value is unchanged from the
++ previous guest entry, and can be restored from the in-memory copy.
++
++ If unsure, say Y.
++
+ config ARM64_ERRATUM_2119858
+ bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
+ default y
+diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
+index 066098198c248..b217941713a8d 100644
+--- a/arch/arm64/kernel/cpu_errata.c
++++ b/arch/arm64/kernel/cpu_errata.c
+@@ -600,6 +600,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
+ CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
+ },
+ #endif
++#ifdef CONFIG_ARM64_ERRATUM_2077057
++ {
++ .desc = "ARM erratum 2077057",
++ .capability = ARM64_WORKAROUND_2077057,
++ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
++ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2),
++ },
++#endif
+ #ifdef CONFIG_ARM64_ERRATUM_2064142
+ {
+ .desc = "ARM erratum 2064142",
+diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
+index adb67f8c9d7d3..3ae9c0b944878 100644
+--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
++++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
+@@ -424,6 +424,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+ return false;
+ }
+
++static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code)
++{
++ /*
++ * Check for the conditions of Cortex-A510's #2077057. When these occur
++ * SPSR_EL2 can't be trusted, but isn't needed either as it is
++ * unchanged from the value in vcpu_gp_regs(vcpu)->pstate.
++ * Are we single-stepping the guest, and took a PAC exception from the
++ * active-not-pending state?
++ */
++ if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) &&
++ vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
++ *vcpu_cpsr(vcpu) & DBG_SPSR_SS &&
++ ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC)
++ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
++
++ vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
++}
++
+ /*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+@@ -435,7 +453,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+ * Save PSTATE early so that we can evaluate the vcpu mode
+ * early on.
+ */
+- vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
++ synchronize_vcpu_pstate(vcpu, exit_code);
+
+ /*
+ * Check whether we want to repaint the state one way or
+diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
+index e7719e8f18def..9c65b1e25a965 100644
+--- a/arch/arm64/tools/cpucaps
++++ b/arch/arm64/tools/cpucaps
+@@ -55,9 +55,10 @@ WORKAROUND_1418040
+ WORKAROUND_1463225
+ WORKAROUND_1508412
+ WORKAROUND_1542419
+-WORKAROUND_2064142
+-WORKAROUND_2038923
+ WORKAROUND_1902691
++WORKAROUND_2038923
++WORKAROUND_2064142
++WORKAROUND_2077057
+ WORKAROUND_TRBE_OVERWRITE_FILL_MODE
+ WORKAROUND_TSB_FLUSH_FAILURE
+ WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
+--
+2.34.1
+
--- /dev/null
+From 768f4b6c9d9178cefd0355fde27e1d8aa3d3f89a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Feb 2022 21:29:51 +0000
+Subject: KVM: x86: Add KVM_CAP_ENABLE_CAP to x86
+
+From: Aaron Lewis <aaronlewis@google.com>
+
+[ Upstream commit 127770ac0d043435375ab86434f31a93efa88215 ]
+
+Follow the precedent set by other architectures that support the VCPU
+ioctl, KVM_ENABLE_CAP, and advertise the VM extension, KVM_CAP_ENABLE_CAP.
+This way, userspace can ensure that KVM_ENABLE_CAP is available on a
+vcpu before using it.
+
+Fixes: 5c919412fe61 ("kvm/x86: Hyper-V synthetic interrupt controller")
+Signed-off-by: Aaron Lewis <aaronlewis@google.com>
+Message-Id: <20220214212950.1776943-1-aaronlewis@google.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/virt/kvm/api.rst | 2 +-
+ arch/x86/kvm/x86.c | 1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
+index aeeb071c76881..9df9eadaeb5c2 100644
+--- a/Documentation/virt/kvm/api.rst
++++ b/Documentation/virt/kvm/api.rst
+@@ -1391,7 +1391,7 @@ documentation when it pops into existence).
+ -------------------
+
+ :Capability: KVM_CAP_ENABLE_CAP
+-:Architectures: mips, ppc, s390
++:Architectures: mips, ppc, s390, x86
+ :Type: vcpu ioctl
+ :Parameters: struct kvm_enable_cap (in)
+ :Returns: 0 on success; -1 on error
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 0714fa0e7ede0..c6eb3e45e3d80 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4163,6 +4163,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+ case KVM_CAP_SREGS2:
+ case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
+ case KVM_CAP_VCPU_ATTRIBUTES:
++ case KVM_CAP_ENABLE_CAP:
+ r = 1;
+ break;
+ case KVM_CAP_EXIT_HYPERCALL:
+--
+2.34.1
+
--- /dev/null
+From 9f407ba6c655408e85afec84fdd872507b6a2954 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Feb 2022 08:18:46 -0800
+Subject: sched/fair: Fix fault in reweight_entity
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+
+[ Upstream commit 13765de8148f71fa795e0a6607de37c49ea5915a ]
+
+Syzbot found a GPF in reweight_entity. This has been bisected to
+commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid
+sched_task_group")
+
+There is a race between sched_post_fork() and setpriority(PRIO_PGRP)
+within a thread group that causes a null-ptr-deref in
+reweight_entity() in CFS. The scenario is that the main process spawns
+number of new threads, which then call setpriority(PRIO_PGRP, 0, -20),
+wait, and exit. For each of the new threads the copy_process() gets
+invoked, which adds the new task_struct and calls sched_post_fork()
+for it.
+
+In the above scenario there is a possibility that
+setpriority(PRIO_PGRP) and set_one_prio() will be called for a thread
+in the group that is just being created by copy_process(), and for
+which the sched_post_fork() has not been executed yet. This will
+trigger a null pointer dereference in reweight_entity(), as it will
+try to access the run queue pointer, which hasn't been set.
+
+Before the mentioned change the cfs_rq pointer for the task has been
+set in sched_fork(), which is called much earlier in copy_process(),
+before the new task is added to the thread_group. Now it is done in
+the sched_post_fork(), which is called after that. To fix the issue
+the remove the update_load param from the update_load param() function
+and call reweight_task() only if the task flag doesn't have the
+TASK_NEW flag set.
+
+Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group")
+Reported-by: syzbot+af7a719bc92395ee41b3@syzkaller.appspotmail.com
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20220203161846.1160750-1-tadeusz.struk@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/core.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index d24823b3c3f9f..35b256b789680 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1203,8 +1203,9 @@ int tg_nop(struct task_group *tg, void *data)
+ }
+ #endif
+
+-static void set_load_weight(struct task_struct *p, bool update_load)
++static void set_load_weight(struct task_struct *p)
+ {
++ bool update_load = !(READ_ONCE(p->__state) & TASK_NEW);
+ int prio = p->static_prio - MAX_RT_PRIO;
+ struct load_weight *load = &p->se.load;
+
+@@ -4392,7 +4393,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ p->static_prio = NICE_TO_PRIO(0);
+
+ p->prio = p->normal_prio = p->static_prio;
+- set_load_weight(p, false);
++ set_load_weight(p);
+
+ /*
+ * We don't need the reset flag anymore after the fork. It has
+@@ -6879,7 +6880,7 @@ void set_user_nice(struct task_struct *p, long nice)
+ put_prev_task(rq, p);
+
+ p->static_prio = NICE_TO_PRIO(nice);
+- set_load_weight(p, true);
++ set_load_weight(p);
+ old_prio = p->prio;
+ p->prio = effective_prio(p);
+
+@@ -7170,7 +7171,7 @@ static void __setscheduler_params(struct task_struct *p,
+ */
+ p->rt_priority = attr->sched_priority;
+ p->normal_prio = normal_prio(p);
+- set_load_weight(p, true);
++ set_load_weight(p);
+ }
+
+ /*
+@@ -9409,7 +9410,7 @@ void __init sched_init(void)
+ #endif
+ }
+
+- set_load_weight(&init_task, false);
++ set_load_weight(&init_task);
+
+ /*
+ * The boot idle thread does lazy MMU switching as well:
+--
+2.34.1
+
--- /dev/null
+From 36807348826799579605d1c23d7af6b7b253d0fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jan 2022 17:44:40 +0100
+Subject: serial: stm32: prevent TDR register overwrite when sending x_char
+
+From: Valentin Caron <valentin.caron@foss.st.com>
+
+[ Upstream commit d3d079bde07e1b7deaeb57506dc0b86010121d17 ]
+
+When sending x_char in stm32_usart_transmit_chars(), driver can overwrite
+the value of TDR register by the value of x_char. If this happens, the
+previous value that was present in TDR register will not be sent through
+uart.
+
+This code checks if the previous value in TDR register is sent before
+writing the x_char value into register.
+
+Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver")
+Cc: stable <stable@vger.kernel.org>
+Signed-off-by: Valentin Caron <valentin.caron@foss.st.com>
+Link: https://lore.kernel.org/r/20220111164441.6178-2-valentin.caron@foss.st.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/stm32-usart.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
+index 2d3fbcbfaf108..93c2a5c956540 100644
+--- a/drivers/tty/serial/stm32-usart.c
++++ b/drivers/tty/serial/stm32-usart.c
+@@ -520,10 +520,22 @@ static void stm32_usart_transmit_chars(struct uart_port *port)
+ struct stm32_port *stm32_port = to_stm32_port(port);
+ const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+ struct circ_buf *xmit = &port->state->xmit;
++ u32 isr;
++ int ret;
+
+ if (port->x_char) {
+ if (stm32_port->tx_dma_busy)
+ stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
++
++ /* Check that TDR is empty before filling FIFO */
++ ret =
++ readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
++ isr,
++ (isr & USART_SR_TXE),
++ 10, 1000);
++ if (ret)
++ dev_warn(port->dev, "1 character may be erased\n");
++
+ writel_relaxed(port->x_char, port->membase + ofs->tdr);
+ port->x_char = 0;
+ port->icount.tx++;
+--
+2.34.1
+
block-map-add-__gfp_zero-flag-for-alloc_page-in-func.patch
usb-gadget-don-t-release-an-existing-dev-buf.patch
usb-gadget-clear-related-members-when-goto-fail.patch
+exfat-reuse-exfat_inode_info-variable-instead-of-cal.patch
+exfat-fix-i_blocks-for-files-truncated-over-4-gib.patch
+tracing-add-test-for-user-space-strings-when-filteri.patch
+arm64-mark-start_backtrace-notrace-and-nokprobe_symb.patch
+serial-stm32-prevent-tdr-register-overwrite-when-sen.patch
+kvm-arm64-workaround-cortex-a510-s-single-step-and-p.patch
+ext4-drop-ineligible-txn-start-stop-apis.patch
+ext4-simplify-updating-of-fast-commit-stats.patch
+ext4-fast-commit-may-not-fallback-for-ineligible-com.patch
+ext4-fast-commit-may-miss-file-actions.patch
+sched-fair-fix-fault-in-reweight_entity.patch
+kvm-x86-add-kvm_cap_enable_cap-to-x86.patch
+ata-pata_hpt37x-fix-pci-clock-detection.patch
+drm-amdgpu-check-vm-ready-by-amdgpu_vm-evicting-flag.patch
+tracing-add-ustring-operation-to-filtering-string-po.patch
+ipv6-fix-skb-drops-in-igmp6_event_query-and-igmp6_ev.patch
--- /dev/null
+From d003cfd3df7bf6ed1560ad3fe2abd2102c0b564e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jan 2022 11:55:32 -0500
+Subject: tracing: Add test for user space strings when filtering on string
+ pointers
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 77360f9bbc7e5e2ab7a2c8b4c0244fbbfcfc6f62 ]
+
+Pingfan reported that the following causes a fault:
+
+ echo "filename ~ \"cpu\"" > events/syscalls/sys_enter_openat/filter
+ echo 1 > events/syscalls/sys_enter_at/enable
+
+The reason is that trace event filter treats the user space pointer
+defined by "filename" as a normal pointer to compare against the "cpu"
+string. The following bug happened:
+
+ kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0001) - permissions violation
+ PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867
+ Oops: 0001 [#1] PREEMPT SMP PTI
+ CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1
+ Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+ RIP: 0010:strlen+0x0/0x20
+ Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11
+ 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8
+ 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31
+ RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246
+ RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000
+ RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60
+ RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000
+ R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380
+ R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0
+ FS: 00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ PKRU: 55555554
+ Call Trace:
+ filter_pred_pchar+0x18/0x40
+ filter_match_preds+0x31/0x70
+ ftrace_syscall_enter+0x27a/0x2c0
+ syscall_trace_enter.constprop.0+0x1aa/0x1d0
+ do_syscall_64+0x16/0x90
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+ RIP: 0033:0x7fa861d88664
+
+The above happened because the kernel tried to access user space directly
+and triggered a "supervisor read access in kernel mode" fault. Worse yet,
+the memory could not even be loaded yet, and a SEGFAULT could happen as
+well. This could be true for kernel space accessing as well.
+
+To be even more robust, test both kernel and user space strings. If the
+string fails to read, then simply have the filter fail.
+
+Note, TASK_SIZE is used to determine if the pointer is user or kernel space
+and the appropriate strncpy_from_kernel/user_nofault() function is used to
+copy the memory. For some architectures, the compare to TASK_SIZE may always
+pick user space or kernel space. If it gets it wrong, the only thing is that
+the filter will fail to match. In the future, this needs to be fixed to have
+the event denote which should be used. But failing a filter is much better
+than panicing the machine, and that can be solved later.
+
+Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@gmail.com/
+Link: https://lkml.kernel.org/r/20220110115532.536088fd@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Tom Zanussi <zanussi@kernel.org>
+Reported-by: Pingfan Liu <kernelfans@gmail.com>
+Tested-by: Pingfan Liu <kernelfans@gmail.com>
+Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings")
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/trace/events.rst | 10 +++++
+ kernel/trace/trace_events_filter.c | 66 ++++++++++++++++++++++++++++--
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
+index 8ddb9b09451c8..45e66a60a816a 100644
+--- a/Documentation/trace/events.rst
++++ b/Documentation/trace/events.rst
+@@ -230,6 +230,16 @@ Currently the caret ('^') for an error always appears at the beginning of
+ the filter string; the error message should still be useful though
+ even without more accurate position info.
+
++5.2.1 Filter limitations
++------------------------
++
++If a filter is placed on a string pointer ``(char *)`` that does not point
++to a string on the ring buffer, but instead points to kernel or user space
++memory, then, for safety reasons, at most 1024 bytes of the content is
++copied onto a temporary buffer to do the compare. If the copy of the memory
++faults (the pointer points to memory that should not be accessed), then the
++string compare will be treated as not matching.
++
+ 5.3 Clearing filters
+ --------------------
+
+diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
+index c9124038b140f..d3eb3c630f601 100644
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -5,6 +5,7 @@
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ */
+
++#include <linux/uaccess.h>
+ #include <linux/module.h>
+ #include <linux/ctype.h>
+ #include <linux/mutex.h>
+@@ -654,6 +655,47 @@ DEFINE_EQUALITY_PRED(32);
+ DEFINE_EQUALITY_PRED(16);
+ DEFINE_EQUALITY_PRED(8);
+
++/* user space strings temp buffer */
++#define USTRING_BUF_SIZE 1024
++
++struct ustring_buffer {
++ char buffer[USTRING_BUF_SIZE];
++};
++
++static __percpu struct ustring_buffer *ustring_per_cpu;
++
++static __always_inline char *test_string(char *str)
++{
++ struct ustring_buffer *ubuf;
++ char __user *ustr;
++ char *kstr;
++
++ if (!ustring_per_cpu)
++ return NULL;
++
++ ubuf = this_cpu_ptr(ustring_per_cpu);
++ kstr = ubuf->buffer;
++
++ /*
++ * We use TASK_SIZE to denote user or kernel space, but this will
++ * not work for all architectures. If it picks the wrong one, it may
++ * just fail the filter (but will not bug).
++ *
++ * TODO: Have a way to properly denote which one this is for.
++ */
++ if (likely((unsigned long)str >= TASK_SIZE)) {
++ /* For safety, do not trust the string pointer */
++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
++ return NULL;
++ } else {
++ /* user space address? */
++ ustr = (char __user *)str;
++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
++ return NULL;
++ }
++ return kstr;
++}
++
+ /* Filter predicate for fixed sized arrays of characters */
+ static int filter_pred_string(struct filter_pred *pred, void *event)
+ {
+@@ -671,10 +713,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
+ static int filter_pred_pchar(struct filter_pred *pred, void *event)
+ {
+ char **addr = (char **)(event + pred->offset);
++ char *str;
+ int cmp, match;
+- int len = strlen(*addr) + 1; /* including tailing '\0' */
++ int len;
+
+- cmp = pred->regex.match(*addr, &pred->regex, len);
++ str = test_string(*addr);
++ if (!str)
++ return 0;
++
++ len = strlen(str) + 1; /* including tailing '\0' */
++ cmp = pred->regex.match(str, &pred->regex, len);
+
+ match = cmp ^ pred->not;
+
+@@ -1320,8 +1368,17 @@ static int parse_pred(const char *str, void *data,
+
+ } else if (field->filter_type == FILTER_DYN_STRING)
+ pred->fn = filter_pred_strloc;
+- else
++ else {
++
++ if (!ustring_per_cpu) {
++ /* Once allocated, keep it around for good */
++ ustring_per_cpu = alloc_percpu(struct ustring_buffer);
++ if (!ustring_per_cpu)
++ goto err_mem;
++ }
++
+ pred->fn = filter_pred_pchar;
++ }
+ /* go past the last quote */
+ i++;
+
+@@ -1387,6 +1444,9 @@ static int parse_pred(const char *str, void *data,
+ err_free:
+ kfree(pred);
+ return -EINVAL;
++err_mem:
++ kfree(pred);
++ return -ENOMEM;
+ }
+
+ enum {
+--
+2.34.1
+
--- /dev/null
+From 641b43ce7c85d47bfb4887dc81217b3334be4593 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jan 2022 20:08:40 -0500
+Subject: tracing: Add ustring operation to filtering string pointers
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit f37c3bbc635994eda203a6da4ba0f9d05165a8d6 ]
+
+Since referencing user space pointers is special, if the user wants to
+filter on a field that is a pointer to user space, then they need to
+specify it.
+
+Add a ".ustring" attribute to the field name for filters to state that the
+field is pointing to user space such that the kernel can take the
+appropriate action to read that pointer.
+
+Link: https://lore.kernel.org/all/yt9d8rvmt2jq.fsf@linux.ibm.com/
+
+Fixes: 77360f9bbc7e ("tracing: Add test for user space strings when filtering on string pointers")
+Tested-by: Sven Schnelle <svens@linux.ibm.com>
+Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/trace/events.rst | 9 ++++
+ kernel/trace/trace_events_filter.c | 81 +++++++++++++++++++++---------
+ 2 files changed, 66 insertions(+), 24 deletions(-)
+
+diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
+index 45e66a60a816a..c47f381d0c002 100644
+--- a/Documentation/trace/events.rst
++++ b/Documentation/trace/events.rst
+@@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes
+ prev_comm ~ "*sh*"
+ prev_comm ~ "ba*sh"
+
++If the field is a pointer that points into user space (for example
++"filename" from sys_enter_openat), then you have to append ".ustring" to the
++field name::
++
++ filename.ustring ~ "password"
++
++As the kernel will have to know how to retrieve the memory that the pointer
++is at from user space.
++
+ 5.2 Setting filters
+ -------------------
+
+diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
+index d3eb3c630f601..06d6318ee5377 100644
+--- a/kernel/trace/trace_events_filter.c
++++ b/kernel/trace/trace_events_filter.c
+@@ -665,6 +665,23 @@ struct ustring_buffer {
+ static __percpu struct ustring_buffer *ustring_per_cpu;
+
+ static __always_inline char *test_string(char *str)
++{
++ struct ustring_buffer *ubuf;
++ char *kstr;
++
++ if (!ustring_per_cpu)
++ return NULL;
++
++ ubuf = this_cpu_ptr(ustring_per_cpu);
++ kstr = ubuf->buffer;
++
++ /* For safety, do not trust the string pointer */
++ if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
++ return NULL;
++ return kstr;
++}
++
++static __always_inline char *test_ustring(char *str)
+ {
+ struct ustring_buffer *ubuf;
+ char __user *ustr;
+@@ -676,23 +693,11 @@ static __always_inline char *test_string(char *str)
+ ubuf = this_cpu_ptr(ustring_per_cpu);
+ kstr = ubuf->buffer;
+
+- /*
+- * We use TASK_SIZE to denote user or kernel space, but this will
+- * not work for all architectures. If it picks the wrong one, it may
+- * just fail the filter (but will not bug).
+- *
+- * TODO: Have a way to properly denote which one this is for.
+- */
+- if (likely((unsigned long)str >= TASK_SIZE)) {
+- /* For safety, do not trust the string pointer */
+- if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
+- return NULL;
+- } else {
+- /* user space address? */
+- ustr = (char __user *)str;
+- if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
+- return NULL;
+- }
++ /* user space address? */
++ ustr = (char __user *)str;
++ if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
++ return NULL;
++
+ return kstr;
+ }
+
+@@ -709,24 +714,42 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
+ return match;
+ }
+
++static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
++{
++ int cmp, match;
++ int len;
++
++ len = strlen(str) + 1; /* including tailing '\0' */
++ cmp = pred->regex.match(str, &pred->regex, len);
++
++ match = cmp ^ pred->not;
++
++ return match;
++}
+ /* Filter predicate for char * pointers */
+ static int filter_pred_pchar(struct filter_pred *pred, void *event)
+ {
+ char **addr = (char **)(event + pred->offset);
+ char *str;
+- int cmp, match;
+- int len;
+
+ str = test_string(*addr);
+ if (!str)
+ return 0;
+
+- len = strlen(str) + 1; /* including tailing '\0' */
+- cmp = pred->regex.match(str, &pred->regex, len);
++ return filter_pchar(pred, str);
++}
+
+- match = cmp ^ pred->not;
++/* Filter predicate for char * pointers in user space*/
++static int filter_pred_pchar_user(struct filter_pred *pred, void *event)
++{
++ char **addr = (char **)(event + pred->offset);
++ char *str;
+
+- return match;
++ str = test_ustring(*addr);
++ if (!str)
++ return 0;
++
++ return filter_pchar(pred, str);
+ }
+
+ /*
+@@ -1206,6 +1229,7 @@ static int parse_pred(const char *str, void *data,
+ struct filter_pred *pred = NULL;
+ char num_buf[24]; /* Big enough to hold an address */
+ char *field_name;
++ bool ustring = false;
+ char q;
+ u64 val;
+ int len;
+@@ -1240,6 +1264,12 @@ static int parse_pred(const char *str, void *data,
+ return -EINVAL;
+ }
+
++ /* See if the field is a user space string */
++ if ((len = str_has_prefix(str + i, ".ustring"))) {
++ ustring = true;
++ i += len;
++ }
++
+ while (isspace(str[i]))
+ i++;
+
+@@ -1377,7 +1407,10 @@ static int parse_pred(const char *str, void *data,
+ goto err_mem;
+ }
+
+- pred->fn = filter_pred_pchar;
++ if (ustring)
++ pred->fn = filter_pred_pchar_user;
++ else
++ pred->fn = filter_pred_pchar;
+ }
+ /* go past the last quote */
+ i++;
+--
+2.34.1
+