--- /dev/null
+From 07f330a82e38cf1b6740332b89dc2a2e22e5703e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Oct 2021 11:12:58 -0400
+Subject: ceph: properly handle statfs on multifs setups
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 8cfc0c7ed34f7929ce7e5d7c6eecf4d01ba89a84 ]
+
+ceph_statfs currently stuffs the cluster fsid into the f_fsid field.
+This was fine when we only had a single filesystem per cluster, but now
+that we have multiples we need to use something that will vary between
+them.
+
+Change ceph_statfs to xor each 32-bit chunk of the fsid (aka cluster id)
+into the lower bits of the statfs->f_fsid. Change the upper bits to hold
+the fscid (filesystem ID within the cluster).
+
+That should give us a value that is guaranteed to be unique between
+filesystems within a cluster, and should minimize the chance of
+collisions between mounts of different clusters.
+
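+For illustration only (not part of the fix), a minimal userspace sketch
+that prints the resulting f_fsid via statfs(2); the mount point path is
+just an example:
+
+    #include <stdio.h>
+    #include <sys/vfs.h>
+
+    int main(void)
+    {
+            struct statfs sfs;
+
+            if (statfs("/mnt/cephfs", &sfs))   /* example mount point */
+                    return 1;
+            /* val[0]: folded cluster fsid, val[1]: fscid */
+            printf("%x:%x\n", sfs.f_fsid.__val[0], sfs.f_fsid.__val[1]);
+            return 0;
+    }
+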
+URL: https://tracker.ceph.com/issues/52812
+Reported-by: Sachin Prabhu <sprabhu@redhat.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/super.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/fs/ceph/super.c b/fs/ceph/super.c
+index fd8742bae8471..202ddde3d62ad 100644
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -52,8 +52,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
+ struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
+ struct ceph_mon_client *monc = &fsc->client->monc;
+ struct ceph_statfs st;
+- u64 fsid;
+- int err;
++ int i, err;
+ u64 data_pool;
+
+ if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
+@@ -99,12 +98,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
+ buf->f_namelen = NAME_MAX;
+
+ /* Must convert the fsid, for consistent values across arches */
++ buf->f_fsid.val[0] = 0;
+ mutex_lock(&monc->mutex);
+- fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
+- le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
++ for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
++ buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
+ mutex_unlock(&monc->mutex);
+
+- buf->f_fsid = u64_to_fsid(fsid);
++ /* fold the fs_cluster_id into the upper bits */
++ buf->f_fsid.val[1] = monc->fs_cluster_id;
+
+ return 0;
+ }
+--
+2.33.0
+
--- /dev/null
+From 28192917a4520ee82ab741cefdba79ea2c39cbb7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 6 Nov 2021 11:31:53 +0000
+Subject: cifs: nosharesock should not share socket with future sessions
+
+From: Shyam Prasad N <sprasad@microsoft.com>
+
+[ Upstream commit c9f1c19cf7c50949885fa5afdb2cb242d61a7fac ]
+
+Today, when a new mount is done with nosharesock, we ensure
+that we don't select an existing matching session. However,
+we don't mark the connection as nosharesock, which means that
+it could be shared with future sessions.
+
+Fix it with this commit. Also print this info in DebugData.
+
+Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
+Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cifs/cifs_debug.c | 2 ++
+ fs/cifs/cifsglob.h | 1 +
+ fs/cifs/connect.c | 8 +++++++-
+ 3 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
+index de2c12bcfa4bc..905a901f7f80b 100644
+--- a/fs/cifs/cifs_debug.c
++++ b/fs/cifs/cifs_debug.c
+@@ -358,6 +358,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
+ seq_printf(m, " signed");
+ if (server->posix_ext_supported)
+ seq_printf(m, " posix");
++ if (server->nosharesock)
++ seq_printf(m, " nosharesock");
+
+ if (server->rdma)
+ seq_printf(m, "\nRDMA ");
+diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
+index dea4c929d3f46..3e5b8e177cfa7 100644
+--- a/fs/cifs/cifsglob.h
++++ b/fs/cifs/cifsglob.h
+@@ -592,6 +592,7 @@ struct TCP_Server_Info {
+ struct list_head pending_mid_q;
+ bool noblocksnd; /* use blocking sendmsg */
+ bool noautotune; /* do not autotune send buf sizes */
++ bool nosharesock;
+ bool tcp_nodelay;
+ unsigned int credits; /* send no more requests at once */
+ unsigned int max_credits; /* can override large 32000 default at mnt */
+diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
+index e757ee52cc777..d26703a05c6b4 100644
+--- a/fs/cifs/connect.c
++++ b/fs/cifs/connect.c
+@@ -1217,7 +1217,13 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *
+ {
+ struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr;
+
+- if (ctx->nosharesock)
++ if (ctx->nosharesock) {
++ server->nosharesock = true;
++ return 0;
++ }
++
++ /* this server does not share socket */
++ if (server->nosharesock)
+ return 0;
+
+ /* If multidialect negotiation see if existing sessions match one */
+--
+2.33.0
+
--- /dev/null
+From 8ae24aacc1020dac21b216841199f68f8cf1e5ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Sep 2021 10:38:11 +0800
+Subject: f2fs: quota: fix potential deadlock
+
+From: Chao Yu <chao@kernel.org>
+
+[ Upstream commit a5c0042200b28fff3bde6fa128ddeaef97990f8d ]
+
+As Yi Zhuang reported in bugzilla:
+
+https://bugzilla.kernel.org/show_bug.cgi?id=214299
+
+There is potential deadlock during quota data flush as below:
+
+Thread A:                              Thread B:
+f2fs_dquot_acquire
+down_read(&sbi->quota_sem)
+                                       f2fs_write_checkpoint
+                                       block_operations
+                                       f2fs_lock_all
+                                       down_write(&sbi->cp_rwsem)
+f2fs_quota_write
+f2fs_write_begin
+__do_map_lock
+f2fs_lock_op
+down_read(&sbi->cp_rwsem)
+                                       __need_flush_quota
+                                       down_write(&sbi->quota_sem)
+
+This patch changes block_operations() to use a trylock; if it fails,
+it means there is a potential quota data updater. In that case, let's
+flush quota data first and then trylock again to check the dirty
+status of the quota data.
+
+The side effect is that under heavy contention (e.g. multiple quota data
+updaters vs. the quota data flusher), this may decrease the probability of
+successfully synchronizing quota data in checkpoint() due to the limited
+number of quota flush retries.
+
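+For illustration only, a minimal userspace analog of the trylock idea
+(a sketch with made-up names, not f2fs code): a failed write trylock is
+treated as "potentially dirty" instead of blocking and risking the ABBA
+deadlock above:
+
+    #include <pthread.h>
+    #include <stdbool.h>
+
+    static pthread_rwlock_t quota_sem = PTHREAD_RWLOCK_INITIALIZER;
+    static bool quota_dirty;
+
+    static bool need_flush_quota(void)
+    {
+            bool ret;
+
+            /* non-blocking: failure means a potential updater holds it */
+            if (pthread_rwlock_trywrlock(&quota_sem))
+                    return true;
+
+            ret = quota_dirty;
+            pthread_rwlock_unlock(&quota_sem);
+            return ret;
+    }
+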
+Reported-by: Yi Zhuang <zhuangyi1@huawei.com>
+Signed-off-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/f2fs/checkpoint.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
+index 83e9bc0f91ffd..7b02827242312 100644
+--- a/fs/f2fs/checkpoint.c
++++ b/fs/f2fs/checkpoint.c
+@@ -1162,7 +1162,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+ if (!is_journalled_quota(sbi))
+ return false;
+
+- down_write(&sbi->quota_sem);
++ if (!down_write_trylock(&sbi->quota_sem))
++ return true;
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+--
+2.33.0
+
--- /dev/null
+From 06215e5f1d7114ed82e2a1f7156119ecc5b5f1d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 18 Sep 2021 20:46:36 +0800
+Subject: f2fs: set SBI_NEED_FSCK flag when inconsistent node block found
+
+From: Weichao Guo <guoweichao@oppo.com>
+
+[ Upstream commit 6663b138ded1a59e630c9e605e42aa7fde490cdc ]
+
+An inconsistent node block will cause a file to fail to open or read,
+which could make the user process crash or get stuck. Let's set the
+SBI_NEED_FSCK flag to trigger a fix at the next fsck time. After
+unlinking the corrupted file, the user process can regenerate a new
+one and work correctly.
+
+Signed-off-by: Weichao Guo <guoweichao@oppo.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/f2fs/node.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
+index e863136081b47..556fcd8457f3f 100644
+--- a/fs/f2fs/node.c
++++ b/fs/f2fs/node.c
+@@ -1443,6 +1443,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
++ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ err = -EINVAL;
+ out_err:
+ ClearPageUptodate(page);
+--
+2.33.0
+
--- /dev/null
+From 5e789ed4bbb3bd860bca4a9a8db249ce67f9c63a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Nov 2021 03:13:25 +0100
+Subject: iommu/rockchip: Fix PAGE_DESC_HI_MASKs for RK3568
+
+From: Alex Bee <knaerzche@gmail.com>
+
+[ Upstream commit f7ff3cff3527ff1e70cad8d2fe7c0c7b6f83120a ]
+
+With the submission of the iommu driver for RK3568, a subtle bug was
+introduced: PAGE_DESC_HI_MASK1 and PAGE_DESC_HI_MASK2 have to be
+the other way around - that leads to random errors, especially when
+addresses beyond 32 bits are used.
+
+Fix it.
+
+Fixes: c55356c534aa ("iommu: rockchip: Add support for iommu v2")
+Signed-off-by: Alex Bee <knaerzche@gmail.com>
+Tested-by: Peter Geis <pgwipeout@gmail.com>
+Reviewed-by: Heiko Stuebner <heiko@sntech.de>
+Tested-by: Dan Johansen <strit@manjaro.org>
+Reviewed-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+Link: https://lore.kernel.org/r/20211124021325.858139-1-knaerzche@gmail.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/rockchip-iommu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
+index 5cb260820eda6..7f23ad61c094f 100644
+--- a/drivers/iommu/rockchip-iommu.c
++++ b/drivers/iommu/rockchip-iommu.c
+@@ -200,8 +200,8 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte)
+ #define DTE_HI_MASK2 GENMASK(7, 4)
+ #define DTE_HI_SHIFT1 24 /* shift bit 8 to bit 32 */
+ #define DTE_HI_SHIFT2 32 /* shift bit 4 to bit 36 */
+-#define PAGE_DESC_HI_MASK1 GENMASK_ULL(39, 36)
+-#define PAGE_DESC_HI_MASK2 GENMASK_ULL(35, 32)
++#define PAGE_DESC_HI_MASK1 GENMASK_ULL(35, 32)
++#define PAGE_DESC_HI_MASK2 GENMASK_ULL(39, 36)
+
+ static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
+ {
+--
+2.33.0
+
--- /dev/null
+From 6d2de354d125782cd23228ad93e2bfd28dc0ee24 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 Nov 2021 21:55:56 +0800
+Subject: iommu/vt-d: Fix unmap_pages support
+
+From: Alex Williamson <alex.williamson@redhat.com>
+
+[ Upstream commit 86dc40c7ea9c22f64571e0e45f695de73a0e2644 ]
+
+When supporting only the .map and .unmap callbacks of iommu_ops,
+the IOMMU driver can make assumptions about the size and alignment
+used for mappings based on the driver provided pgsize_bitmap. VT-d
+previously used essentially PAGE_MASK for this bitmap as any power
+of two mapping was acceptably filled by native page sizes.
+
+However, with the .map_pages and .unmap_pages interface we're now
+getting page-size and count arguments. If we simply combine these
+as (page-size * count) and make use of the previous map/unmap
+functions internally, any size and alignment assumptions are very
+different.
+
+As an example, a given vfio device assignment VM will often create
+a 4MB mapping at IOVA pfn [0x3fe00 - 0x401ff]. On a system that
+does not support IOMMU super pages, the unmap_pages interface will
+ask to unmap 1024 4KB pages at the base IOVA. dma_pte_clear_level()
+will recurse down to level 2 of the page table where the first half
+of the pfn range exactly matches the entire pte level. We clear the
+pte, increment the pfn by the level size, but (oops) the next pte is
+on a new page, so we exit the loop and pop back up a level. When we
+then update the pfn based on that higher level, we seem to assume
+that the previous pfn value was at the start of the level. In this
+case the level size is 256K pfns, which we add to the base pfn and
+get a result of 0x7fe00, which is clearly greater than 0x401ff,
+so we're done. Meanwhile we never cleared the ptes for the remainder
+of the range. When the VM remaps this range, we're overwriting valid
+ptes and the VT-d driver complains loudly, as reported by the user
+report linked below.
+
+The fix for this seems relatively simple: if each iteration of the
+loop in dma_pte_clear_level() is assumed to clear to the end of the
+level pte page, then our next pfn should be calculated from level_pfn
+rather than our working pfn.
+
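+For illustration only, a small standalone sketch of that arithmetic,
+using the numbers from the example above (not driver code):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+            unsigned long pfn = 0x3fe00, last_pfn = 0x401ff;
+            unsigned long level_size = 0x40000;  /* 256K pfns at this level */
+            unsigned long level_pfn = pfn & ~(level_size - 1);
+
+            /* old: advance from the unaligned working pfn -> 0x7fe00 */
+            printf("old next pfn: %#lx (> %#lx, loop stops)\n",
+                   pfn + level_size, last_pfn);
+            /* new: advance from the start of the level -> 0x40000 */
+            printf("new next pfn: %#lx (<= %#lx, loop continues)\n",
+                   level_pfn + level_size, last_pfn);
+            return 0;
+    }
+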
+Fixes: 3f34f1259776 ("iommu/vt-d: Implement map/unmap_pages() iommu_ops callback")
+Reported-by: Ajay Garg <ajaygargnsit@gmail.com>
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Tested-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+Link: https://lore.kernel.org/all/20211002124012.18186-1-ajaygargnsit@gmail.com/
+Link: https://lore.kernel.org/r/163659074748.1617923.12716161410774184024.stgit@omen
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Link: https://lore.kernel.org/r/20211126135556.397932-3-baolu.lu@linux.intel.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/iommu.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index 9a356075d3450..78f8c8e6803e9 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -1226,13 +1226,11 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
+ pte = &pte[pfn_level_offset(pfn, level)];
+
+ do {
+- unsigned long level_pfn;
++ unsigned long level_pfn = pfn & level_mask(level);
+
+ if (!dma_pte_present(pte))
+ goto next;
+
+- level_pfn = pfn & level_mask(level);
+-
+ /* If range covers entire pagetable, free it */
+ if (start_pfn <= level_pfn &&
+ last_pfn >= level_pfn + level_size(level) - 1) {
+@@ -1253,7 +1251,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
+ freelist);
+ }
+ next:
+- pfn += level_size(level);
++ pfn = level_pfn + level_size(level);
+ } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+
+ if (first_pte)
+--
+2.33.0
+
--- /dev/null
+From 4c5eb65ede27e046b344a36354403c098e1cb2a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Nov 2021 20:29:12 -0500
+Subject: locking/rwsem: Make handoff bit handling more consistent
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit d257cc8cb8d5355ffc43a96bab94db7b5a324803 ]
+
+There are some inconsistencies in the way that the handoff bit is being
+handled in readers and writers, which lead to a race condition.
+
+Firstly, when a queue head writer sets the handoff bit, it will clear
+it when the writer is being killed or interrupted on its way out
+without acquiring the lock. That is not the case for a queue head
+reader. The handoff bit will simply be inherited by the next waiter.
+
+Secondly, in the out_nolock path of rwsem_down_read_slowpath(), both
+the waiter and handoff bits are cleared if the wait queue becomes
+empty. For rwsem_down_write_slowpath(), however, the handoff bit is
+not checked and cleared if the wait queue is empty. This can
+potentially leave the handoff bit set with an empty wait queue.
+
+Worse, the situation in rwsem_down_write_slowpath() relies on wstate,
+a variable set outside of the critical section containing the ->count
+manipulation; this leads to a race condition where RWSEM_FLAG_HANDOFF
+can be subtracted twice, corrupting ->count.
+
+To make the handoff bit handling more consistent and robust, extract
+out handoff bit clearing code into the new rwsem_del_waiter() helper
+function. Also, completely eradicate wstate; always evaluate
+everything inside the same critical section.
+
+The common function will only use atomic_long_andnot() to clear bits
+when the wait queue is empty to avoid a possible race condition. If the
+first waiter with handoff bit set is killed or interrupted to exit the
+slowpath without acquiring the lock, the next waiter will inherit the
+handoff bit.
+
+While at it, simplify the trylock for loop in
+rwsem_down_write_slowpath() to make it easier to read.
+
+Fixes: 4f23dbc1e657 ("locking/rwsem: Implement lock handoff to prevent lock starvation")
+Reported-by: Zhenhua Ma <mazhenhua@xiaomi.com>
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20211116012912.723980-1-longman@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/rwsem.c | 171 ++++++++++++++++++++---------------------
+ 1 file changed, 85 insertions(+), 86 deletions(-)
+
+diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
+index 29eea50a3e678..e63f740c2cc84 100644
+--- a/kernel/locking/rwsem.c
++++ b/kernel/locking/rwsem.c
+@@ -106,9 +106,9 @@
+ * atomic_long_cmpxchg() will be used to obtain writer lock.
+ *
+ * There are three places where the lock handoff bit may be set or cleared.
+- * 1) rwsem_mark_wake() for readers.
+- * 2) rwsem_try_write_lock() for writers.
+- * 3) Error path of rwsem_down_write_slowpath().
++ * 1) rwsem_mark_wake() for readers -- set, clear
++ * 2) rwsem_try_write_lock() for writers -- set, clear
++ * 3) rwsem_del_waiter() -- clear
+ *
+ * For all the above cases, wait_lock will be held. A writer must also
+ * be the first one in the wait_list to be eligible for setting the handoff
+@@ -335,6 +335,9 @@ struct rwsem_waiter {
+ struct task_struct *task;
+ enum rwsem_waiter_type type;
+ unsigned long timeout;
++
++ /* Writer only, not initialized in reader */
++ bool handoff_set;
+ };
+ #define rwsem_first_waiter(sem) \
+ list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
+@@ -345,12 +348,6 @@ enum rwsem_wake_type {
+ RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
+ };
+
+-enum writer_wait_state {
+- WRITER_NOT_FIRST, /* Writer is not first in wait list */
+- WRITER_FIRST, /* Writer is first in wait list */
+- WRITER_HANDOFF /* Writer is first & handoff needed */
+-};
+-
+ /*
+ * The typical HZ value is either 250 or 1000. So set the minimum waiting
+ * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
+@@ -366,6 +363,31 @@ enum writer_wait_state {
+ */
+ #define MAX_READERS_WAKEUP 0x100
+
++static inline void
++rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
++{
++ lockdep_assert_held(&sem->wait_lock);
++ list_add_tail(&waiter->list, &sem->wait_list);
++ /* caller will set RWSEM_FLAG_WAITERS */
++}
++
++/*
++ * Remove a waiter from the wait_list and clear flags.
++ *
++ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
++ * this function. Modify with care.
++ */
++static inline void
++rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
++{
++ lockdep_assert_held(&sem->wait_lock);
++ list_del(&waiter->list);
++ if (likely(!list_empty(&sem->wait_list)))
++ return;
++
++ atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
++}
++
+ /*
+ * handle the lock release when processes blocked on it that can now run
+ * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
+@@ -377,6 +399,8 @@ enum writer_wait_state {
+ * preferably when the wait_lock is released
+ * - woken process blocks are discarded from the list after having task zeroed
+ * - writers are only marked woken if downgrading is false
++ *
++ * Implies rwsem_del_waiter() for all woken readers.
+ */
+ static void rwsem_mark_wake(struct rw_semaphore *sem,
+ enum rwsem_wake_type wake_type,
+@@ -491,18 +515,25 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
+
+ adjustment = woken * RWSEM_READER_BIAS - adjustment;
+ lockevent_cond_inc(rwsem_wake_reader, woken);
++
++ oldcount = atomic_long_read(&sem->count);
+ if (list_empty(&sem->wait_list)) {
+- /* hit end of list above */
++ /*
++ * Combined with list_move_tail() above, this implies
++ * rwsem_del_waiter().
++ */
+ adjustment -= RWSEM_FLAG_WAITERS;
++ if (oldcount & RWSEM_FLAG_HANDOFF)
++ adjustment -= RWSEM_FLAG_HANDOFF;
++ } else if (woken) {
++ /*
++ * When we've woken a reader, we no longer need to force
++ * writers to give up the lock and we can clear HANDOFF.
++ */
++ if (oldcount & RWSEM_FLAG_HANDOFF)
++ adjustment -= RWSEM_FLAG_HANDOFF;
+ }
+
+- /*
+- * When we've woken a reader, we no longer need to force writers
+- * to give up the lock and we can clear HANDOFF.
+- */
+- if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
+- adjustment -= RWSEM_FLAG_HANDOFF;
+-
+ if (adjustment)
+ atomic_long_add(adjustment, &sem->count);
+
+@@ -533,12 +564,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ *
+- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
+- * bit is set or the lock is acquired with handoff bit cleared.
++ * Implies rwsem_del_waiter() on success.
+ */
+ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+- enum writer_wait_state wstate)
++ struct rwsem_waiter *waiter)
+ {
++ bool first = rwsem_first_waiter(sem) == waiter;
+ long count, new;
+
+ lockdep_assert_held(&sem->wait_lock);
+@@ -547,13 +578,19 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+ do {
+ bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
+
+- if (has_handoff && wstate == WRITER_NOT_FIRST)
+- return false;
++ if (has_handoff) {
++ if (!first)
++ return false;
++
++ /* First waiter inherits a previously set handoff bit */
++ waiter->handoff_set = true;
++ }
+
+ new = count;
+
+ if (count & RWSEM_LOCK_MASK) {
+- if (has_handoff || (wstate != WRITER_HANDOFF))
++ if (has_handoff || (!rt_task(waiter->task) &&
++ !time_after(jiffies, waiter->timeout)))
+ return false;
+
+ new |= RWSEM_FLAG_HANDOFF;
+@@ -570,9 +607,17 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
+ * We have either acquired the lock with handoff bit cleared or
+ * set the handoff bit.
+ */
+- if (new & RWSEM_FLAG_HANDOFF)
++ if (new & RWSEM_FLAG_HANDOFF) {
++ waiter->handoff_set = true;
++ lockevent_inc(rwsem_wlock_handoff);
+ return false;
++ }
+
++ /*
++ * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
++ * success.
++ */
++ list_del(&waiter->list);
+ rwsem_set_owner(sem);
+ return true;
+ }
+@@ -953,7 +998,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
+ }
+ adjustment += RWSEM_FLAG_WAITERS;
+ }
+- list_add_tail(&waiter.list, &sem->wait_list);
++ rwsem_add_waiter(sem, &waiter);
+
+ /* we're now waiting on the lock, but no longer actively locking */
+ count = atomic_long_add_return(adjustment, &sem->count);
+@@ -999,11 +1044,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
+ return sem;
+
+ out_nolock:
+- list_del(&waiter.list);
+- if (list_empty(&sem->wait_list)) {
+- atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
+- &sem->count);
+- }
++ rwsem_del_waiter(sem, &waiter);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ __set_current_state(TASK_RUNNING);
+ lockevent_inc(rwsem_rlock_fail);
+@@ -1017,9 +1058,7 @@ static struct rw_semaphore *
+ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ {
+ long count;
+- enum writer_wait_state wstate;
+ struct rwsem_waiter waiter;
+- struct rw_semaphore *ret = sem;
+ DEFINE_WAKE_Q(wake_q);
+
+ /* do optimistic spinning and steal lock if possible */
+@@ -1035,16 +1074,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ waiter.task = current;
+ waiter.type = RWSEM_WAITING_FOR_WRITE;
+ waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
++ waiter.handoff_set = false;
+
+ raw_spin_lock_irq(&sem->wait_lock);
+-
+- /* account for this before adding a new element to the list */
+- wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
+-
+- list_add_tail(&waiter.list, &sem->wait_list);
++ rwsem_add_waiter(sem, &waiter);
+
+ /* we're now waiting on the lock */
+- if (wstate == WRITER_NOT_FIRST) {
++ if (rwsem_first_waiter(sem) != &waiter) {
+ count = atomic_long_read(&sem->count);
+
+ /*
+@@ -1080,13 +1116,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ /* wait until we successfully acquire the lock */
+ set_current_state(state);
+ for (;;) {
+- if (rwsem_try_write_lock(sem, wstate)) {
++ if (rwsem_try_write_lock(sem, &waiter)) {
+ /* rwsem_try_write_lock() implies ACQUIRE on success */
+ break;
+ }
+
+ raw_spin_unlock_irq(&sem->wait_lock);
+
++ if (signal_pending_state(state, current))
++ goto out_nolock;
++
+ /*
+ * After setting the handoff bit and failing to acquire
+ * the lock, attempt to spin on owner to accelerate lock
+@@ -1095,7 +1134,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ * In this case, we attempt to acquire the lock again
+ * without sleeping.
+ */
+- if (wstate == WRITER_HANDOFF) {
++ if (waiter.handoff_set) {
+ enum owner_state owner_state;
+
+ preempt_disable();
+@@ -1106,66 +1145,26 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
+ goto trylock_again;
+ }
+
+- /* Block until there are no active lockers. */
+- for (;;) {
+- if (signal_pending_state(state, current))
+- goto out_nolock;
+-
+- schedule();
+- lockevent_inc(rwsem_sleep_writer);
+- set_current_state(state);
+- /*
+- * If HANDOFF bit is set, unconditionally do
+- * a trylock.
+- */
+- if (wstate == WRITER_HANDOFF)
+- break;
+-
+- if ((wstate == WRITER_NOT_FIRST) &&
+- (rwsem_first_waiter(sem) == &waiter))
+- wstate = WRITER_FIRST;
+-
+- count = atomic_long_read(&sem->count);
+- if (!(count & RWSEM_LOCK_MASK))
+- break;
+-
+- /*
+- * The setting of the handoff bit is deferred
+- * until rwsem_try_write_lock() is called.
+- */
+- if ((wstate == WRITER_FIRST) && (rt_task(current) ||
+- time_after(jiffies, waiter.timeout))) {
+- wstate = WRITER_HANDOFF;
+- lockevent_inc(rwsem_wlock_handoff);
+- break;
+- }
+- }
++ schedule();
++ lockevent_inc(rwsem_sleep_writer);
++ set_current_state(state);
+ trylock_again:
+ raw_spin_lock_irq(&sem->wait_lock);
+ }
+ __set_current_state(TASK_RUNNING);
+- list_del(&waiter.list);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ lockevent_inc(rwsem_wlock);
+-
+- return ret;
++ return sem;
+
+ out_nolock:
+ __set_current_state(TASK_RUNNING);
+ raw_spin_lock_irq(&sem->wait_lock);
+- list_del(&waiter.list);
+-
+- if (unlikely(wstate == WRITER_HANDOFF))
+- atomic_long_add(-RWSEM_FLAG_HANDOFF, &sem->count);
+-
+- if (list_empty(&sem->wait_list))
+- atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
+- else
++ rwsem_del_waiter(sem, &waiter);
++ if (!list_empty(&sem->wait_list))
+ rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
+ lockevent_inc(rwsem_wlock_fail);
+-
+ return ERR_PTR(-EINTR);
+ }
+
+--
+2.33.0
+
--- /dev/null
+From 61f4ecf2f145a940ee7b764cd4584134cf249034 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Nov 2021 13:22:32 +0100
+Subject: perf: Ignore sigtrap for tracepoints destined for other tasks
+
+From: Marco Elver <elver@google.com>
+
+[ Upstream commit 73743c3b092277febbf69b250ce8ebbca0525aa2 ]
+
+syzbot reported that the warning in perf_sigtrap() fires, saying that
+the event's task does not match current:
+
+ | WARNING: CPU: 0 PID: 9090 at kernel/events/core.c:6446 perf_pending_event+0x40d/0x4b0 kernel/events/core.c:6513
+ | Modules linked in:
+ | CPU: 0 PID: 9090 Comm: syz-executor.1 Not tainted 5.15.0-syzkaller #0
+ | Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ | RIP: 0010:perf_sigtrap kernel/events/core.c:6446 [inline]
+ | RIP: 0010:perf_pending_event_disable kernel/events/core.c:6470 [inline]
+ | RIP: 0010:perf_pending_event+0x40d/0x4b0 kernel/events/core.c:6513
+ | ...
+ | Call Trace:
+ | <IRQ>
+ | irq_work_single+0x106/0x220 kernel/irq_work.c:211
+ | irq_work_run_list+0x6a/0x90 kernel/irq_work.c:242
+ | irq_work_run+0x4f/0xd0 kernel/irq_work.c:251
+ | __sysvec_irq_work+0x95/0x3d0 arch/x86/kernel/irq_work.c:22
+ | sysvec_irq_work+0x8e/0xc0 arch/x86/kernel/irq_work.c:17
+ | </IRQ>
+ | <TASK>
+ | asm_sysvec_irq_work+0x12/0x20 arch/x86/include/asm/idtentry.h:664
+ | RIP: 0010:__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:152 [inline]
+ | RIP: 0010:_raw_spin_unlock_irqrestore+0x38/0x70 kernel/locking/spinlock.c:194
+ | ...
+ | coredump_task_exit kernel/exit.c:371 [inline]
+ | do_exit+0x1865/0x25c0 kernel/exit.c:771
+ | do_group_exit+0xe7/0x290 kernel/exit.c:929
+ | get_signal+0x3b0/0x1ce0 kernel/signal.c:2820
+ | arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:868
+ | handle_signal_work kernel/entry/common.c:148 [inline]
+ | exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
+ | exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:207
+ | __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
+ | syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:300
+ | do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
+ | entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+This shouldn't happen on x86, which has arch_irq_work_raise().
+
+The test program sets up a perf event with sigtrap set to fire on the
+'sched_wakeup' tracepoint, which fired in ttwu_do_wakeup().
+
+This happened because the 'sched_wakeup' tracepoint also takes a task
+argument passed on to perf_tp_event(), which is used to deliver the
+event to that other task.
+
+Since we cannot deliver synchronous signals to other tasks, skip an event if
+perf_tp_event() is targeted at another task and perf_event_attr::sigtrap is
+set, which will avoid ever entering perf_sigtrap() for such events.
+
+Fixes: 97ba62b27867 ("perf: Add support for SIGTRAP on perf events")
+Reported-by: syzbot+663359e32ce6f1a305ad@syzkaller.appspotmail.com
+Signed-off-by: Marco Elver <elver@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/YYpoCOBmC/kJWfmI@elver.google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 7162b600e7eaa..2931faf92a76f 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -9729,6 +9729,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
+ continue;
+ if (event->attr.config != entry->type)
+ continue;
++ /* Cannot deliver synchronous signal to other task. */
++ if (event->attr.sigtrap)
++ continue;
+ if (perf_tp_event_match(event, &data, regs))
+ perf_swevent_event(event, count, &data, regs);
+ }
+--
+2.33.0
+
--- /dev/null
+From 1082fccdaaae28cc12878411b956d4470f7385ff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Sep 2021 14:50:42 +0200
+Subject: riscv: dts: microchip: drop duplicated MMC/SDHC node
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+[ Upstream commit 42a57a47bb0c0f531321a7001972a3ca121409bd ]
+
+Devicetree source is a description of hardware, and the hardware has only
+one block @20008000, which can be configured either as eMMC or SDHC. Having
+two nodes for different modes is an obscure, unusual and confusing way to
+configure it. Instead, the board file is supposed to customize the block
+to its needs, e.g. to SDHC mode.
+
+This fixes a dtbs_check warning:
+ arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dt.yaml: sdhc@20008000: $nodename:0: 'sdhc@20008000' does not match '^mmc(@.*)?$'
+
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../microchip/microchip-mpfs-icicle-kit.dts | 11 ++++++-
+ .../boot/dts/microchip/microchip-mpfs.dtsi | 29 ++-----------------
+ 2 files changed, 12 insertions(+), 28 deletions(-)
+
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+index be0d77624cf53..cce5eca31f257 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+@@ -56,8 +56,17 @@ &serial3 {
+ status = "okay";
+ };
+
+-&sdcard {
++&mmc {
+ status = "okay";
++
++ bus-width = <4>;
++ disable-wp;
++ cap-sd-highspeed;
++ card-detect-delay = <200>;
++ sd-uhs-sdr12;
++ sd-uhs-sdr25;
++ sd-uhs-sdr50;
++ sd-uhs-sdr104;
+ };
+
+ &emac0 {
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+index 446f41d6a87e9..b12fd594e7172 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+@@ -262,39 +262,14 @@ serial3: serial@20104000 {
+ status = "disabled";
+ };
+
+- emmc: mmc@20008000 {
++ /* Common node entry for emmc/sd */
++ mmc: mmc@20008000 {
+ compatible = "cdns,sd4hc";
+ reg = <0x0 0x20008000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <88 89>;
+ pinctrl-names = "default";
+ clocks = <&clkcfg 6>;
+- bus-width = <4>;
+- cap-mmc-highspeed;
+- mmc-ddr-3_3v;
+- max-frequency = <200000000>;
+- non-removable;
+- no-sd;
+- no-sdio;
+- voltage-ranges = <3300 3300>;
+- status = "disabled";
+- };
+-
+- sdcard: sdhc@20008000 {
+- compatible = "cdns,sd4hc";
+- reg = <0x0 0x20008000 0x0 0x1000>;
+- interrupt-parent = <&plic>;
+- interrupts = <88>;
+- pinctrl-names = "default";
+- clocks = <&clkcfg 6>;
+- bus-width = <4>;
+- disable-wp;
+- cap-sd-highspeed;
+- card-detect-delay = <200>;
+- sd-uhs-sdr12;
+- sd-uhs-sdr25;
+- sd-uhs-sdr50;
+- sd-uhs-sdr104;
+ max-frequency = <200000000>;
+ status = "disabled";
+ };
+--
+2.33.0
+
--- /dev/null
+From f8aa3780c08969f271101c4fef63a5cabc19dd9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Sep 2021 14:50:41 +0200
+Subject: riscv: dts: microchip: fix board compatible
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+[ Upstream commit fd86dd2a5dc5ff1044423c19fef3907862f591c4 ]
+
+According to the bindings, the compatible must include microchip,mpfs. This
+fixes a dtbs_check warning:
+
+ arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dt.yaml: /: compatible: ['microchip,mpfs-icicle-kit'] is too short
+
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 2 +-
+ arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 4 ++--
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+index b254c60589a1c..be0d77624cf53 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+@@ -12,7 +12,7 @@ / {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "Microchip PolarFire-SoC Icicle Kit";
+- compatible = "microchip,mpfs-icicle-kit";
++ compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
+
+ aliases {
+ ethernet0 = &emac1;
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+index 9d2fbbc1f7778..446f41d6a87e9 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+@@ -6,8 +6,8 @@
+ / {
+ #address-cells = <2>;
+ #size-cells = <2>;
+- model = "Microchip MPFS Icicle Kit";
+- compatible = "microchip,mpfs-icicle-kit";
++ model = "Microchip PolarFire SoC";
++ compatible = "microchip,mpfs";
+
+ chosen {
+ };
+--
+2.33.0
+
--- /dev/null
+From 58af3afbaf6ca63292a78772935ae24054fc8065 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Nov 2021 11:40:47 +0000
+Subject: sched/scs: Reset task stack state in bringup_cpu()
+
+From: Mark Rutland <mark.rutland@arm.com>
+
+[ Upstream commit dce1ca0525bfdc8a69a9343bc714fbc19a2f04b3 ]
+
+To hot unplug a CPU, the idle task on that CPU calls a few layers of C
+code before finally leaving the kernel. When KASAN is in use, poisoned
+shadow is left around for each of the active stack frames, and when
+shadow call stacks (SCS) are in use, the task's saved SCS SP is left
+pointing at an arbitrary point within the task's shadow call stack.
+
+When a CPU is offlined then onlined back into the kernel, this stale
+state can adversely affect execution. Stale KASAN shadow can alias new
+stackframes and result in bogus KASAN warnings. A stale SCS SP is
+effectively a memory leak, and prevents a portion of the shadow call
+stack being used. Across a number of hotplug cycles the idle task's
+entire shadow call stack can become unusable.
+
+We previously fixed the KASAN issue in commit:
+
+ e1b77c92981a5222 ("sched/kasan: remove stale KASAN poison after hotplug")
+
+... by removing any stale KASAN stack poison immediately prior to
+onlining a CPU.
+
+Subsequently in commit:
+
+ f1a0a376ca0c4ef1 ("sched/core: Initialize the idle task with preemption disabled")
+
+... the refactoring left the KASAN and SCS cleanup in one-time idle
+thread initialization code rather than something invoked prior to each
+CPU being onlined, breaking both as above.
+
+We fixed SCS (but not KASAN) in commit:
+
+ 63acd42c0d4942f7 ("sched/scs: Reset the shadow stack when idle_task_exit")
+
+... but as this runs in the context of the idle task being offlined it's
+potentially fragile.
+
+To fix these consistently and more robustly, reset the SCS SP and KASAN
+shadow of a CPU's idle task immediately before we online that CPU in
+bringup_cpu(). This ensures the idle task always has a consistent state
+when it is running, and removes the need to do so when exiting an idle
+task.
+
+Whenever any thread is created, dup_task_struct() will give the task a
+stack which is free of KASAN shadow, and initialize the task's SCS SP,
+so there's no need to specially initialize either for the idle thread within
+init_idle(), as this was only necessary to handle hotplug cycles.
+
+I've tested this on arm64 with:
+
+* gcc 11.1.0, defconfig +KASAN_INLINE, KASAN_STACK
+* clang 12.0.0, defconfig +KASAN_INLINE, KASAN_STACK, SHADOW_CALL_STACK
+
+... offlining and onlining CPUS with:
+
+| while true; do
+| for C in /sys/devices/system/cpu/cpu*/online; do
+| echo 0 > $C;
+| echo 1 > $C;
+| done
+| done
+
+Fixes: f1a0a376ca0c4ef1 ("sched/core: Initialize the idle task with preemption disabled")
+Reported-by: Qian Cai <quic_qiancai@quicinc.com>
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
+Tested-by: Qian Cai <quic_qiancai@quicinc.com>
+Link: https://lore.kernel.org/lkml/20211115113310.35693-1-mark.rutland@arm.com/
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cpu.c | 7 +++++++
+ kernel/sched/core.c | 4 ----
+ 2 files changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 192e43a874076..407a2568f35eb 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -31,6 +31,7 @@
+ #include <linux/smpboot.h>
+ #include <linux/relay.h>
+ #include <linux/slab.h>
++#include <linux/scs.h>
+ #include <linux/percpu-rwsem.h>
+ #include <linux/cpuset.h>
+
+@@ -587,6 +588,12 @@ static int bringup_cpu(unsigned int cpu)
+ struct task_struct *idle = idle_thread_get(cpu);
+ int ret;
+
++ /*
++ * Reset stale stack state from the last time this CPU was online.
++ */
++ scs_task_reset(idle);
++ kasan_unpoison_task_stack(idle);
++
+ /*
+ * Some architectures have to walk the irq descriptors to
+ * setup the vector space for the cpu which comes online.
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 779f27a4b46ac..6f4625f8276f1 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -8641,9 +8641,6 @@ void __init init_idle(struct task_struct *idle, int cpu)
+ idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY;
+ kthread_set_per_cpu(idle, cpu);
+
+- scs_task_reset(idle);
+- kasan_unpoison_task_stack(idle);
+-
+ #ifdef CONFIG_SMP
+ /*
+ * It's possible that init_idle() gets called multiple times on a task,
+@@ -8799,7 +8796,6 @@ void idle_task_exit(void)
+ finish_arch_post_lock_switch();
+ }
+
+- scs_task_reset(current);
+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
+ }
+
+--
+2.33.0
+
net-hns3-fix-incorrect-components-info-of-ethtool-re.patch
net-mscc-ocelot-don-t-downgrade-timestamping-rx-filt.patch
net-mscc-ocelot-correctly-report-the-timestamping-rx.patch
+locking-rwsem-make-handoff-bit-handling-more-consist.patch
+perf-ignore-sigtrap-for-tracepoints-destined-for-oth.patch
+sched-scs-reset-task-stack-state-in-bringup_cpu.patch
+iommu-rockchip-fix-page_desc_hi_masks-for-rk3568.patch
+iommu-vt-d-fix-unmap_pages-support.patch
+f2fs-quota-fix-potential-deadlock.patch
+f2fs-set-sbi_need_fsck-flag-when-inconsistent-node-b.patch
+riscv-dts-microchip-fix-board-compatible.patch
+riscv-dts-microchip-drop-duplicated-mmc-sdhc-node.patch
+cifs-nosharesock-should-not-share-socket-with-future.patch
+ceph-properly-handle-statfs-on-multifs-setups.patch