--- /dev/null
+From d549ecab002cd5302c23865230be6dccd58b4c3b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 18:38:15 -1000
+Subject: cgroup: Elide write-locking threadgroup_rwsem when updating csses on
+ an empty subtree
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 671c11f0619e5ccb380bcf0f062f69ba95fc974a ]
+
+cgroup_update_dfl_csses() write-locks the threadgroup_rwsem, as updating
+the csses can trigger process migrations. However, if the subtree doesn't
+contain any tasks, there won't be any cgroup migrations. This condition
+can be trivially detected by testing whether mgctx.preloaded_src_csets is
+empty. Elide write-locking threadgroup_rwsem if the subtree is empty.
+
+After this optimization, the usage pattern of creating a cgroup, enabling
+the necessary controllers, seeding it with CLONE_INTO_CGROUP, and then
+removing the cgroup once it becomes empty doesn't need to write-lock
+threadgroup_rwsem at all.
+
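+A minimal userspace sketch of that pattern (illustrative only; the
+cgroup path is an assumption and error handling is omitted):
+
+    #define _GNU_SOURCE
+    #include <fcntl.h>
+    #include <signal.h>
+    #include <sys/syscall.h>
+    #include <unistd.h>
+    #include <linux/sched.h>   /* struct clone_args, CLONE_INTO_CGROUP */
+
+    int main(void)
+    {
+            /* seed a freshly created, controller-enabled cgroup directly */
+            int cgfd = open("/sys/fs/cgroup/mygrp", O_RDONLY | O_DIRECTORY);
+            struct clone_args args = {
+                    .flags       = CLONE_INTO_CGROUP,
+                    .exit_signal = SIGCHLD,
+                    .cgroup      = (unsigned long long)cgfd,
+            };
+            pid_t pid = syscall(__NR_clone3, &args, sizeof(args));
+
+            if (pid == 0)
+                    _exit(0);  /* child starts life inside mygrp */
+            return pid > 0 ? 0 : 1;
+    }
+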
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 416dd7db3fb2c..baebd1c7667b7 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2949,12 +2949,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ struct cgroup_subsys_state *d_css;
+ struct cgroup *dsct;
+ struct css_set *src_cset;
++ bool has_tasks;
+ int ret;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+- percpu_down_write(&cgroup_threadgroup_rwsem);
+-
+ /* look up all csses currently attached to @cgrp's subtree */
+ spin_lock_irq(&css_set_lock);
+ cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
+@@ -2965,6 +2964,16 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ }
+ spin_unlock_irq(&css_set_lock);
+
++ /*
++ * We need to write-lock threadgroup_rwsem while migrating tasks.
++ * However, if there are no source csets for @cgrp, changing its
++ * controllers isn't gonna produce any task migrations and the
++ * write-locking can be skipped safely.
++ */
++ has_tasks = !list_empty(&mgctx.preloaded_src_csets);
++ if (has_tasks)
++ percpu_down_write(&cgroup_threadgroup_rwsem);
++
+ /* NULL dst indicates self on default hierarchy */
+ ret = cgroup_migrate_prepare_dst(&mgctx);
+ if (ret)
+@@ -2984,7 +2993,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ ret = cgroup_migrate_execute(&mgctx);
+ out_finish:
+ cgroup_migrate_finish(&mgctx);
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ if (has_tasks)
++ percpu_up_write(&cgroup_threadgroup_rwsem);
+ return ret;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 9964e8bbe57dcf67d4b53897cfc14a248f45657d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 15 Aug 2022 13:27:38 -1000
+Subject: cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 4f7e7236435ca0abe005c674ebd6892c6e83aeb3 ]
+
+Bringing up a CPU may involve creating and destroying tasks which requires
+read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
+cpus_read_lock(). However, cpuset's ->attach(), which may be called with
+threadgroup_rwsem write-locked, also wants to disable CPU hotplug and
+acquires cpus_read_lock(), leading to a deadlock.
+
+Fix it by guaranteeing that ->attach() is always called with CPU hotplug
+disabled and removing cpus_read_lock() call from cpuset_attach().
+
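+In shape, the ordering before and after the fix (an illustrative
+sketch, not the exact hunks below):
+
+    /* before: ABBA between hotplug and migration
+     *   hotplug:   cpus_write_lock() -> fork() ->
+     *                  percpu_down_read(&cgroup_threadgroup_rwsem)
+     *   migration: percpu_down_write(&cgroup_threadgroup_rwsem) ->
+     *                  ->attach() -> cpus_read_lock()
+     */
+
+    /* after: the cpus lock is always taken first */
+    cpus_read_lock();
+    percpu_down_write(&cgroup_threadgroup_rwsem);
+    /* ... migrate; ->attach() may now assume hotplug is disabled ... */
+    percpu_up_write(&cgroup_threadgroup_rwsem);
+    cpus_read_unlock();
+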
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reviewed-and-tested-by: Imran Khan <imran.f.khan@oracle.com>
+Reported-and-tested-by: Xuewen Yan <xuewen.yan@unisoc.com>
+Fixes: 05c7b7a92cc8 ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug")
+Cc: stable@vger.kernel.org # v5.17+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup.c | 77 +++++++++++++++++++++++++++++-------------
+ kernel/cgroup/cpuset.c | 3 +-
+ 2 files changed, 55 insertions(+), 25 deletions(-)
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index baebd1c7667b7..75c3881af0784 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -2345,6 +2345,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+ }
+ EXPORT_SYMBOL_GPL(task_cgroup_path);
+
++/**
++ * cgroup_attach_lock - Lock for ->attach()
++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
++ *
++ * cgroup migration sometimes needs to stabilize threadgroups against forks and
++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach()
++ * implementations (e.g. cpuset), also need to disable CPU hotplug.
++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can
++ * lead to deadlocks.
++ *
++ * Bringing up a CPU may involve creating and destroying tasks which requires
++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside
++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while
++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up
++ * waiting for an on-going CPU hotplug operation which in turn is waiting for
++ * the threadgroup_rwsem to be released to create new tasks. For more details:
++ *
++ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu
++ *
++ * Resolve the situation by always acquiring cpus_read_lock() before optionally
++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that
++ * CPU hotplug is disabled on entry.
++ */
++static void cgroup_attach_lock(bool lock_threadgroup)
++{
++ cpus_read_lock();
++ if (lock_threadgroup)
++ percpu_down_write(&cgroup_threadgroup_rwsem);
++}
++
++/**
++ * cgroup_attach_unlock - Undo cgroup_attach_lock()
++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem
++ */
++static void cgroup_attach_unlock(bool lock_threadgroup)
++{
++ if (lock_threadgroup)
++ percpu_up_write(&cgroup_threadgroup_rwsem);
++ cpus_read_unlock();
++}
++
+ /**
+ * cgroup_migrate_add_task - add a migration target task to a migration context
+ * @task: target task
+@@ -2821,8 +2862,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
+ }
+
+ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+- bool *locked)
+- __acquires(&cgroup_threadgroup_rwsem)
++ bool *threadgroup_locked)
+ {
+ struct task_struct *tsk;
+ pid_t pid;
+@@ -2839,12 +2879,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+ * Therefore, we can skip the global lock.
+ */
+ lockdep_assert_held(&cgroup_mutex);
+- if (pid || threadgroup) {
+- percpu_down_write(&cgroup_threadgroup_rwsem);
+- *locked = true;
+- } else {
+- *locked = false;
+- }
++ *threadgroup_locked = pid || threadgroup;
++ cgroup_attach_lock(*threadgroup_locked);
+
+ rcu_read_lock();
+ if (pid) {
+@@ -2875,17 +2911,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
+ goto out_unlock_rcu;
+
+ out_unlock_threadgroup:
+- if (*locked) {
+- percpu_up_write(&cgroup_threadgroup_rwsem);
+- *locked = false;
+- }
++ cgroup_attach_unlock(*threadgroup_locked);
++ *threadgroup_locked = false;
+ out_unlock_rcu:
+ rcu_read_unlock();
+ return tsk;
+ }
+
+-void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+- __releases(&cgroup_threadgroup_rwsem)
++void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked)
+ {
+ struct cgroup_subsys *ss;
+ int ssid;
+@@ -2893,8 +2926,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked)
+ /* release reference from cgroup_procs_write_start() */
+ put_task_struct(task);
+
+- if (locked)
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock(threadgroup_locked);
++
+ for_each_subsys(ss, ssid)
+ if (ss->post_attach)
+ ss->post_attach();
+@@ -2971,8 +3004,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ * write-locking can be skipped safely.
+ */
+ has_tasks = !list_empty(&mgctx.preloaded_src_csets);
+- if (has_tasks)
+- percpu_down_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_lock(has_tasks);
+
+ /* NULL dst indicates self on default hierarchy */
+ ret = cgroup_migrate_prepare_dst(&mgctx);
+@@ -2993,8 +3025,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+ ret = cgroup_migrate_execute(&mgctx);
+ out_finish:
+ cgroup_migrate_finish(&mgctx);
+- if (has_tasks)
+- percpu_up_write(&cgroup_threadgroup_rwsem);
++ cgroup_attach_unlock(has_tasks);
+ return ret;
+ }
+
+@@ -4942,13 +4973,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+ struct task_struct *task;
+ const struct cred *saved_cred;
+ ssize_t ret;
+- bool locked;
++ bool threadgroup_locked;
+
+ dst_cgrp = cgroup_kn_lock_live(of->kn, false);
+ if (!dst_cgrp)
+ return -ENODEV;
+
+- task = cgroup_procs_write_start(buf, threadgroup, &locked);
++ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked);
+ ret = PTR_ERR_OR_ZERO(task);
+ if (ret)
+ goto out_unlock;
+@@ -4974,7 +5005,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
+ ret = cgroup_attach_task(dst_cgrp, task, threadgroup);
+
+ out_finish:
+- cgroup_procs_write_finish(task, locked);
++ cgroup_procs_write_finish(task, threadgroup_locked);
+ out_unlock:
+ cgroup_kn_unlock(of->kn);
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 9c5b659db63f4..3213d3c8ea0a8 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2249,7 +2249,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+ cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
+
+- cpus_read_lock();
++ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
+ percpu_down_write(&cpuset_rwsem);
+
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+@@ -2303,7 +2303,6 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+ wake_up(&cpuset_attach_wq);
+
+ percpu_up_write(&cpuset_rwsem);
+- cpus_read_unlock();
+ }
+
+ /* The various types of files and directories in a cpuset file system */
+--
+2.35.1
+
--- /dev/null
+From 1d960ddcb28ad2a6a22bac66b4b9e0a363f9dd45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 13 Aug 2022 08:22:25 -0400
+Subject: NFS: Fix another fsync() issue after a server reboot
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 67f4b5dc49913abcdb5cc736e73674e2f352f81d ]
+
+Currently, when the writeback code detects a server reboot, it redirties
+any pages that were not committed to disk, and it sets the flag
+NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor
+that dirtied the file. While this allows the file descriptor in question
+to redrive its own writes, it violates the fsync() requirement that we
+should be synchronising all writes to disk.
+
+While the problem is infrequent, we do see corner cases where an
+untimely server reboot causes the fsync() call to abandon its attempt
+to sync data to disk, leading to data corruption due to missed error
+conditions or similar.
+
+In order to tighten up the client's ability to deal with this situation
+without introducing livelocks, add a counter that records the number of
+times pages are redirtied due to a server reboot-like condition, and use
+that in fsync() to redrive the sync to disk.
+
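+The shape of the resulting fsync() loop (a condensed sketch of the
+change below; nfs_sync_and_commit() is a stand-in for the existing
+write/commit/pnfs_sync_inode sequence, not a real helper):
+
+    long save = atomic_long_read(&nfsi->redirtied_pages);
+    long now;
+    int ret;
+
+    for (;;) {
+            ret = nfs_sync_and_commit(inode, start, end, datasync);
+            if (ret != 0)
+                    break;
+            now = atomic_long_read(&nfsi->redirtied_pages);
+            if (now == save)
+                    break;  /* nothing was redirtied; writes are stable */
+            save = now;     /* a reboot redirtied pages; resend them */
+    }
+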
+Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted")
+Cc: stable@vger.kernel.org
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/file.c | 15 ++++++---------
+ fs/nfs/inode.c | 1 +
+ fs/nfs/write.c | 6 ++++--
+ include/linux/nfs_fs.h | 1 +
+ 4 files changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/fs/nfs/file.c b/fs/nfs/file.c
+index a8693cc50c7ca..ad5114e480097 100644
+--- a/fs/nfs/file.c
++++ b/fs/nfs/file.c
+@@ -223,8 +223,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
+ int
+ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ {
+- struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct inode *inode = file_inode(file);
++ struct nfs_inode *nfsi = NFS_I(inode);
++ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
++ long nredirtied;
+ int ret;
+
+ trace_nfs_fsync_enter(inode);
+@@ -239,15 +241,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+ ret = pnfs_sync_inode(inode, !!datasync);
+ if (ret != 0)
+ break;
+- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
++ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
++ if (nredirtied == save_nredirtied)
+ break;
+- /*
+- * If nfs_file_fsync_commit detected a server reboot, then
+- * resend all dirty pages that might have been covered by
+- * the NFS_CONTEXT_RESEND_WRITES flag
+- */
+- start = 0;
+- end = LLONG_MAX;
++ save_nredirtied = nredirtied;
+ }
+
+ trace_nfs_fsync_exit(inode, ret);
+diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
+index dc057ab6b30d1..e4524635a129a 100644
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -434,6 +434,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+ static void nfs_inode_init_regular(struct nfs_inode *nfsi)
+ {
+ atomic_long_set(&nfsi->nrequests, 0);
++ atomic_long_set(&nfsi->redirtied_pages, 0);
+ INIT_LIST_HEAD(&nfsi->commit_info.list);
+ atomic_long_set(&nfsi->commit_info.ncommit, 0);
+ atomic_set(&nfsi->commit_info.rpcs_out, 0);
+diff --git a/fs/nfs/write.c b/fs/nfs/write.c
+index cdb29fd235492..be70874bc3292 100644
+--- a/fs/nfs/write.c
++++ b/fs/nfs/write.c
+@@ -1394,10 +1394,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
+ */
+ static void nfs_redirty_request(struct nfs_page *req)
+ {
++ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
++
+ /* Bump the transmission count */
+ req->wb_nio++;
+ nfs_mark_request_dirty(req);
+- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
++ atomic_long_inc(&nfsi->redirtied_pages);
+ nfs_end_page_writeback(req);
+ nfs_release_request(req);
+ }
+@@ -1870,7 +1872,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
+ /* We have a mismatch. Write the page again */
+ dprintk_cont(" mismatch\n");
+ nfs_mark_request_dirty(req);
+- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
++ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
+ next:
+ nfs_unlock_and_release_request(req);
+ /* Latency breaker */
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index d0855352cd6fc..71467d661fb66 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -180,6 +180,7 @@ struct nfs_inode {
+ /* Regular file */
+ struct {
+ atomic_long_t nrequests;
++ atomic_long_t redirtied_pages;
+ struct nfs_mds_commit_info commit_info;
+ struct mutex commit_mutex;
+ };
+--
+2.35.1
+
--- /dev/null
+From 824880fe84e32e08ef97657f71ae5568a73d6899 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Sep 2021 14:33:44 -0400
+Subject: NFS: Further optimisations for 'ls -l'
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit ff81dfb5d721fff87bd516c558847f6effb70031 ]
+
+If a user is doing 'ls -l', we have a heuristic in GETATTR that tells
+the readdir code to try to use READDIRPLUS in order to refresh the inode
+attributes. In certain circumstances, we also try to invalidate the
+remaining directory entries in order to ensure this refresh.
+
+If there are multiple readers of the directory, we probably should avoid
+invalidating the page cache, since the heuristic breaks down in that
+situation anyway.
+
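+The invalidation therefore moves from the attribute-revalidation path
+to the reader, roughly (a sketch of the pattern; the exact hunks
+follow):
+
+    /* producer (GETATTR heuristic): only record the request */
+    set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+
+    /* consumer (nfs_readdir): honour it only for a sole opener */
+    if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
+        list_is_singular(&nfsi->open_files))
+            invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
+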
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Tested-by: Benjamin Coddington <bcodding@redhat.com>
+Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 16 +++++++++++-----
+ include/linux/nfs_fs.h | 5 ++---
+ 2 files changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 78219396788b4..32c3d0c454b19 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
+ ctx->attr_gencount = nfsi->attr_gencount;
+ ctx->dir_cookie = 0;
+ ctx->dup_cookie = 0;
++ ctx->page_index = 0;
+ spin_lock(&dir->i_lock);
+ if (list_empty(&nfsi->open_files) &&
+ (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
+@@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
+ NFS_INO_INVALID_DATA |
+ NFS_INO_REVAL_FORCED);
+ list_add(&ctx->list, &nfsi->open_files);
++ clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+ spin_unlock(&dir->i_lock);
+ return ctx;
+ }
+@@ -626,8 +628,7 @@ void nfs_force_use_readdirplus(struct inode *dir)
+ if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
+ !list_empty(&nfsi->open_files)) {
+ set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+- invalidate_mapping_pages(dir->i_mapping,
+- nfsi->page_index + 1, -1);
++ set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+ }
+ }
+
+@@ -938,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
+ sizeof(nfsi->cookieverf));
+ }
+ res = nfs_readdir_search_array(desc);
+- if (res == 0) {
+- nfsi->page_index = desc->page_index;
++ if (res == 0)
+ return 0;
+- }
+ nfs_readdir_page_unlock_and_put_cached(desc);
+ return res;
+ }
+@@ -1081,6 +1080,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_dir_context *dir_ctx = file->private_data;
+ struct nfs_readdir_descriptor *desc;
++ pgoff_t page_index;
+ int res;
+
+ dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
+@@ -1111,10 +1111,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
+ desc->dir_cookie = dir_ctx->dir_cookie;
+ desc->dup_cookie = dir_ctx->dup_cookie;
+ desc->duped = dir_ctx->duped;
++ page_index = dir_ctx->page_index;
+ desc->attr_gencount = dir_ctx->attr_gencount;
+ memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
+ spin_unlock(&file->f_lock);
+
++ if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
++ list_is_singular(&nfsi->open_files))
++ invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
++
+ do {
+ res = readdir_search_pagecache(desc);
+
+@@ -1151,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
+ dir_ctx->dup_cookie = desc->dup_cookie;
+ dir_ctx->duped = desc->duped;
+ dir_ctx->attr_gencount = desc->attr_gencount;
++ dir_ctx->page_index = desc->page_index;
+ memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
+ spin_unlock(&file->f_lock);
+
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index 66b6cc24ab8c9..be8625d8a10a7 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -103,6 +103,7 @@ struct nfs_open_dir_context {
+ __be32 verf[NFS_DIR_VERIFIER_SIZE];
+ __u64 dir_cookie;
+ __u64 dup_cookie;
++ pgoff_t page_index;
+ signed char duped;
+ };
+
+@@ -181,9 +182,6 @@ struct nfs_inode {
+ struct rw_semaphore rmdir_sem;
+ struct mutex commit_mutex;
+
+- /* track last access to cached pages */
+- unsigned long page_index;
+-
+ #if IS_ENABLED(CONFIG_NFS_V4)
+ struct nfs4_cached_acl *nfs4_acl;
+ /* NFSv4 state */
+@@ -272,6 +270,7 @@ struct nfs4_copy_state {
+ #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */
+ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
+ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
++#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */
+ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
+ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
+ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
+--
+2.35.1
+
--- /dev/null
+From a3af8f4666db612da3ab84dde6b1889b01fb91e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Sep 2021 17:41:41 -0400
+Subject: NFS: Save some space in the inode
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit e591b298d7ecb851e200f65946e3d53fe78a3c4f ]
+
+Save some space in the nfs_inode by setting up an anonymous union with
+the fields that are peculiar to a specific type of filesystem object.
+
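+The pattern in miniature (an illustrative sketch with made-up field
+names, not the actual nfs_inode layout):
+
+    struct example_inode {
+            unsigned long common;      /* shared by every object type */
+            union {
+                    struct {           /* directories only */
+                            unsigned long cache_change;
+                            unsigned long cookieverf;
+                    };
+                    struct {           /* regular files only */
+                            unsigned long nrequests;
+                            unsigned long ncommit;
+                    };
+            };
+    };
+
+Members of the anonymous structs are still addressed directly
+(nfsi->cookieverf), so no call sites change; the directory and
+regular-file fields simply share storage, which is safe because no
+inode is both at once.
+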
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/inode.c | 26 ++++++++++++++++++--------
+ include/linux/nfs_fs.h | 42 ++++++++++++++++++++++++------------------
+ 2 files changed, 42 insertions(+), 26 deletions(-)
+
+diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
+index cb407af9e9e92..dc057ab6b30d1 100644
+--- a/fs/nfs/inode.c
++++ b/fs/nfs/inode.c
+@@ -431,6 +431,22 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
+ return inode;
+ }
+
++static void nfs_inode_init_regular(struct nfs_inode *nfsi)
++{
++ atomic_long_set(&nfsi->nrequests, 0);
++ INIT_LIST_HEAD(&nfsi->commit_info.list);
++ atomic_long_set(&nfsi->commit_info.ncommit, 0);
++ atomic_set(&nfsi->commit_info.rpcs_out, 0);
++ mutex_init(&nfsi->commit_mutex);
++}
++
++static void nfs_inode_init_dir(struct nfs_inode *nfsi)
++{
++ nfsi->cache_change_attribute = 0;
++ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
++ init_rwsem(&nfsi->rmdir_sem);
++}
++
+ /*
+ * This is our front-end to iget that looks up inodes by file handle
+ * instead of inode number.
+@@ -485,10 +501,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
+ inode->i_data.a_ops = &nfs_file_aops;
++ nfs_inode_init_regular(nfsi);
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
+ inode->i_fop = &nfs_dir_operations;
+ inode->i_data.a_ops = &nfs_dir_aops;
++ nfs_inode_init_dir(nfsi);
+ /* Deal with crossing mountpoints */
+ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
+ fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+@@ -514,7 +532,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
+ inode->i_uid = make_kuid(&init_user_ns, -2);
+ inode->i_gid = make_kgid(&init_user_ns, -2);
+ inode->i_blocks = 0;
+- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
+
+@@ -2282,14 +2299,7 @@ static void init_once(void *foo)
+ INIT_LIST_HEAD(&nfsi->open_files);
+ INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
+- INIT_LIST_HEAD(&nfsi->commit_info.list);
+- atomic_long_set(&nfsi->nrequests, 0);
+- atomic_long_set(&nfsi->commit_info.ncommit, 0);
+- atomic_set(&nfsi->commit_info.rpcs_out, 0);
+- init_rwsem(&nfsi->rmdir_sem);
+- mutex_init(&nfsi->commit_mutex);
+ nfs4_init_once(nfsi);
+- nfsi->cache_change_attribute = 0;
+ }
+
+ static int __init nfs_init_inodecache(void)
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index be8625d8a10a7..d0855352cd6fc 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -155,33 +155,39 @@ struct nfs_inode {
+ unsigned long attrtimeo_timestamp;
+
+ unsigned long attr_gencount;
+- /* "Generation counter" for the attribute cache. This is
+- * bumped whenever we update the metadata on the
+- * server.
+- */
+- unsigned long cache_change_attribute;
+
+ struct rb_root access_cache;
+ struct list_head access_cache_entry_lru;
+ struct list_head access_cache_inode_lru;
+
+- /*
+- * This is the cookie verifier used for NFSv3 readdir
+- * operations
+- */
+- __be32 cookieverf[NFS_DIR_VERIFIER_SIZE];
+-
+- atomic_long_t nrequests;
+- struct nfs_mds_commit_info commit_info;
++ union {
++ /* Directory */
++ struct {
++ /* "Generation counter" for the attribute cache.
++ * This is bumped whenever we update the metadata
++ * on the server.
++ */
++ unsigned long cache_change_attribute;
++ /*
++ * This is the cookie verifier used for NFSv3 readdir
++ * operations
++ */
++ __be32 cookieverf[NFS_DIR_VERIFIER_SIZE];
++ /* Readers: in-flight sillydelete RPC calls */
++ /* Writers: rmdir */
++ struct rw_semaphore rmdir_sem;
++ };
++ /* Regular file */
++ struct {
++ atomic_long_t nrequests;
++ struct nfs_mds_commit_info commit_info;
++ struct mutex commit_mutex;
++ };
++ };
+
+ /* Open contexts for shared mmap writes */
+ struct list_head open_files;
+
+- /* Readers: in-flight sillydelete RPC calls */
+- /* Writers: rmdir */
+- struct rw_semaphore rmdir_sem;
+- struct mutex commit_mutex;
+-
+ #if IS_ENABLED(CONFIG_NFS_V4)
+ struct nfs4_cached_acl *nfs4_acl;
+ /* NFSv4 state */
+--
+2.35.1
+
--- /dev/null
+From 4fa8e7d65f7a720ac13f22aab2ce447b921dbf97 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Dec 2021 13:49:26 +0100
+Subject: riscv: dts: microchip: mpfs: Fix reference clock node
+
+From: Geert Uytterhoeven <geert@linux-m68k.org>
+
+[ Upstream commit 9d7b3078628f591e4007210c0d5d3f94805cff55 ]
+
+"make dtbs_check" reports:
+
+ arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dt.yaml: soc: refclk: {'compatible': ['fixed-clock'], '#clock-cells': [[0]], 'clock-frequency': [[600000000]], 'clock-output-names': ['msspllclk'], 'phandle': [[7]]} should not be valid under {'type': 'object'}
+ From schema: dtschema/schemas/simple-bus.yaml
+
+Fix this by moving the node out of the "soc" subnode.
+While at it, rename it to "msspllclk", and drop the now superfluous
+"clock-output-names" property.
+Move the actual clock-frequency value to the board DTS, since it is not
+set until bitstream programming time.
+
+Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
+Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Tested-by: Conor Dooley <conor.dooley@microchip.com>
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 4 ++++
+ arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 12 +++++-------
+ 2 files changed, 9 insertions(+), 7 deletions(-)
+
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+index cce5eca31f257..4b69ab4ff30a2 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+@@ -40,6 +40,10 @@
+ };
+ };
+
++&refclk {
++ clock-frequency = <600000000>;
++};
++
+ &serial0 {
+ status = "okay";
+ };
+diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+index 4ef4bcb748729..9279ccf20009a 100644
+--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+@@ -139,6 +139,11 @@
+ };
+ };
+
++ refclk: msspllclk {
++ compatible = "fixed-clock";
++ #clock-cells = <0>;
++ };
++
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+@@ -188,13 +193,6 @@
+ #dma-cells = <1>;
+ };
+
+- refclk: refclk {
+- compatible = "fixed-clock";
+- #clock-cells = <0>;
+- clock-frequency = <600000000>;
+- clock-output-names = "msspllclk";
+- };
+-
+ clkcfg: clkcfg@20002000 {
+ compatible = "microchip,mpfs-clkcfg";
+ reg = <0x0 0x20002000 0x0 0x1000>;
+--
+2.35.1
+
drm-i915-implement-waedplinkratedatareload.patch
scsi-mpt3sas-fix-use-after-free-warning.patch
scsi-lpfc-add-missing-destroy_workqueue-in-error-path.patch
+nfs-further-optimisations-for-ls-l.patch
+nfs-save-some-space-in-the-inode.patch
+nfs-fix-another-fsync-issue-after-a-server-reboot.patch
+cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch
+cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch
+riscv-dts-microchip-mpfs-fix-reference-clock-node.patch