From: Sasha Levin Date: Sat, 10 Sep 2022 17:15:16 +0000 (-0400) Subject: Fixes for 5.15 X-Git-Tag: v5.19.9~48 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=eca155cb697c9f903287102b1c30e77d43581c69;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch b/queue-5.15/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch new file mode 100644 index 00000000000..b16cdb48b8f --- /dev/null +++ b/queue-5.15/cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch @@ -0,0 +1,81 @@ +From d549ecab002cd5302c23865230be6dccd58b4c3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Jul 2022 18:38:15 -1000 +Subject: cgroup: Elide write-locking threadgroup_rwsem when updating csses on + an empty subtree +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Tejun Heo + +[ Upstream commit 671c11f0619e5ccb380bcf0f062f69ba95fc974a ] + +cgroup_update_dfl_csses() write-lock the threadgroup_rwsem as updating the +csses can trigger process migrations. However, if the subtree doesn't +contain any tasks, there aren't gonna be any cgroup migrations. This +condition can be trivially detected by testing whether +mgctx.preloaded_src_csets is empty. Elide write-locking threadgroup_rwsem if +the subtree is empty. + +After this optimization, the usage pattern of creating a cgroup, enabling +the necessary controllers, and then seeding it with CLONE_INTO_CGROUP and +then removing the cgroup after it becomes empty doesn't need to write-lock +threadgroup_rwsem at all. 
+ +Signed-off-by: Tejun Heo +Cc: Christian Brauner +Cc: Michal Koutný +Signed-off-by: Sasha Levin +--- + kernel/cgroup/cgroup.c | 16 +++++++++++++--- + 1 file changed, 13 insertions(+), 3 deletions(-) + +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index 416dd7db3fb2c..baebd1c7667b7 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -2949,12 +2949,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + struct cgroup_subsys_state *d_css; + struct cgroup *dsct; + struct css_set *src_cset; ++ bool has_tasks; + int ret; + + lockdep_assert_held(&cgroup_mutex); + +- percpu_down_write(&cgroup_threadgroup_rwsem); +- + /* look up all csses currently attached to @cgrp's subtree */ + spin_lock_irq(&css_set_lock); + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { +@@ -2965,6 +2964,16 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + } + spin_unlock_irq(&css_set_lock); + ++ /* ++ * We need to write-lock threadgroup_rwsem while migrating tasks. ++ * However, if there are no source csets for @cgrp, changing its ++ * controllers isn't gonna produce any task migrations and the ++ * write-locking can be skipped safely. 
++ */ ++ has_tasks = !list_empty(&mgctx.preloaded_src_csets); ++ if (has_tasks) ++ percpu_down_write(&cgroup_threadgroup_rwsem); ++ + /* NULL dst indicates self on default hierarchy */ + ret = cgroup_migrate_prepare_dst(&mgctx); + if (ret) +@@ -2984,7 +2993,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + ret = cgroup_migrate_execute(&mgctx); + out_finish: + cgroup_migrate_finish(&mgctx); +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ if (has_tasks) ++ percpu_up_write(&cgroup_threadgroup_rwsem); + return ret; + } + +-- +2.35.1 + diff --git a/queue-5.15/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch b/queue-5.15/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch new file mode 100644 index 00000000000..aacdb276936 --- /dev/null +++ b/queue-5.15/cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch @@ -0,0 +1,207 @@ +From 9964e8bbe57dcf67d4b53897cfc14a248f45657d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 Aug 2022 13:27:38 -1000 +Subject: cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock + +From: Tejun Heo + +[ Upstream commit 4f7e7236435ca0abe005c674ebd6892c6e83aeb3 ] + +Bringing up a CPU may involve creating and destroying tasks which requires +read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside +cpus_read_lock(). However, cpuset's ->attach(), which may be called with +threadgroup_rwsem write-locked, also wants to disable CPU hotplug and +acquires cpus_read_lock(), leading to a deadlock. + +Fix it by guaranteeing that ->attach() is always called with CPU hotplug +disabled and removing cpus_read_lock() call from cpuset_attach(). 
+ +Signed-off-by: Tejun Heo +Reviewed-and-tested-by: Imran Khan +Reported-and-tested-by: Xuewen Yan +Fixes: 05c7b7a92cc8 ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug") +Cc: stable@vger.kernel.org # v5.17+ +Signed-off-by: Sasha Levin +--- + kernel/cgroup/cgroup.c | 77 +++++++++++++++++++++++++++++------------- + kernel/cgroup/cpuset.c | 3 +- + 2 files changed, 55 insertions(+), 25 deletions(-) + +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index baebd1c7667b7..75c3881af0784 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -2345,6 +2345,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) + } + EXPORT_SYMBOL_GPL(task_cgroup_path); + ++/** ++ * cgroup_attach_lock - Lock for ->attach() ++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem ++ * ++ * cgroup migration sometimes needs to stabilize threadgroups against forks and ++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() ++ * implementations (e.g. cpuset), also need to disable CPU hotplug. ++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can ++ * lead to deadlocks. ++ * ++ * Bringing up a CPU may involve creating and destroying tasks which requires ++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside ++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while ++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up ++ * waiting for an on-going CPU hotplug operation which in turn is waiting for ++ * the threadgroup_rwsem to be released to create new tasks. For more details: ++ * ++ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu ++ * ++ * Resolve the situation by always acquiring cpus_read_lock() before optionally ++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that ++ * CPU hotplug is disabled on entry. 
++ */ ++static void cgroup_attach_lock(bool lock_threadgroup) ++{ ++ cpus_read_lock(); ++ if (lock_threadgroup) ++ percpu_down_write(&cgroup_threadgroup_rwsem); ++} ++ ++/** ++ * cgroup_attach_unlock - Undo cgroup_attach_lock() ++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem ++ */ ++static void cgroup_attach_unlock(bool lock_threadgroup) ++{ ++ if (lock_threadgroup) ++ percpu_up_write(&cgroup_threadgroup_rwsem); ++ cpus_read_unlock(); ++} ++ + /** + * cgroup_migrate_add_task - add a migration target task to a migration context + * @task: target task +@@ -2821,8 +2862,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, + } + + struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, +- bool *locked) +- __acquires(&cgroup_threadgroup_rwsem) ++ bool *threadgroup_locked) + { + struct task_struct *tsk; + pid_t pid; +@@ -2839,12 +2879,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + * Therefore, we can skip the global lock. 
+ */ + lockdep_assert_held(&cgroup_mutex); +- if (pid || threadgroup) { +- percpu_down_write(&cgroup_threadgroup_rwsem); +- *locked = true; +- } else { +- *locked = false; +- } ++ *threadgroup_locked = pid || threadgroup; ++ cgroup_attach_lock(*threadgroup_locked); + + rcu_read_lock(); + if (pid) { +@@ -2875,17 +2911,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + goto out_unlock_rcu; + + out_unlock_threadgroup: +- if (*locked) { +- percpu_up_write(&cgroup_threadgroup_rwsem); +- *locked = false; +- } ++ cgroup_attach_unlock(*threadgroup_locked); ++ *threadgroup_locked = false; + out_unlock_rcu: + rcu_read_unlock(); + return tsk; + } + +-void cgroup_procs_write_finish(struct task_struct *task, bool locked) +- __releases(&cgroup_threadgroup_rwsem) ++void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked) + { + struct cgroup_subsys *ss; + int ssid; +@@ -2893,8 +2926,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked) + /* release reference from cgroup_procs_write_start() */ + put_task_struct(task); + +- if (locked) +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_unlock(threadgroup_locked); ++ + for_each_subsys(ss, ssid) + if (ss->post_attach) + ss->post_attach(); +@@ -2971,8 +3004,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + * write-locking can be skipped safely. 
+ */ + has_tasks = !list_empty(&mgctx.preloaded_src_csets); +- if (has_tasks) +- percpu_down_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_lock(has_tasks); + + /* NULL dst indicates self on default hierarchy */ + ret = cgroup_migrate_prepare_dst(&mgctx); +@@ -2993,8 +3025,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + ret = cgroup_migrate_execute(&mgctx); + out_finish: + cgroup_migrate_finish(&mgctx); +- if (has_tasks) +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_unlock(has_tasks); + return ret; + } + +@@ -4942,13 +4973,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + struct task_struct *task; + const struct cred *saved_cred; + ssize_t ret; +- bool locked; ++ bool threadgroup_locked; + + dst_cgrp = cgroup_kn_lock_live(of->kn, false); + if (!dst_cgrp) + return -ENODEV; + +- task = cgroup_procs_write_start(buf, threadgroup, &locked); ++ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked); + ret = PTR_ERR_OR_ZERO(task); + if (ret) + goto out_unlock; +@@ -4974,7 +5005,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + ret = cgroup_attach_task(dst_cgrp, task, threadgroup); + + out_finish: +- cgroup_procs_write_finish(task, locked); ++ cgroup_procs_write_finish(task, threadgroup_locked); + out_unlock: + cgroup_kn_unlock(of->kn); + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 9c5b659db63f4..3213d3c8ea0a8 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -2249,7 +2249,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) + cgroup_taskset_first(tset, &css); + cs = css_cs(css); + +- cpus_read_lock(); ++ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ + percpu_down_write(&cpuset_rwsem); + + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); +@@ -2303,7 +2303,6 @@ static void cpuset_attach(struct cgroup_taskset *tset) + wake_up(&cpuset_attach_wq); + + percpu_up_write(&cpuset_rwsem); +- 
cpus_read_unlock(); + } + + /* The various types of files and directories in a cpuset file system */ +-- +2.35.1 + diff --git a/queue-5.15/nfs-fix-another-fsync-issue-after-a-server-reboot.patch b/queue-5.15/nfs-fix-another-fsync-issue-after-a-server-reboot.patch new file mode 100644 index 00000000000..31e58b897b9 --- /dev/null +++ b/queue-5.15/nfs-fix-another-fsync-issue-after-a-server-reboot.patch @@ -0,0 +1,125 @@ +From 1d960ddcb28ad2a6a22bac66b4b9e0a363f9dd45 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 13 Aug 2022 08:22:25 -0400 +Subject: NFS: Fix another fsync() issue after a server reboot + +From: Trond Myklebust + +[ Upstream commit 67f4b5dc49913abcdb5cc736e73674e2f352f81d ] + +Currently, when the writeback code detects a server reboot, it redirties +any pages that were not committed to disk, and it sets the flag +NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor +that dirtied the file. While this allows the file descriptor in question +to redrive its own writes, it violates the fsync() requirement that we +should be synchronising all writes to disk. +While the problem is infrequent, we do see corner cases where an +untimely server reboot causes the fsync() call to abandon its attempt to +sync data to disk, causing data corruption issues due to missed error +conditions or similar. + +In order to tighten up the client's ability to deal with this situation +without introducing livelocks, add a counter that records the number of +times pages are redirtied due to a server reboot-like condition, and use +that in fsync() to redrive the sync to disk. 
+ +Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted") +Cc: stable@vger.kernel.org +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/file.c | 15 ++++++--------- + fs/nfs/inode.c | 1 + + fs/nfs/write.c | 6 ++++-- + include/linux/nfs_fs.h | 1 + + 4 files changed, 12 insertions(+), 11 deletions(-) + +diff --git a/fs/nfs/file.c b/fs/nfs/file.c +index a8693cc50c7ca..ad5114e480097 100644 +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -223,8 +223,10 @@ nfs_file_fsync_commit(struct file *file, int datasync) + int + nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) + { +- struct nfs_open_context *ctx = nfs_file_open_context(file); + struct inode *inode = file_inode(file); ++ struct nfs_inode *nfsi = NFS_I(inode); ++ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); ++ long nredirtied; + int ret; + + trace_nfs_fsync_enter(inode); +@@ -239,15 +241,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) + ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; +- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) ++ nredirtied = atomic_long_read(&nfsi->redirtied_pages); ++ if (nredirtied == save_nredirtied) + break; +- /* +- * If nfs_file_fsync_commit detected a server reboot, then +- * resend all dirty pages that might have been covered by +- * the NFS_CONTEXT_RESEND_WRITES flag +- */ +- start = 0; +- end = LLONG_MAX; ++ save_nredirtied = nredirtied; + } + + trace_nfs_fsync_exit(inode, ret); +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index dc057ab6b30d1..e4524635a129a 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -434,6 +434,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) + static void nfs_inode_init_regular(struct nfs_inode *nfsi) + { + atomic_long_set(&nfsi->nrequests, 0); ++ atomic_long_set(&nfsi->redirtied_pages, 0); + INIT_LIST_HEAD(&nfsi->commit_info.list); + 
atomic_long_set(&nfsi->commit_info.ncommit, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index cdb29fd235492..be70874bc3292 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -1394,10 +1394,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, + */ + static void nfs_redirty_request(struct nfs_page *req) + { ++ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); ++ + /* Bump the transmission count */ + req->wb_nio++; + nfs_mark_request_dirty(req); +- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); ++ atomic_long_inc(&nfsi->redirtied_pages); + nfs_end_page_writeback(req); + nfs_release_request(req); + } +@@ -1870,7 +1872,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) + /* We have a mismatch. Write the page again */ + dprintk_cont(" mismatch\n"); + nfs_mark_request_dirty(req); +- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags); ++ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages); + next: + nfs_unlock_and_release_request(req); + /* Latency breaker */ +diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h +index d0855352cd6fc..71467d661fb66 100644 +--- a/include/linux/nfs_fs.h ++++ b/include/linux/nfs_fs.h +@@ -180,6 +180,7 @@ struct nfs_inode { + /* Regular file */ + struct { + atomic_long_t nrequests; ++ atomic_long_t redirtied_pages; + struct nfs_mds_commit_info commit_info; + struct mutex commit_mutex; + }; +-- +2.35.1 + diff --git a/queue-5.15/nfs-further-optimisations-for-ls-l.patch b/queue-5.15/nfs-further-optimisations-for-ls-l.patch new file mode 100644 index 00000000000..d4a51c508e0 --- /dev/null +++ b/queue-5.15/nfs-further-optimisations-for-ls-l.patch @@ -0,0 +1,134 @@ +From 824880fe84e32e08ef97657f71ae5568a73d6899 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Sep 2021 14:33:44 -0400 +Subject: NFS: Further optimisations for 'ls -l' + +From: Trond Myklebust + +[ Upstream commit 
ff81dfb5d721fff87bd516c558847f6effb70031 ] + +If a user is doing 'ls -l', we have a heuristic in GETATTR that tells +the readdir code to try to use READDIRPLUS in order to refresh the inode +attributes. In certain circumstances, we also try to invalidate the +remaining directory entries in order to ensure this refresh. + +If there are multiple readers of the directory, we probably should avoid +invalidating the page cache, since the heuristic breaks down in that +situation anyway. + +Signed-off-by: Trond Myklebust +Tested-by: Benjamin Coddington +Reviewed-by: Benjamin Coddington +Signed-off-by: Sasha Levin +--- + fs/nfs/dir.c | 16 +++++++++++----- + include/linux/nfs_fs.h | 5 ++--- + 2 files changed, 13 insertions(+), 8 deletions(-) + +diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c +index 78219396788b4..32c3d0c454b19 100644 +--- a/fs/nfs/dir.c ++++ b/fs/nfs/dir.c +@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir + ctx->attr_gencount = nfsi->attr_gencount; + ctx->dir_cookie = 0; + ctx->dup_cookie = 0; ++ ctx->page_index = 0; + spin_lock(&dir->i_lock); + if (list_empty(&nfsi->open_files) && + (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) +@@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir + NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED); + list_add(&ctx->list, &nfsi->open_files); ++ clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); + spin_unlock(&dir->i_lock); + return ctx; + } +@@ -626,8 +628,7 @@ void nfs_force_use_readdirplus(struct inode *dir) + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) { + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); +- invalidate_mapping_pages(dir->i_mapping, +- nfsi->page_index + 1, -1); ++ set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); + } + } + +@@ -938,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) + sizeof(nfsi->cookieverf)); + } + res = 
nfs_readdir_search_array(desc); +- if (res == 0) { +- nfsi->page_index = desc->page_index; ++ if (res == 0) + return 0; +- } + nfs_readdir_page_unlock_and_put_cached(desc); + return res; + } +@@ -1081,6 +1080,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_dir_context *dir_ctx = file->private_data; + struct nfs_readdir_descriptor *desc; ++ pgoff_t page_index; + int res; + + dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", +@@ -1111,10 +1111,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + desc->dir_cookie = dir_ctx->dir_cookie; + desc->dup_cookie = dir_ctx->dup_cookie; + desc->duped = dir_ctx->duped; ++ page_index = dir_ctx->page_index; + desc->attr_gencount = dir_ctx->attr_gencount; + memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); + spin_unlock(&file->f_lock); + ++ if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) && ++ list_is_singular(&nfsi->open_files)) ++ invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1); ++ + do { + res = readdir_search_pagecache(desc); + +@@ -1151,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) + dir_ctx->dup_cookie = desc->dup_cookie; + dir_ctx->duped = desc->duped; + dir_ctx->attr_gencount = desc->attr_gencount; ++ dir_ctx->page_index = desc->page_index; + memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); + spin_unlock(&file->f_lock); + +diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h +index 66b6cc24ab8c9..be8625d8a10a7 100644 +--- a/include/linux/nfs_fs.h ++++ b/include/linux/nfs_fs.h +@@ -103,6 +103,7 @@ struct nfs_open_dir_context { + __be32 verf[NFS_DIR_VERIFIER_SIZE]; + __u64 dir_cookie; + __u64 dup_cookie; ++ pgoff_t page_index; + signed char duped; + }; + +@@ -181,9 +182,6 @@ struct nfs_inode { + struct rw_semaphore rmdir_sem; + struct mutex commit_mutex; + +- /* track last access to cached pages */ +- unsigned long page_index; 
+- + #if IS_ENABLED(CONFIG_NFS_V4) + struct nfs4_cached_acl *nfs4_acl; + /* NFSv4 state */ +@@ -272,6 +270,7 @@ struct nfs4_copy_state { + #define NFS_INO_INVALIDATING (3) /* inode is being invalidated */ + #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ + #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ ++#define NFS_INO_FORCE_READDIR (7) /* force readdirplus */ + #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ + #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ + #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ +-- +2.35.1 + diff --git a/queue-5.15/nfs-save-some-space-in-the-inode.patch b/queue-5.15/nfs-save-some-space-in-the-inode.patch new file mode 100644 index 00000000000..d3fc8e8592e --- /dev/null +++ b/queue-5.15/nfs-save-some-space-in-the-inode.patch @@ -0,0 +1,147 @@ +From a3af8f4666db612da3ab84dde6b1889b01fb91e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Sep 2021 17:41:41 -0400 +Subject: NFS: Save some space in the inode + +From: Trond Myklebust + +[ Upstream commit e591b298d7ecb851e200f65946e3d53fe78a3c4f ] + +Save some space in the nfs_inode by setting up an anonymous union with +the fields that are peculiar to a specific type of filesystem object. 
+ +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/inode.c | 26 ++++++++++++++++++-------- + include/linux/nfs_fs.h | 42 ++++++++++++++++++++++++------------------ + 2 files changed, 42 insertions(+), 26 deletions(-) + +diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c +index cb407af9e9e92..dc057ab6b30d1 100644 +--- a/fs/nfs/inode.c ++++ b/fs/nfs/inode.c +@@ -431,6 +431,22 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh) + return inode; + } + ++static void nfs_inode_init_regular(struct nfs_inode *nfsi) ++{ ++ atomic_long_set(&nfsi->nrequests, 0); ++ INIT_LIST_HEAD(&nfsi->commit_info.list); ++ atomic_long_set(&nfsi->commit_info.ncommit, 0); ++ atomic_set(&nfsi->commit_info.rpcs_out, 0); ++ mutex_init(&nfsi->commit_mutex); ++} ++ ++static void nfs_inode_init_dir(struct nfs_inode *nfsi) ++{ ++ nfsi->cache_change_attribute = 0; ++ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); ++ init_rwsem(&nfsi->rmdir_sem); ++} ++ + /* + * This is our front-end to iget that looks up inodes by file handle + * instead of inode number. 
+@@ -485,10 +501,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st + if (S_ISREG(inode->i_mode)) { + inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; + inode->i_data.a_ops = &nfs_file_aops; ++ nfs_inode_init_regular(nfsi); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; + inode->i_fop = &nfs_dir_operations; + inode->i_data.a_ops = &nfs_dir_aops; ++ nfs_inode_init_dir(nfsi); + /* Deal with crossing mountpoints */ + if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || + fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { +@@ -514,7 +532,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st + inode->i_uid = make_kuid(&init_user_ns, -2); + inode->i_gid = make_kgid(&init_user_ns, -2); + inode->i_blocks = 0; +- memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->write_io = 0; + nfsi->read_io = 0; + +@@ -2282,14 +2299,7 @@ static void init_once(void *foo) + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); +- INIT_LIST_HEAD(&nfsi->commit_info.list); +- atomic_long_set(&nfsi->nrequests, 0); +- atomic_long_set(&nfsi->commit_info.ncommit, 0); +- atomic_set(&nfsi->commit_info.rpcs_out, 0); +- init_rwsem(&nfsi->rmdir_sem); +- mutex_init(&nfsi->commit_mutex); + nfs4_init_once(nfsi); +- nfsi->cache_change_attribute = 0; + } + + static int __init nfs_init_inodecache(void) +diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h +index be8625d8a10a7..d0855352cd6fc 100644 +--- a/include/linux/nfs_fs.h ++++ b/include/linux/nfs_fs.h +@@ -155,33 +155,39 @@ struct nfs_inode { + unsigned long attrtimeo_timestamp; + + unsigned long attr_gencount; +- /* "Generation counter" for the attribute cache. This is +- * bumped whenever we update the metadata on the +- * server. 
+- */ +- unsigned long cache_change_attribute; + + struct rb_root access_cache; + struct list_head access_cache_entry_lru; + struct list_head access_cache_inode_lru; + +- /* +- * This is the cookie verifier used for NFSv3 readdir +- * operations +- */ +- __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; +- +- atomic_long_t nrequests; +- struct nfs_mds_commit_info commit_info; ++ union { ++ /* Directory */ ++ struct { ++ /* "Generation counter" for the attribute cache. ++ * This is bumped whenever we update the metadata ++ * on the server. ++ */ ++ unsigned long cache_change_attribute; ++ /* ++ * This is the cookie verifier used for NFSv3 readdir ++ * operations ++ */ ++ __be32 cookieverf[NFS_DIR_VERIFIER_SIZE]; ++ /* Readers: in-flight sillydelete RPC calls */ ++ /* Writers: rmdir */ ++ struct rw_semaphore rmdir_sem; ++ }; ++ /* Regular file */ ++ struct { ++ atomic_long_t nrequests; ++ struct nfs_mds_commit_info commit_info; ++ struct mutex commit_mutex; ++ }; ++ }; + + /* Open contexts for shared mmap writes */ + struct list_head open_files; + +- /* Readers: in-flight sillydelete RPC calls */ +- /* Writers: rmdir */ +- struct rw_semaphore rmdir_sem; +- struct mutex commit_mutex; +- + #if IS_ENABLED(CONFIG_NFS_V4) + struct nfs4_cached_acl *nfs4_acl; + /* NFSv4 state */ +-- +2.35.1 + diff --git a/queue-5.15/riscv-dts-microchip-mpfs-fix-reference-clock-node.patch b/queue-5.15/riscv-dts-microchip-mpfs-fix-reference-clock-node.patch new file mode 100644 index 00000000000..52491d8a1f7 --- /dev/null +++ b/queue-5.15/riscv-dts-microchip-mpfs-fix-reference-clock-node.patch @@ -0,0 +1,79 @@ +From 4fa8e7d65f7a720ac13f22aab2ce447b921dbf97 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Dec 2021 13:49:26 +0100 +Subject: riscv: dts: microchip: mpfs: Fix reference clock node + +From: Geert Uytterhoeven + +[ Upstream commit 9d7b3078628f591e4007210c0d5d3f94805cff55 ] + +"make dtbs_check" reports: + + arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dt.yaml: soc: refclk: 
{'compatible': ['fixed-clock'], '#clock-cells': [[0]], 'clock-frequency': [[600000000]], 'clock-output-names': ['msspllclk'], 'phandle': [[7]]} should not be valid under {'type': 'object'} + From schema: dtschema/schemas/simple-bus.yaml + +Fix this by moving the node out of the "soc" subnode. +While at it, rename it to "msspllclk", and drop the now superfluous +"clock-output-names" property. +Move the actual clock-frequency value to the board DTS, since it is not +set until bitstream programming time. + +Signed-off-by: Geert Uytterhoeven +Acked-by: Krzysztof Kozlowski +Reviewed-by: Conor Dooley +Tested-by: Conor Dooley +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + .../boot/dts/microchip/microchip-mpfs-icicle-kit.dts | 4 ++++ + arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 12 +++++------- + 2 files changed, 9 insertions(+), 7 deletions(-) + +diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +index cce5eca31f257..4b69ab4ff30a2 100644 +--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts ++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts +@@ -40,6 +40,10 @@ + }; + }; + ++&refclk { ++ clock-frequency = <600000000>; ++}; ++ + &serial0 { + status = "okay"; + }; +diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +index 4ef4bcb748729..9279ccf20009a 100644 +--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi ++++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi +@@ -139,6 +139,11 @@ + }; + }; + ++ refclk: msspllclk { ++ compatible = "fixed-clock"; ++ #clock-cells = <0>; ++ }; ++ + soc { + #address-cells = <2>; + #size-cells = <2>; +@@ -188,13 +193,6 @@ + #dma-cells = <1>; + }; + +- refclk: refclk { +- compatible = "fixed-clock"; +- #clock-cells = <0>; +- clock-frequency = <600000000>; +- clock-output-names = "msspllclk"; +- }; +- + clkcfg: clkcfg@20002000 { + 
compatible = "microchip,mpfs-clkcfg"; + reg = <0x0 0x20002000 0x0 0x1000>; +-- +2.35.1 + diff --git a/queue-5.15/series b/queue-5.15/series index 0bf0d7eb766..1451c2e7b21 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -40,3 +40,9 @@ nvmet-fix-a-use-after-free.patch drm-i915-implement-waedplinkratedatareload.patch scsi-mpt3sas-fix-use-after-free-warning.patch scsi-lpfc-add-missing-destroy_workqueue-in-error-path.patch +nfs-further-optimisations-for-ls-l.patch +nfs-save-some-space-in-the-inode.patch +nfs-fix-another-fsync-issue-after-a-server-reboot.patch +cgroup-elide-write-locking-threadgroup_rwsem-when-up.patch +cgroup-fix-threadgroup_rwsem-cpus_read_lock-deadlock.patch +riscv-dts-microchip-mpfs-fix-reference-clock-node.patch