+++ /dev/null
-From f8c989a0c89a75d30f899a7cabdc14d72522bb8d Mon Sep 17 00:00:00 2001
-From: Yang Erkun <yangerkun@huawei.com>
-Date: Mon, 21 Oct 2024 22:23:43 +0800
-Subject: nfsd: release svc_expkey/svc_export with rcu_work
-
-From: Yang Erkun <yangerkun@huawei.com>
-
-commit f8c989a0c89a75d30f899a7cabdc14d72522bb8d upstream.
-
-The last reference for `cache_head` can be reduced to zero in `c_show`
-and `e_show` (using `rcu_read_lock` and `rcu_read_unlock`). Consequently,
-`svc_export_put` and `expkey_put` will be invoked, leading to two
-issues:
-
-1. `svc_export_put` frees `ex_uuid` directly. However,
- `e_show`/`c_show` will access `ex_uuid` after `cache_put`, which can
- trigger a use-after-free issue, shown below.
-
- ==================================================================
- BUG: KASAN: slab-use-after-free in svc_export_show+0x362/0x430 [nfsd]
- Read of size 1 at addr ff11000010fdc120 by task cat/870
-
- CPU: 1 UID: 0 PID: 870 Comm: cat Not tainted 6.12.0-rc3+ #1
- Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
- 1.16.1-2.fc37 04/01/2014
- Call Trace:
- <TASK>
- dump_stack_lvl+0x53/0x70
- print_address_description.constprop.0+0x2c/0x3a0
- print_report+0xb9/0x280
- kasan_report+0xae/0xe0
- svc_export_show+0x362/0x430 [nfsd]
- c_show+0x161/0x390 [sunrpc]
- seq_read_iter+0x589/0x770
- seq_read+0x1e5/0x270
- proc_reg_read+0xe1/0x140
- vfs_read+0x125/0x530
- ksys_read+0xc1/0x160
- do_syscall_64+0x5f/0x170
- entry_SYSCALL_64_after_hwframe+0x76/0x7e
-
- Allocated by task 830:
- kasan_save_stack+0x20/0x40
- kasan_save_track+0x14/0x30
- __kasan_kmalloc+0x8f/0xa0
- __kmalloc_node_track_caller_noprof+0x1bc/0x400
- kmemdup_noprof+0x22/0x50
- svc_export_parse+0x8a9/0xb80 [nfsd]
- cache_do_downcall+0x71/0xa0 [sunrpc]
- cache_write_procfs+0x8e/0xd0 [sunrpc]
- proc_reg_write+0xe1/0x140
- vfs_write+0x1a5/0x6d0
- ksys_write+0xc1/0x160
- do_syscall_64+0x5f/0x170
- entry_SYSCALL_64_after_hwframe+0x76/0x7e
-
- Freed by task 868:
- kasan_save_stack+0x20/0x40
- kasan_save_track+0x14/0x30
- kasan_save_free_info+0x3b/0x60
- __kasan_slab_free+0x37/0x50
- kfree+0xf3/0x3e0
- svc_export_put+0x87/0xb0 [nfsd]
- cache_purge+0x17f/0x1f0 [sunrpc]
- nfsd_destroy_serv+0x226/0x2d0 [nfsd]
- nfsd_svc+0x125/0x1e0 [nfsd]
- write_threads+0x16a/0x2a0 [nfsd]
- nfsctl_transaction_write+0x74/0xa0 [nfsd]
- vfs_write+0x1a5/0x6d0
- ksys_write+0xc1/0x160
- do_syscall_64+0x5f/0x170
- entry_SYSCALL_64_after_hwframe+0x76/0x7e
-
-2. Sleeping is not allowed between `rcu_read_lock` and `rcu_read_unlock`.
- However, `svc_export_put`/`expkey_put` will call `path_put`, which
- can subsequently sleep in the `dput` that follows.
-
- =============================
- WARNING: suspicious RCU usage
- 5.10.0-dirty #141 Not tainted
- -----------------------------
- ...
- Call Trace:
- dump_stack+0x9a/0xd0
- ___might_sleep+0x231/0x240
- dput+0x39/0x600
- path_put+0x1b/0x30
- svc_export_put+0x17/0x80
- e_show+0x1c9/0x200
- seq_read_iter+0x63f/0x7c0
- seq_read+0x226/0x2d0
- vfs_read+0x113/0x2c0
- ksys_read+0xc9/0x170
- do_syscall_64+0x33/0x40
- entry_SYSCALL_64_after_hwframe+0x67/0xd1
-
-Fix these issues by using `rcu_work` to help release
-`svc_expkey`/`svc_export`. This approach allows for an asynchronous
-context to invoke `path_put` and also facilitates the freeing of
-`uuid/exp/key` after an RCU grace period.
-
-Fixes: 9ceddd9da134 ("knfsd: Allow lockless lookups of the exports")
-Signed-off-by: Yang Erkun <yangerkun@huawei.com>
-Reviewed-by: Jeff Layton <jlayton@kernel.org>
-Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
-Signed-off-by: Bin Lan <lanbincn@qq.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- fs/nfsd/export.c | 31 +++++++++++++++++++++++++------
- fs/nfsd/export.h | 4 ++--
- 2 files changed, 27 insertions(+), 8 deletions(-)
-
---- a/fs/nfsd/export.c
-+++ b/fs/nfsd/export.c
-@@ -40,15 +40,24 @@
- #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS)
- #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1)
-
--static void expkey_put(struct kref *ref)
-+static void expkey_put_work(struct work_struct *work)
- {
-- struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
-+ struct svc_expkey *key =
-+ container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work);
-
- if (test_bit(CACHE_VALID, &key->h.flags) &&
- !test_bit(CACHE_NEGATIVE, &key->h.flags))
- path_put(&key->ek_path);
- auth_domain_put(key->ek_client);
-- kfree_rcu(key, ek_rcu);
-+ kfree(key);
-+}
-+
-+static void expkey_put(struct kref *ref)
-+{
-+ struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
-+
-+ INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work);
-+ queue_rcu_work(system_wq, &key->ek_rcu_work);
- }
-
- static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
-@@ -351,16 +360,26 @@ static void export_stats_destroy(struct
- EXP_STATS_COUNTERS_NUM);
- }
-
--static void svc_export_put(struct kref *ref)
-+static void svc_export_put_work(struct work_struct *work)
- {
-- struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
-+ struct svc_export *exp =
-+ container_of(to_rcu_work(work), struct svc_export, ex_rcu_work);
-+
- path_put(&exp->ex_path);
- auth_domain_put(exp->ex_client);
- nfsd4_fslocs_free(&exp->ex_fslocs);
- export_stats_destroy(exp->ex_stats);
- kfree(exp->ex_stats);
- kfree(exp->ex_uuid);
-- kfree_rcu(exp, ex_rcu);
-+ kfree(exp);
-+}
-+
-+static void svc_export_put(struct kref *ref)
-+{
-+ struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
-+
-+ INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work);
-+ queue_rcu_work(system_wq, &exp->ex_rcu_work);
- }
-
- static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
---- a/fs/nfsd/export.h
-+++ b/fs/nfsd/export.h
-@@ -75,7 +75,7 @@ struct svc_export {
- u32 ex_layout_types;
- struct nfsd4_deviceid_map *ex_devid_map;
- struct cache_detail *cd;
-- struct rcu_head ex_rcu;
-+ struct rcu_work ex_rcu_work;
- struct export_stats *ex_stats;
- };
-
-@@ -91,7 +91,7 @@ struct svc_expkey {
- u32 ek_fsid[6];
-
- struct path ek_path;
-- struct rcu_head ek_rcu;
-+ struct rcu_work ek_rcu_work;
- };
-
- #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC))