]>
Commit | Line | Data |
---|---|---|
d2d2340a SL |
1 | From 7ab717eacadcc56e0de66520eca6205ed950a3db Mon Sep 17 00:00:00 2001 |
2 | From: Sasha Levin <sashal@kernel.org> | |
3 | Date: Fri, 1 Mar 2024 11:49:57 -0500 | |
4 | Subject: nfs: fix UAF in direct writes | |
5 | ||
6 | From: Josef Bacik <josef@toxicpanda.com> | |
7 | ||
8 | [ Upstream commit 17f46b803d4f23c66cacce81db35fef3adb8f2af ] | |
9 | ||
10 | In production we have been hitting the following warning consistently | |
11 | ||
12 | ------------[ cut here ]------------ | |
13 | refcount_t: underflow; use-after-free. | |
14 | WARNING: CPU: 17 PID: 1800359 at lib/refcount.c:28 refcount_warn_saturate+0x9c/0xe0 | |
15 | Workqueue: nfsiod nfs_direct_write_schedule_work [nfs] | |
16 | RIP: 0010:refcount_warn_saturate+0x9c/0xe0 | |
17 | PKRU: 55555554 | |
18 | Call Trace: | |
19 | <TASK> | |
20 | ? __warn+0x9f/0x130 | |
21 | ? refcount_warn_saturate+0x9c/0xe0 | |
22 | ? report_bug+0xcc/0x150 | |
23 | ? handle_bug+0x3d/0x70 | |
24 | ? exc_invalid_op+0x16/0x40 | |
25 | ? asm_exc_invalid_op+0x16/0x20 | |
26 | ? refcount_warn_saturate+0x9c/0xe0 | |
27 | nfs_direct_write_schedule_work+0x237/0x250 [nfs] | |
28 | process_one_work+0x12f/0x4a0 | |
29 | worker_thread+0x14e/0x3b0 | |
30 | ? ZSTD_getCParams_internal+0x220/0x220 | |
31 | kthread+0xdc/0x120 | |
32 | ? __btf_name_valid+0xa0/0xa0 | |
33 | ret_from_fork+0x1f/0x30 | |
34 | ||
35 | This is because we're completing the nfs_direct_request twice in a row. | |
36 | ||
37 | The source of this is when we have our commit requests to submit, we | |
38 | process them and send them off, and then in the completion path for the | |
39 | commit requests we have | |
40 | ||
41 | if (nfs_commit_end(cinfo.mds)) | |
42 | nfs_direct_write_complete(dreq); | |
43 | ||
44 | However since we're submitting asynchronous requests we sometimes have | |
45 | one that completes before we submit the next one, so we end up calling | |
46 | complete on the nfs_direct_request twice. | |
47 | ||
48 | The only other place we use nfs_generic_commit_list() is in | |
49 | __nfs_commit_inode, which wraps this call in a | |
50 | ||
51 | nfs_commit_begin(); | |
52 | nfs_commit_end(); | |
53 | ||
54 | Which is a common pattern for this style of completion handling, one | |
55 | that is also repeated in the direct code with get_dreq()/put_dreq() | |
56 | calls around where we process events as well as in the completion paths. | |
57 | ||
58 | Fix this by using the same pattern for the commit requests. | |
59 | ||
60 | Before with my 200 node rocksdb stress running this warning would pop | |
61 | every 10ish minutes. With my patch the stress test has been running for | |
62 | several hours without popping. | |
63 | ||
64 | Signed-off-by: Josef Bacik <josef@toxicpanda.com> | |
65 | Cc: stable@vger.kernel.org | |
66 | Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> | |
67 | Signed-off-by: Sasha Levin <sashal@kernel.org> | |
68 | --- | |
69 | fs/nfs/direct.c | 11 +++++++++-- | |
70 | fs/nfs/write.c | 2 +- | |
71 | include/linux/nfs_fs.h | 1 + | |
72 | 3 files changed, 11 insertions(+), 3 deletions(-) | |
73 | ||
74 | diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c | |
75 | index bbe2a5cc49f68..3185899676adf 100644 | |
76 | --- a/fs/nfs/direct.c | |
77 | +++ b/fs/nfs/direct.c | |
78 | @@ -678,10 +678,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) | |
79 | LIST_HEAD(mds_list); | |
80 | ||
81 | nfs_init_cinfo_from_dreq(&cinfo, dreq); | |
82 | + nfs_commit_begin(cinfo.mds); | |
83 | nfs_scan_commit(dreq->inode, &mds_list, &cinfo); | |
84 | res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); | |
85 | - if (res < 0) /* res == -ENOMEM */ | |
86 | - nfs_direct_write_reschedule(dreq); | |
87 | + if (res < 0) { /* res == -ENOMEM */ | |
88 | + spin_lock(&dreq->lock); | |
89 | + if (dreq->flags == 0) | |
90 | + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | |
91 | + spin_unlock(&dreq->lock); | |
92 | + } | |
93 | + if (nfs_commit_end(cinfo.mds)) | |
94 | + nfs_direct_write_complete(dreq); | |
95 | } | |
96 | ||
97 | static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) | |
98 | diff --git a/fs/nfs/write.c b/fs/nfs/write.c | |
99 | index 4231d51fc1add..3d06bad2760da 100644 | |
100 | --- a/fs/nfs/write.c | |
101 | +++ b/fs/nfs/write.c | |
102 | @@ -1644,7 +1644,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo) | |
103 | !atomic_read(&cinfo->rpcs_out)); | |
104 | } | |
105 | ||
106 | -static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) | |
107 | +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) | |
108 | { | |
109 | atomic_inc(&cinfo->rpcs_out); | |
110 | } | |
111 | diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h | |
112 | index 5ddc30405f7f4..886bfa99a6af4 100644 | |
113 | --- a/include/linux/nfs_fs.h | |
114 | +++ b/include/linux/nfs_fs.h | |
115 | @@ -588,6 +588,7 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); | |
116 | extern int nfs_commit_inode(struct inode *, int); | |
117 | extern struct nfs_commit_data *nfs_commitdata_alloc(void); | |
118 | extern void nfs_commit_free(struct nfs_commit_data *data); | |
119 | +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); | |
120 | bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); | |
121 | ||
122 | static inline int | |
123 | -- | |
124 | 2.43.0 | |
125 |