]>
Commit | Line | Data |
---|---|---|
571e214c GKH |
1 | From 84c4e1f89fefe70554da0ab33be72c9be7994379 Mon Sep 17 00:00:00 2001 |
2 | From: Linus Torvalds <torvalds@linux-foundation.org> | |
3 | Date: Sun, 3 Mar 2019 14:23:33 -0800 | |
4 | Subject: aio: simplify - and fix - fget/fput for io_submit() | |
5 | ||
6 | From: Linus Torvalds <torvalds@linux-foundation.org> | |
7 | ||
8 | commit 84c4e1f89fefe70554da0ab33be72c9be7994379 upstream. | |
9 | ||
10 | Al Viro root-caused a race where the IOCB_CMD_POLL handling of | |
11 | fget/fput() could cause us to access the file pointer after it had | |
12 | already been freed: | |
13 | ||
14 | "In more details - normally IOCB_CMD_POLL handling looks so: | |
15 | ||
16 | 1) io_submit(2) allocates aio_kiocb instance and passes it to | |
17 | aio_poll() | |
18 | ||
19 | 2) aio_poll() resolves the descriptor to struct file by req->file = | |
20 | fget(iocb->aio_fildes) | |
21 | ||
22 | 3) aio_poll() sets ->woken to false and raises ->ki_refcnt of that | |
23 | aio_kiocb to 2 (bumps by 1, that is). | |
24 | ||
25 | 4) aio_poll() calls vfs_poll(). After sanity checks (basically, | |
26 | "poll_wait() had been called and only once") it locks the queue. | |
27 | That's what the extra reference to iocb had been for - we know we | |
28 | can safely access it. | |
29 | ||
30 | 5) With queue locked, we check if ->woken has already been set to | |
31 | true (by aio_poll_wake()) and, if it had been, we unlock the | |
32 | queue, drop a reference to aio_kiocb and bugger off - at that | |
33 | point it's a responsibility to aio_poll_wake() and the stuff | |
34 | called/scheduled by it. That code will drop the reference to file | |
35 | in req->file, along with the other reference to our aio_kiocb. | |
36 | ||
37 | 6) otherwise, we see whether we need to wait. If we do, we unlock the | |
38 | queue, drop one reference to aio_kiocb and go away - eventual | |
39 | wakeup (or cancel) will deal with the reference to file and with | |
40 | the other reference to aio_kiocb | |
41 | ||
42 | 7) otherwise we remove ourselves from waitqueue (still under the | |
43 | queue lock), so that wakeup won't get us. No async activity will | |
44 | be happening, so we can safely drop req->file and iocb ourselves. | |
45 | ||
46 | If wakeup happens while we are in vfs_poll(), we are fine - aio_kiocb | |
47 | won't get freed under us, so we can do all the checks and locking | |
48 | safely. And we don't touch ->file if we detect that case. | |
49 | ||
50 | However, vfs_poll() most certainly *does* touch the file it had been | |
51 | given. So wakeup coming while we are still in ->poll() might end up | |
52 | doing fput() on that file. That case is not too rare, and usually we | |
53 | are saved by the still present reference from descriptor table - that | |
54 | fput() is not the final one. | |
55 | ||
56 | But if another thread closes that descriptor right after our fget() | |
57 | and wakeup does happen before ->poll() returns, we are in trouble - | |
58 | final fput() done while we are in the middle of a method: | |
59 | ||
60 | Al also wrote a patch to take an extra reference to the file descriptor | |
61 | to fix this, but I instead suggested we just streamline the whole file | |
62 | pointer handling by submit_io() so that the generic aio submission code | |
63 | simply keeps the file pointer around until the aio has completed. | |
64 | ||
65 | Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL") | |
66 | Acked-by: Al Viro <viro@zeniv.linux.org.uk> | |
67 | Reported-by: syzbot+503d4cc169fcec1cb18c@syzkaller.appspotmail.com | |
68 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
69 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
70 | ||
71 | --- | |
72 | fs/aio.c | 67 +++++++++++++++++++++-------------------------------- | |
73 | include/linux/fs.h | 8 +++++- | |
74 | 2 files changed, 34 insertions(+), 41 deletions(-) | |
75 | ||
76 | --- a/fs/aio.c | |
77 | +++ b/fs/aio.c | |
78 | @@ -161,9 +161,13 @@ struct kioctx { | |
79 | unsigned id; | |
80 | }; | |
81 | ||
82 | +/* | |
83 | + * First field must be the file pointer in all the | |
84 | + * iocb unions! See also 'struct kiocb' in <linux/fs.h> | |
85 | + */ | |
86 | struct fsync_iocb { | |
87 | - struct work_struct work; | |
88 | struct file *file; | |
89 | + struct work_struct work; | |
90 | bool datasync; | |
91 | }; | |
92 | ||
93 | @@ -177,8 +181,15 @@ struct poll_iocb { | |
94 | struct work_struct work; | |
95 | }; | |
96 | ||
97 | +/* | |
98 | + * NOTE! Each of the iocb union members has the file pointer | |
99 | + * as the first entry in their struct definition. So you can | |
100 | + * access the file pointer through any of the sub-structs, | |
101 | + * or directly as just 'ki_filp' in this struct. | |
102 | + */ | |
103 | struct aio_kiocb { | |
104 | union { | |
105 | + struct file *ki_filp; | |
106 | struct kiocb rw; | |
107 | struct fsync_iocb fsync; | |
108 | struct poll_iocb poll; | |
109 | @@ -1054,6 +1065,8 @@ static inline void iocb_put(struct aio_k | |
110 | { | |
111 | if (refcount_read(&iocb->ki_refcnt) == 0 || | |
112 | refcount_dec_and_test(&iocb->ki_refcnt)) { | |
113 | + if (iocb->ki_filp) | |
114 | + fput(iocb->ki_filp); | |
115 | percpu_ref_put(&iocb->ki_ctx->reqs); | |
116 | kmem_cache_free(kiocb_cachep, iocb); | |
117 | } | |
118 | @@ -1412,7 +1425,6 @@ static void aio_complete_rw(struct kiocb | |
119 | file_end_write(kiocb->ki_filp); | |
120 | } | |
121 | ||
122 | - fput(kiocb->ki_filp); | |
123 | aio_complete(iocb, res, res2); | |
124 | } | |
125 | ||
126 | @@ -1420,9 +1432,6 @@ static int aio_prep_rw(struct kiocb *req | |
127 | { | |
128 | int ret; | |
129 | ||
130 | - req->ki_filp = fget(iocb->aio_fildes); | |
131 | - if (unlikely(!req->ki_filp)) | |
132 | - return -EBADF; | |
133 | req->ki_complete = aio_complete_rw; | |
134 | req->ki_pos = iocb->aio_offset; | |
135 | req->ki_flags = iocb_flags(req->ki_filp); | |
136 | @@ -1438,7 +1447,6 @@ static int aio_prep_rw(struct kiocb *req | |
137 | ret = ioprio_check_cap(iocb->aio_reqprio); | |
138 | if (ret) { | |
139 | pr_debug("aio ioprio check cap error: %d\n", ret); | |
140 | - fput(req->ki_filp); | |
141 | return ret; | |
142 | } | |
143 | ||
144 | @@ -1447,8 +1455,6 @@ static int aio_prep_rw(struct kiocb *req | |
145 | req->ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); | |
146 | ||
147 | ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); | |
148 | - if (unlikely(ret)) | |
149 | - fput(req->ki_filp); | |
150 | return ret; | |
151 | } | |
152 | ||
153 | @@ -1503,24 +1509,19 @@ static ssize_t aio_read(struct kiocb *re | |
154 | if (ret) | |
155 | return ret; | |
156 | file = req->ki_filp; | |
157 | - | |
158 | - ret = -EBADF; | |
159 | if (unlikely(!(file->f_mode & FMODE_READ))) | |
160 | - goto out_fput; | |
161 | + return -EBADF; | |
162 | ret = -EINVAL; | |
163 | if (unlikely(!file->f_op->read_iter)) | |
164 | - goto out_fput; | |
165 | + return -EINVAL; | |
166 | ||
167 | ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); | |
168 | if (ret) | |
169 | - goto out_fput; | |
170 | + return ret; | |
171 | ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); | |
172 | if (!ret) | |
173 | aio_rw_done(req, call_read_iter(file, req, &iter)); | |
174 | kfree(iovec); | |
175 | -out_fput: | |
176 | - if (unlikely(ret)) | |
177 | - fput(file); | |
178 | return ret; | |
179 | } | |
180 | ||
181 | @@ -1537,16 +1538,14 @@ static ssize_t aio_write(struct kiocb *r | |
182 | return ret; | |
183 | file = req->ki_filp; | |
184 | ||
185 | - ret = -EBADF; | |
186 | if (unlikely(!(file->f_mode & FMODE_WRITE))) | |
187 | - goto out_fput; | |
188 | - ret = -EINVAL; | |
189 | + return -EBADF; | |
190 | if (unlikely(!file->f_op->write_iter)) | |
191 | - goto out_fput; | |
192 | + return -EINVAL; | |
193 | ||
194 | ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); | |
195 | if (ret) | |
196 | - goto out_fput; | |
197 | + return ret; | |
198 | ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); | |
199 | if (!ret) { | |
200 | /* | |
201 | @@ -1564,9 +1563,6 @@ static ssize_t aio_write(struct kiocb *r | |
202 | aio_rw_done(req, call_write_iter(file, req, &iter)); | |
203 | } | |
204 | kfree(iovec); | |
205 | -out_fput: | |
206 | - if (unlikely(ret)) | |
207 | - fput(file); | |
208 | return ret; | |
209 | } | |
210 | ||
211 | @@ -1576,7 +1572,6 @@ static void aio_fsync_work(struct work_s | |
212 | int ret; | |
213 | ||
214 | ret = vfs_fsync(req->file, req->datasync); | |
215 | - fput(req->file); | |
216 | aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0); | |
217 | } | |
218 | ||
219 | @@ -1586,13 +1581,8 @@ static int aio_fsync(struct fsync_iocb * | |
220 | iocb->aio_rw_flags)) | |
221 | return -EINVAL; | |
222 | ||
223 | - req->file = fget(iocb->aio_fildes); | |
224 | - if (unlikely(!req->file)) | |
225 | - return -EBADF; | |
226 | - if (unlikely(!req->file->f_op->fsync)) { | |
227 | - fput(req->file); | |
228 | + if (unlikely(!req->file->f_op->fsync)) | |
229 | return -EINVAL; | |
230 | - } | |
231 | ||
232 | req->datasync = datasync; | |
233 | INIT_WORK(&req->work, aio_fsync_work); | |
234 | @@ -1602,10 +1592,7 @@ static int aio_fsync(struct fsync_iocb * | |
235 | ||
236 | static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) | |
237 | { | |
238 | - struct file *file = iocb->poll.file; | |
239 | - | |
240 | aio_complete(iocb, mangle_poll(mask), 0); | |
241 | - fput(file); | |
242 | } | |
243 | ||
244 | static void aio_poll_complete_work(struct work_struct *work) | |
245 | @@ -1730,9 +1717,6 @@ static ssize_t aio_poll(struct aio_kiocb | |
246 | ||
247 | INIT_WORK(&req->work, aio_poll_complete_work); | |
248 | req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; | |
249 | - req->file = fget(iocb->aio_fildes); | |
250 | - if (unlikely(!req->file)) | |
251 | - return -EBADF; | |
252 | ||
253 | apt.pt._qproc = aio_poll_queue_proc; | |
254 | apt.pt._key = req->events; | |
255 | @@ -1771,10 +1755,8 @@ static ssize_t aio_poll(struct aio_kiocb | |
256 | spin_unlock_irq(&ctx->ctx_lock); | |
257 | ||
258 | out: | |
259 | - if (unlikely(apt.error)) { | |
260 | - fput(req->file); | |
261 | + if (unlikely(apt.error)) | |
262 | return apt.error; | |
263 | - } | |
264 | ||
265 | if (mask) | |
266 | aio_poll_complete(aiocb, mask); | |
267 | @@ -1812,6 +1794,11 @@ static int io_submit_one(struct kioctx * | |
268 | if (unlikely(!req)) | |
269 | return -EAGAIN; | |
270 | ||
271 | + req->ki_filp = fget(iocb->aio_fildes); | |
272 | + ret = -EBADF; | |
273 | + if (unlikely(!req->ki_filp)) | |
274 | + goto out_put_req; | |
275 | + | |
276 | if (iocb.aio_flags & IOCB_FLAG_RESFD) { | |
277 | /* | |
278 | * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an | |
279 | --- a/include/linux/fs.h | |
280 | +++ b/include/linux/fs.h | |
281 | @@ -304,13 +304,19 @@ enum rw_hint { | |
282 | ||
283 | struct kiocb { | |
284 | struct file *ki_filp; | |
285 | + | |
286 | + /* The 'ki_filp' pointer is shared in a union for aio */ | |
287 | + randomized_struct_fields_start | |
288 | + | |
289 | loff_t ki_pos; | |
290 | void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); | |
291 | void *private; | |
292 | int ki_flags; | |
293 | u16 ki_hint; | |
294 | u16 ki_ioprio; /* See linux/ioprio.h */ | |
295 | -} __randomize_layout; | |
296 | + | |
297 | + randomized_struct_fields_end | |
298 | +}; | |
299 | ||
300 | static inline bool is_sync_kiocb(struct kiocb *kiocb) | |
301 | { |