]>
Commit | Line | Data |
---|---|---|
da9b67a0 GKH |
1 | From 561e4f9451d65fc2f7eef564e0064373e3019793 Mon Sep 17 00:00:00 2001 |
2 | From: Jens Axboe <axboe@kernel.dk> | |
3 | Date: Tue, 2 Apr 2024 16:16:03 -0600 | |
4 | Subject: io_uring/kbuf: hold io_buffer_list reference over mmap | |
5 | ||
6 | From: Jens Axboe <axboe@kernel.dk> | |
7 | ||
8 | commit 561e4f9451d65fc2f7eef564e0064373e3019793 upstream. | |
9 | ||
10 | If we look up the kbuf, ensure that it doesn't get unregistered until | |
11 | after we're done with it. Since we're inside mmap, we cannot safely use | |
12 | the io_uring lock. Rely on the fact that we can lookup the buffer list | |
13 | under RCU now and grab a reference to it, preventing it from being | |
14 | unregistered until we're done with it. The lookup returns the | |
15 | io_buffer_list directly with it referenced. | |
16 | ||
17 | Cc: stable@vger.kernel.org # v6.4+ | |
18 | Fixes: 5cf4f52e6d8a ("io_uring: free io_buffer_list entries via RCU") | |
19 | Signed-off-by: Jens Axboe <axboe@kernel.dk> | |
20 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
21 | --- | |
22 | io_uring/io_uring.c | 11 ++++++----- | |
23 | io_uring/kbuf.c | 31 +++++++++++++++++++++++++------ | |
24 | io_uring/kbuf.h | 4 +++- | |
25 | 3 files changed, 34 insertions(+), 12 deletions(-) | |
26 | ||
27 | --- a/io_uring/io_uring.c | |
28 | +++ b/io_uring/io_uring.c | |
29 | @@ -3429,14 +3429,15 @@ static void *io_uring_validate_mmap_requ | |
30 | ptr = ctx->sq_sqes; | |
31 | break; | |
32 | case IORING_OFF_PBUF_RING: { | |
33 | + struct io_buffer_list *bl; | |
34 | unsigned int bgid; | |
35 | ||
36 | bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; | |
37 | - rcu_read_lock(); | |
38 | - ptr = io_pbuf_get_address(ctx, bgid); | |
39 | - rcu_read_unlock(); | |
40 | - if (!ptr) | |
41 | - return ERR_PTR(-EINVAL); | |
42 | + bl = io_pbuf_get_bl(ctx, bgid); | |
43 | + if (IS_ERR(bl)) | |
44 | + return bl; | |
45 | + ptr = bl->buf_ring; | |
46 | + io_put_bl(ctx, bl); | |
47 | break; | |
48 | } | |
49 | default: | |
50 | --- a/io_uring/kbuf.c | |
51 | +++ b/io_uring/kbuf.c | |
52 | @@ -273,7 +273,7 @@ static int __io_remove_buffers(struct io | |
53 | return i; | |
54 | } | |
55 | ||
56 | -static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) | |
57 | +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) | |
58 | { | |
59 | if (atomic_dec_and_test(&bl->refs)) { | |
60 | __io_remove_buffers(ctx, bl, -1U); | |
61 | @@ -689,16 +689,35 @@ int io_unregister_pbuf_ring(struct io_ri | |
62 | return 0; | |
63 | } | |
64 | ||
65 | -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) | |
66 | +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, | |
67 | + unsigned long bgid) | |
68 | { | |
69 | struct io_buffer_list *bl; | |
70 | + bool ret; | |
71 | ||
72 | - bl = __io_buffer_get_list(ctx, bgid); | |
73 | + /* | |
74 | + * We have to be a bit careful here - we're inside mmap and cannot grab | |
75 | + * the uring_lock. This means the buffer_list could be simultaneously | |
76 | + * going away, if someone is trying to be sneaky. Look it up under rcu | |
77 | + * so we know it's not going away, and attempt to grab a reference to | |
78 | + * it. If the ref is already zero, then fail the mapping. If successful, | |
79 | + * the caller will call io_put_bl() to drop the reference at the | |
80 | + * end. This may then safely free the buffer_list (and drop the pages) | |
81 | + * at that point, vm_insert_pages() would've already grabbed the | |
82 | + * necessary vma references. | |
83 | + */ | |
84 | + rcu_read_lock(); | |
85 | + bl = xa_load(&ctx->io_bl_xa, bgid); | |
86 | + /* must be a mmap'able buffer ring and have pages */ | |
87 | + ret = false; | |
88 | + if (bl && bl->is_mmap) | |
89 | + ret = atomic_inc_not_zero(&bl->refs); | |
90 | + rcu_read_unlock(); | |
91 | ||
92 | - if (!bl || !bl->is_mmap) | |
93 | - return NULL; | |
94 | + if (ret) | |
95 | + return bl; | |
96 | ||
97 | - return bl->buf_ring; | |
98 | + return ERR_PTR(-EINVAL); | |
99 | } | |
100 | ||
101 | /* | |
102 | --- a/io_uring/kbuf.h | |
103 | +++ b/io_uring/kbuf.h | |
104 | @@ -60,7 +60,9 @@ unsigned int __io_put_kbuf(struct io_kio | |
105 | ||
106 | void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); | |
107 | ||
108 | -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); | |
109 | +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); | |
110 | +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, | |
111 | + unsigned long bgid); | |
112 | ||
113 | static inline void io_kbuf_recycle_ring(struct io_kiocb *req) | |
114 | { |