// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT (60 * SEC_CONVERSION)
/*
 * One tracked NAPI instance. An entry lives in both the per-ctx hash
 * table (lookup by napi_id) and the per-ctx list (iteration while busy
 * polling). Freed via RCU once removed.
 */
struct io_napi_entry {
	unsigned int napi_id;	/* NAPI instance id being tracked */
	struct list_head list;	/* link in ctx->napi_list */

	unsigned long timeout;	/* jiffies after which the entry is stale */
	struct hlist_node node;	/* link in the ctx->napi_ht bucket */

	struct rcu_head rcu;	/* deferred free past an RCU grace period */
};
20 | ||
21 | static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, | |
22 | unsigned int napi_id) | |
23 | { | |
24 | struct io_napi_entry *e; | |
25 | ||
26 | hlist_for_each_entry_rcu(e, hash_list, node) { | |
27 | if (e->napi_id != napi_id) | |
28 | continue; | |
29 | e->timeout = jiffies + NAPI_TIMEOUT; | |
30 | return e; | |
31 | } | |
32 | ||
33 | return NULL; | |
34 | } | |
35 | ||
36 | void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) | |
37 | { | |
38 | struct hlist_head *hash_list; | |
39 | unsigned int napi_id; | |
40 | struct sock *sk; | |
41 | struct io_napi_entry *e; | |
42 | ||
43 | sk = sock->sk; | |
44 | if (!sk) | |
45 | return; | |
46 | ||
47 | napi_id = READ_ONCE(sk->sk_napi_id); | |
48 | ||
49 | /* Non-NAPI IDs can be rejected. */ | |
50 | if (napi_id < MIN_NAPI_ID) | |
51 | return; | |
52 | ||
53 | hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; | |
54 | ||
55 | rcu_read_lock(); | |
56 | e = io_napi_hash_find(hash_list, napi_id); | |
57 | if (e) { | |
58 | e->timeout = jiffies + NAPI_TIMEOUT; | |
59 | rcu_read_unlock(); | |
60 | return; | |
61 | } | |
62 | rcu_read_unlock(); | |
63 | ||
64 | e = kmalloc(sizeof(*e), GFP_NOWAIT); | |
65 | if (!e) | |
66 | return; | |
67 | ||
68 | e->napi_id = napi_id; | |
69 | e->timeout = jiffies + NAPI_TIMEOUT; | |
70 | ||
71 | spin_lock(&ctx->napi_lock); | |
72 | if (unlikely(io_napi_hash_find(hash_list, napi_id))) { | |
73 | spin_unlock(&ctx->napi_lock); | |
74 | kfree(e); | |
75 | return; | |
76 | } | |
77 | ||
78 | hlist_add_tail_rcu(&e->node, hash_list); | |
79 | list_add_tail(&e->list, &ctx->napi_list); | |
80 | spin_unlock(&ctx->napi_lock); | |
81 | } | |
82 | ||
/*
 * Walk the whole hash table and delete every entry whose stale timeout
 * has expired. Removal uses hash_del_rcu() and defers the free with
 * kfree_rcu(), so concurrent RCU readers stay safe; continuing the walk
 * after hash_del_rcu() works because the removed node's forward pointer
 * remains valid until a grace period elapses.
 */
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}
98 | ||
99 | static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) | |
100 | { | |
101 | if (is_stale) | |
102 | __io_napi_remove_stale(ctx); | |
103 | } | |
104 | ||
105 | static inline bool io_napi_busy_loop_timeout(unsigned long start_time, | |
106 | unsigned long bp_usec) | |
107 | { | |
108 | if (bp_usec) { | |
109 | unsigned long end_time = start_time + bp_usec; | |
110 | unsigned long now = busy_loop_current_time(); | |
111 | ||
112 | return time_after(now, end_time); | |
113 | } | |
114 | ||
115 | return true; | |
116 | } | |
117 | ||
118 | static bool io_napi_busy_loop_should_end(void *data, | |
119 | unsigned long start_time) | |
120 | { | |
121 | struct io_wait_queue *iowq = data; | |
122 | ||
123 | if (signal_pending(current)) | |
124 | return true; | |
428f1382 | 125 | if (io_should_wake(iowq) || io_has_work(iowq->ctx)) |
8d0c12a8 SR |
126 | return true; |
127 | if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to)) | |
128 | return true; | |
129 | ||
130 | return false; | |
131 | } | |
132 | ||
/*
 * Busy poll each tracked NAPI instance once. When @loop_end_arg is set
 * (the singular-list case), the per-id poll also terminates early via
 * io_napi_busy_loop_should_end(). Returns true if any entry's stale
 * timeout expired so the caller can prune afterwards. Must be called
 * under the RCU read lock.
 */
static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}
153 | ||
/*
 * Busy poll the tracked NAPI ids until io_napi_busy_loop_should_end()
 * says the wait is over, then prune any stale entries that were seen.
 */
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}
175 | ||
/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context: empty tracking list, the
 * protecting lock, and a busy poll timeout seeded from the
 * net.core.busy_poll sysctl.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
}
189 | ||
190 | /* | |
191 | * io_napi_free() - Deallocate napi | |
192 | * @ctx: pointer to io-uring context structure | |
193 | * | |
194 | * Free the napi list and the hash table in the io-uring context. | |
195 | */ | |
196 | void io_napi_free(struct io_ring_ctx *ctx) | |
197 | { | |
198 | struct io_napi_entry *e; | |
199 | LIST_HEAD(napi_list); | |
200 | unsigned int i; | |
201 | ||
202 | spin_lock(&ctx->napi_lock); | |
203 | hash_for_each(ctx->napi_ht, i, e, node) { | |
204 | hash_del_rcu(&e->node); | |
205 | kfree_rcu(e, rcu); | |
206 | } | |
207 | spin_unlock(&ctx->napi_lock); | |
208 | } | |
209 | ||
/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context: the previous settings are
 * copied back to user space, then the new busy poll timeout and
 * prefer-busy-poll flag are installed and napi is marked enabled.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to = ctx->napi_busy_poll_to,
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	/* Reserved fields must be zero for forward compatibility. */
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}
238 | ||
/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to = ctx->napi_busy_poll_to,
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}
262 | ||
8d0c12a8 SR |
263 | /* |
264 | * __io_napi_adjust_timeout() - Add napi id to the busy poll list | |
265 | * @ctx: pointer to io-uring context structure | |
266 | * @iowq: pointer to io wait queue | |
267 | * @ts: pointer to timespec or NULL | |
268 | * | |
269 | * Adjust the busy loop timeout according to timespec and busy poll timeout. | |
270 | */ | |
271 | void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, | |
272 | struct timespec64 *ts) | |
273 | { | |
274 | unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to); | |
275 | ||
276 | if (ts) { | |
277 | struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to); | |
278 | ||
279 | if (timespec64_compare(ts, &poll_to_ts) > 0) { | |
280 | *ts = timespec64_sub(*ts, poll_to_ts); | |
281 | } else { | |
282 | u64 to = timespec64_to_ns(ts); | |
283 | ||
284 | do_div(to, 1000); | |
285 | ts->tv_sec = 0; | |
286 | ts->tv_nsec = 0; | |
287 | } | |
288 | } | |
289 | ||
290 | iowq->napi_busy_poll_to = poll_to; | |
291 | } | |
292 | ||
293 | /* | |
294 | * __io_napi_busy_loop() - execute busy poll loop | |
295 | * @ctx: pointer to io-uring context structure | |
296 | * @iowq: pointer to io wait queue | |
297 | * | |
298 | * Execute the busy poll loop and merge the spliced off list. | |
299 | */ | |
300 | void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) | |
301 | { | |
302 | iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); | |
303 | ||
b4ccc4dd | 304 | if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) |
8d0c12a8 SR |
305 | io_napi_blocking_busy_loop(ctx, iowq); |
306 | } | |
307 | ||
ff183d42 SR |
308 | /* |
309 | * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll | |
310 | * @ctx: pointer to io-uring context structure | |
311 | * | |
312 | * Splice of the napi list and execute the napi busy poll loop. | |
313 | */ | |
314 | int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) | |
315 | { | |
316 | LIST_HEAD(napi_list); | |
317 | bool is_stale = false; | |
318 | ||
319 | if (!READ_ONCE(ctx->napi_busy_poll_to)) | |
320 | return 0; | |
321 | if (list_empty_careful(&ctx->napi_list)) | |
322 | return 0; | |
323 | ||
324 | rcu_read_lock(); | |
325 | is_stale = __io_napi_do_busy_loop(ctx, NULL); | |
326 | rcu_read_unlock(); | |
327 | ||
328 | io_napi_remove_stale(ctx, is_stale); | |
329 | return 1; | |
330 | } | |
331 | ||
#endif /* CONFIG_NET_RX_BUSY_POLL */