]>
Commit | Line | Data |
---|---|---|
1 | // SPDX-License-Identifier: GPL-2.0 | |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
17 | #define MIN_INFO_OPTLEN_SIZE 16 | |
18 | #define MIN_FULL_INFO_OPTLEN_SIZE 40 | |
19 | ||
20 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) | |
21 | { | |
22 | msk_owned_by_me(msk); | |
23 | ||
24 | if (likely(!__mptcp_check_fallback(msk))) | |
25 | return NULL; | |
26 | ||
27 | return msk->first; | |
28 | } | |
29 | ||
30 | static u32 sockopt_seq_reset(const struct sock *sk) | |
31 | { | |
32 | sock_owned_by_me(sk); | |
33 | ||
34 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
35 | * of listener and established: | |
36 | * s0 = new_listener() | |
37 | * sockopt(s0) - seq is 1 | |
38 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
39 | * sockopt(s0) - seq increments to 2 on s0 | |
40 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
41 | * new ssk completes join, inherits options from s0 // seq 2 | |
42 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
43 | * | |
44 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
45 | * will fail. | |
46 | */ | |
47 | ||
48 | return (u32)sk->sk_state << 24u; | |
49 | } | |
50 | ||
51 | static void sockopt_seq_inc(struct mptcp_sock *msk) | |
52 | { | |
53 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
54 | ||
55 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
56 | } | |
57 | ||
58 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
59 | unsigned int optlen, int *val) | |
60 | { | |
61 | if (optlen < sizeof(int)) | |
62 | return -EINVAL; | |
63 | ||
64 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
65 | return -EFAULT; | |
66 | ||
67 | return 0; | |
68 | } | |
69 | ||
70 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
71 | { | |
72 | struct mptcp_subflow_context *subflow; | |
73 | struct sock *sk = (struct sock *)msk; | |
74 | ||
75 | lock_sock(sk); | |
76 | sockopt_seq_inc(msk); | |
77 | ||
78 | mptcp_for_each_subflow(msk, subflow) { | |
79 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
80 | bool slow = lock_sock_fast(ssk); | |
81 | ||
82 | switch (optname) { | |
83 | case SO_DEBUG: | |
84 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
85 | break; | |
86 | case SO_KEEPALIVE: | |
87 | if (ssk->sk_prot->keepalive) | |
88 | ssk->sk_prot->keepalive(ssk, !!val); | |
89 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
90 | break; | |
91 | case SO_PRIORITY: | |
92 | WRITE_ONCE(ssk->sk_priority, val); | |
93 | break; | |
94 | case SO_SNDBUF: | |
95 | case SO_SNDBUFFORCE: | |
96 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
97 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
98 | mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; | |
99 | break; | |
100 | case SO_RCVBUF: | |
101 | case SO_RCVBUFFORCE: | |
102 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
103 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
104 | break; | |
105 | case SO_MARK: | |
106 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
107 | WRITE_ONCE(ssk->sk_mark, sk->sk_mark); | |
108 | sk_dst_reset(ssk); | |
109 | } | |
110 | break; | |
111 | case SO_INCOMING_CPU: | |
112 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
113 | break; | |
114 | } | |
115 | ||
116 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
117 | unlock_sock_fast(ssk, slow); | |
118 | } | |
119 | ||
120 | release_sock(sk); | |
121 | } | |
122 | ||
123 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
124 | { | |
125 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
126 | struct sock *sk = (struct sock *)msk; | |
127 | int ret; | |
128 | ||
129 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
130 | optval, sizeof(val)); | |
131 | if (ret) | |
132 | return ret; | |
133 | ||
134 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
135 | return 0; | |
136 | } | |
137 | ||
138 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) | |
139 | { | |
140 | struct sock *sk = (struct sock *)msk; | |
141 | ||
142 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
143 | ||
144 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
145 | } | |
146 | ||
147 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) | |
148 | { | |
149 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
150 | struct mptcp_subflow_context *subflow; | |
151 | struct sock *sk = (struct sock *)msk; | |
152 | int ret; | |
153 | ||
154 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
155 | optval, sizeof(val)); | |
156 | if (ret) | |
157 | return ret; | |
158 | ||
159 | lock_sock(sk); | |
160 | mptcp_for_each_subflow(msk, subflow) { | |
161 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
162 | bool slow = lock_sock_fast(ssk); | |
163 | ||
164 | sock_set_timestamp(sk, optname, !!val); | |
165 | unlock_sock_fast(ssk, slow); | |
166 | } | |
167 | ||
168 | release_sock(sk); | |
169 | return 0; | |
170 | } | |
171 | ||
172 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, | |
173 | sockptr_t optval, | |
174 | unsigned int optlen) | |
175 | { | |
176 | int val, ret; | |
177 | ||
178 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
179 | if (ret) | |
180 | return ret; | |
181 | ||
182 | switch (optname) { | |
183 | case SO_KEEPALIVE: | |
184 | case SO_DEBUG: | |
185 | case SO_MARK: | |
186 | case SO_PRIORITY: | |
187 | case SO_SNDBUF: | |
188 | case SO_SNDBUFFORCE: | |
189 | case SO_RCVBUF: | |
190 | case SO_RCVBUFFORCE: | |
191 | return mptcp_sol_socket_intval(msk, optname, val); | |
192 | case SO_INCOMING_CPU: | |
193 | mptcp_so_incoming_cpu(msk, val); | |
194 | return 0; | |
195 | case SO_TIMESTAMP_OLD: | |
196 | case SO_TIMESTAMP_NEW: | |
197 | case SO_TIMESTAMPNS_OLD: | |
198 | case SO_TIMESTAMPNS_NEW: | |
199 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); | |
200 | } | |
201 | ||
202 | return -ENOPROTOOPT; | |
203 | } | |
204 | ||
205 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, | |
206 | int optname, | |
207 | sockptr_t optval, | |
208 | unsigned int optlen) | |
209 | { | |
210 | struct mptcp_subflow_context *subflow; | |
211 | struct sock *sk = (struct sock *)msk; | |
212 | struct so_timestamping timestamping; | |
213 | int ret; | |
214 | ||
215 | if (optlen == sizeof(timestamping)) { | |
216 | if (copy_from_sockptr(×tamping, optval, | |
217 | sizeof(timestamping))) | |
218 | return -EFAULT; | |
219 | } else if (optlen == sizeof(int)) { | |
220 | memset(×tamping, 0, sizeof(timestamping)); | |
221 | ||
222 | if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) | |
223 | return -EFAULT; | |
224 | } else { | |
225 | return -EINVAL; | |
226 | } | |
227 | ||
228 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
229 | KERNEL_SOCKPTR(×tamping), | |
230 | sizeof(timestamping)); | |
231 | if (ret) | |
232 | return ret; | |
233 | ||
234 | lock_sock(sk); | |
235 | ||
236 | mptcp_for_each_subflow(msk, subflow) { | |
237 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
238 | bool slow = lock_sock_fast(ssk); | |
239 | ||
240 | sock_set_timestamping(sk, optname, timestamping); | |
241 | unlock_sock_fast(ssk, slow); | |
242 | } | |
243 | ||
244 | release_sock(sk); | |
245 | ||
246 | return 0; | |
247 | } | |
248 | ||
249 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, | |
250 | unsigned int optlen) | |
251 | { | |
252 | struct mptcp_subflow_context *subflow; | |
253 | struct sock *sk = (struct sock *)msk; | |
254 | struct linger ling; | |
255 | sockptr_t kopt; | |
256 | int ret; | |
257 | ||
258 | if (optlen < sizeof(ling)) | |
259 | return -EINVAL; | |
260 | ||
261 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
262 | return -EFAULT; | |
263 | ||
264 | kopt = KERNEL_SOCKPTR(&ling); | |
265 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
266 | if (ret) | |
267 | return ret; | |
268 | ||
269 | lock_sock(sk); | |
270 | sockopt_seq_inc(msk); | |
271 | mptcp_for_each_subflow(msk, subflow) { | |
272 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
273 | bool slow = lock_sock_fast(ssk); | |
274 | ||
275 | if (!ling.l_onoff) { | |
276 | sock_reset_flag(ssk, SOCK_LINGER); | |
277 | } else { | |
278 | ssk->sk_lingertime = sk->sk_lingertime; | |
279 | sock_set_flag(ssk, SOCK_LINGER); | |
280 | } | |
281 | ||
282 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
283 | unlock_sock_fast(ssk, slow); | |
284 | } | |
285 | ||
286 | release_sock(sk); | |
287 | return 0; | |
288 | } | |
289 | ||
290 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, | |
291 | sockptr_t optval, unsigned int optlen) | |
292 | { | |
293 | struct sock *sk = (struct sock *)msk; | |
294 | struct sock *ssk; | |
295 | int ret; | |
296 | ||
297 | switch (optname) { | |
298 | case SO_REUSEPORT: | |
299 | case SO_REUSEADDR: | |
300 | case SO_BINDTODEVICE: | |
301 | case SO_BINDTOIFINDEX: | |
302 | lock_sock(sk); | |
303 | ssk = __mptcp_nmpc_sk(msk); | |
304 | if (IS_ERR(ssk)) { | |
305 | release_sock(sk); | |
306 | return PTR_ERR(ssk); | |
307 | } | |
308 | ||
309 | ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); | |
310 | if (ret == 0) { | |
311 | if (optname == SO_REUSEPORT) | |
312 | sk->sk_reuseport = ssk->sk_reuseport; | |
313 | else if (optname == SO_REUSEADDR) | |
314 | sk->sk_reuse = ssk->sk_reuse; | |
315 | else if (optname == SO_BINDTODEVICE) | |
316 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; | |
317 | else if (optname == SO_BINDTOIFINDEX) | |
318 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; | |
319 | } | |
320 | release_sock(sk); | |
321 | return ret; | |
322 | case SO_KEEPALIVE: | |
323 | case SO_PRIORITY: | |
324 | case SO_SNDBUF: | |
325 | case SO_SNDBUFFORCE: | |
326 | case SO_RCVBUF: | |
327 | case SO_RCVBUFFORCE: | |
328 | case SO_MARK: | |
329 | case SO_INCOMING_CPU: | |
330 | case SO_DEBUG: | |
331 | case SO_TIMESTAMP_OLD: | |
332 | case SO_TIMESTAMP_NEW: | |
333 | case SO_TIMESTAMPNS_OLD: | |
334 | case SO_TIMESTAMPNS_NEW: | |
335 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, | |
336 | optlen); | |
337 | case SO_TIMESTAMPING_OLD: | |
338 | case SO_TIMESTAMPING_NEW: | |
339 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, | |
340 | optval, optlen); | |
341 | case SO_LINGER: | |
342 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
343 | case SO_RCVLOWAT: | |
344 | case SO_RCVTIMEO_OLD: | |
345 | case SO_RCVTIMEO_NEW: | |
346 | case SO_SNDTIMEO_OLD: | |
347 | case SO_SNDTIMEO_NEW: | |
348 | case SO_BUSY_POLL: | |
349 | case SO_PREFER_BUSY_POLL: | |
350 | case SO_BUSY_POLL_BUDGET: | |
351 | /* No need to copy: only relevant for msk */ | |
352 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
353 | case SO_NO_CHECK: | |
354 | case SO_DONTROUTE: | |
355 | case SO_BROADCAST: | |
356 | case SO_BSDCOMPAT: | |
357 | case SO_PASSCRED: | |
358 | case SO_PASSPIDFD: | |
359 | case SO_PASSSEC: | |
360 | case SO_RXQ_OVFL: | |
361 | case SO_WIFI_STATUS: | |
362 | case SO_NOFCS: | |
363 | case SO_SELECT_ERR_QUEUE: | |
364 | return 0; | |
365 | } | |
366 | ||
367 | /* SO_OOBINLINE is not supported, let's avoid the related mess | |
368 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
369 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
370 | * we must be careful with subflows | |
371 | * | |
372 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
373 | * explicitly the sk_protocol field | |
374 | * | |
375 | * SO_PEEK_OFF is unsupported, as it is for plain TCP | |
376 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows | |
377 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
378 | * but likely needs careful design | |
379 | * | |
380 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg | |
381 | * SO_TXTIME is currently unsupported | |
382 | */ | |
383 | ||
384 | return -EOPNOTSUPP; | |
385 | } | |
386 | ||
387 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
388 | sockptr_t optval, unsigned int optlen) | |
389 | { | |
390 | struct sock *sk = (struct sock *)msk; | |
391 | int ret = -EOPNOTSUPP; | |
392 | struct sock *ssk; | |
393 | ||
394 | switch (optname) { | |
395 | case IPV6_V6ONLY: | |
396 | case IPV6_TRANSPARENT: | |
397 | case IPV6_FREEBIND: | |
398 | lock_sock(sk); | |
399 | ssk = __mptcp_nmpc_sk(msk); | |
400 | if (IS_ERR(ssk)) { | |
401 | release_sock(sk); | |
402 | return PTR_ERR(ssk); | |
403 | } | |
404 | ||
405 | ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); | |
406 | if (ret != 0) { | |
407 | release_sock(sk); | |
408 | return ret; | |
409 | } | |
410 | ||
411 | sockopt_seq_inc(msk); | |
412 | ||
413 | switch (optname) { | |
414 | case IPV6_V6ONLY: | |
415 | sk->sk_ipv6only = ssk->sk_ipv6only; | |
416 | break; | |
417 | case IPV6_TRANSPARENT: | |
418 | inet_assign_bit(TRANSPARENT, sk, | |
419 | inet_test_bit(TRANSPARENT, ssk)); | |
420 | break; | |
421 | case IPV6_FREEBIND: | |
422 | inet_assign_bit(FREEBIND, sk, | |
423 | inet_test_bit(FREEBIND, ssk)); | |
424 | break; | |
425 | } | |
426 | ||
427 | release_sock(sk); | |
428 | break; | |
429 | } | |
430 | ||
431 | return ret; | |
432 | } | |
433 | ||
434 | static bool mptcp_supported_sockopt(int level, int optname) | |
435 | { | |
436 | if (level == SOL_IP) { | |
437 | switch (optname) { | |
438 | /* should work fine */ | |
439 | case IP_FREEBIND: | |
440 | case IP_TRANSPARENT: | |
441 | case IP_BIND_ADDRESS_NO_PORT: | |
442 | case IP_LOCAL_PORT_RANGE: | |
443 | ||
444 | /* the following are control cmsg related */ | |
445 | case IP_PKTINFO: | |
446 | case IP_RECVTTL: | |
447 | case IP_RECVTOS: | |
448 | case IP_RECVOPTS: | |
449 | case IP_RETOPTS: | |
450 | case IP_PASSSEC: | |
451 | case IP_RECVORIGDSTADDR: | |
452 | case IP_CHECKSUM: | |
453 | case IP_RECVFRAGSIZE: | |
454 | ||
455 | /* common stuff that need some love */ | |
456 | case IP_TOS: | |
457 | case IP_TTL: | |
458 | case IP_MTU_DISCOVER: | |
459 | case IP_RECVERR: | |
460 | ||
461 | /* possibly less common may deserve some love */ | |
462 | case IP_MINTTL: | |
463 | ||
464 | /* the following is apparently a no-op for plain TCP */ | |
465 | case IP_RECVERR_RFC4884: | |
466 | return true; | |
467 | } | |
468 | ||
469 | /* IP_OPTIONS is not supported, needs subflow care */ | |
470 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
471 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
472 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
473 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
474 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
475 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
476 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
477 | * with mcast stuff | |
478 | */ | |
479 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
480 | return false; | |
481 | } | |
482 | if (level == SOL_IPV6) { | |
483 | switch (optname) { | |
484 | case IPV6_V6ONLY: | |
485 | ||
486 | /* the following are control cmsg related */ | |
487 | case IPV6_RECVPKTINFO: | |
488 | case IPV6_2292PKTINFO: | |
489 | case IPV6_RECVHOPLIMIT: | |
490 | case IPV6_2292HOPLIMIT: | |
491 | case IPV6_RECVRTHDR: | |
492 | case IPV6_2292RTHDR: | |
493 | case IPV6_RECVHOPOPTS: | |
494 | case IPV6_2292HOPOPTS: | |
495 | case IPV6_RECVDSTOPTS: | |
496 | case IPV6_2292DSTOPTS: | |
497 | case IPV6_RECVTCLASS: | |
498 | case IPV6_FLOWINFO: | |
499 | case IPV6_RECVPATHMTU: | |
500 | case IPV6_RECVORIGDSTADDR: | |
501 | case IPV6_RECVFRAGSIZE: | |
502 | ||
503 | /* the following ones need some love but are quite common */ | |
504 | case IPV6_TCLASS: | |
505 | case IPV6_TRANSPARENT: | |
506 | case IPV6_FREEBIND: | |
507 | case IPV6_PKTINFO: | |
508 | case IPV6_2292PKTOPTIONS: | |
509 | case IPV6_UNICAST_HOPS: | |
510 | case IPV6_MTU_DISCOVER: | |
511 | case IPV6_MTU: | |
512 | case IPV6_RECVERR: | |
513 | case IPV6_FLOWINFO_SEND: | |
514 | case IPV6_FLOWLABEL_MGR: | |
515 | case IPV6_MINHOPCOUNT: | |
516 | case IPV6_DONTFRAG: | |
517 | case IPV6_AUTOFLOWLABEL: | |
518 | ||
519 | /* the following one is a no-op for plain TCP */ | |
520 | case IPV6_RECVERR_RFC4884: | |
521 | return true; | |
522 | } | |
523 | ||
524 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
525 | * not supported | |
526 | */ | |
527 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
528 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
529 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
530 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
531 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
532 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
533 | * are not supported better not deal with mcast | |
534 | */ | |
535 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
536 | ||
537 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
538 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
539 | return false; | |
540 | } | |
541 | if (level == SOL_TCP) { | |
542 | switch (optname) { | |
543 | /* the following are no-op or should work just fine */ | |
544 | case TCP_THIN_DUPACK: | |
545 | case TCP_DEFER_ACCEPT: | |
546 | ||
547 | /* the following need some love */ | |
548 | case TCP_MAXSEG: | |
549 | case TCP_NODELAY: | |
550 | case TCP_THIN_LINEAR_TIMEOUTS: | |
551 | case TCP_CONGESTION: | |
552 | case TCP_CORK: | |
553 | case TCP_KEEPIDLE: | |
554 | case TCP_KEEPINTVL: | |
555 | case TCP_KEEPCNT: | |
556 | case TCP_SYNCNT: | |
557 | case TCP_SAVE_SYN: | |
558 | case TCP_LINGER2: | |
559 | case TCP_WINDOW_CLAMP: | |
560 | case TCP_QUICKACK: | |
561 | case TCP_USER_TIMEOUT: | |
562 | case TCP_TIMESTAMP: | |
563 | case TCP_NOTSENT_LOWAT: | |
564 | case TCP_TX_DELAY: | |
565 | case TCP_INQ: | |
566 | case TCP_FASTOPEN: | |
567 | case TCP_FASTOPEN_CONNECT: | |
568 | case TCP_FASTOPEN_KEY: | |
569 | case TCP_FASTOPEN_NO_COOKIE: | |
570 | return true; | |
571 | } | |
572 | ||
573 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
574 | ||
575 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
576 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
577 | */ | |
578 | } | |
579 | return false; | |
580 | } | |
581 | ||
582 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, | |
583 | unsigned int optlen) | |
584 | { | |
585 | struct mptcp_subflow_context *subflow; | |
586 | struct sock *sk = (struct sock *)msk; | |
587 | char name[TCP_CA_NAME_MAX]; | |
588 | bool cap_net_admin; | |
589 | int ret; | |
590 | ||
591 | if (optlen < 1) | |
592 | return -EINVAL; | |
593 | ||
594 | ret = strncpy_from_sockptr(name, optval, | |
595 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
596 | if (ret < 0) | |
597 | return -EFAULT; | |
598 | ||
599 | name[ret] = 0; | |
600 | ||
601 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
602 | ||
603 | ret = 0; | |
604 | lock_sock(sk); | |
605 | sockopt_seq_inc(msk); | |
606 | mptcp_for_each_subflow(msk, subflow) { | |
607 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
608 | int err; | |
609 | ||
610 | lock_sock(ssk); | |
611 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
612 | if (err < 0 && ret == 0) | |
613 | ret = err; | |
614 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
615 | release_sock(ssk); | |
616 | } | |
617 | ||
618 | if (ret == 0) | |
619 | strscpy(msk->ca_name, name, sizeof(msk->ca_name)); | |
620 | ||
621 | release_sock(sk); | |
622 | return ret; | |
623 | } | |
624 | ||
625 | static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, | |
626 | int (*set_val)(struct sock *, int), | |
627 | int *msk_val, int val) | |
628 | { | |
629 | struct mptcp_subflow_context *subflow; | |
630 | int err = 0; | |
631 | ||
632 | mptcp_for_each_subflow(msk, subflow) { | |
633 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
634 | int ret; | |
635 | ||
636 | lock_sock(ssk); | |
637 | ret = set_val(ssk, val); | |
638 | err = err ? : ret; | |
639 | release_sock(ssk); | |
640 | } | |
641 | ||
642 | if (!err) { | |
643 | *msk_val = val; | |
644 | sockopt_seq_inc(msk); | |
645 | } | |
646 | ||
647 | return err; | |
648 | } | |
649 | ||
650 | static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) | |
651 | { | |
652 | struct mptcp_subflow_context *subflow; | |
653 | struct sock *sk = (struct sock *)msk; | |
654 | ||
655 | sockopt_seq_inc(msk); | |
656 | msk->cork = !!val; | |
657 | mptcp_for_each_subflow(msk, subflow) { | |
658 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
659 | ||
660 | lock_sock(ssk); | |
661 | __tcp_sock_set_cork(ssk, !!val); | |
662 | release_sock(ssk); | |
663 | } | |
664 | if (!val) | |
665 | mptcp_check_and_set_pending(sk); | |
666 | ||
667 | return 0; | |
668 | } | |
669 | ||
670 | static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) | |
671 | { | |
672 | struct mptcp_subflow_context *subflow; | |
673 | struct sock *sk = (struct sock *)msk; | |
674 | ||
675 | sockopt_seq_inc(msk); | |
676 | msk->nodelay = !!val; | |
677 | mptcp_for_each_subflow(msk, subflow) { | |
678 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
679 | ||
680 | lock_sock(ssk); | |
681 | __tcp_sock_set_nodelay(ssk, !!val); | |
682 | release_sock(ssk); | |
683 | } | |
684 | if (val) | |
685 | mptcp_check_and_set_pending(sk); | |
686 | return 0; | |
687 | } | |
688 | ||
689 | static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, | |
690 | sockptr_t optval, unsigned int optlen) | |
691 | { | |
692 | struct sock *sk = (struct sock *)msk; | |
693 | struct sock *ssk; | |
694 | int err; | |
695 | ||
696 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
697 | if (err != 0) | |
698 | return err; | |
699 | ||
700 | lock_sock(sk); | |
701 | ||
702 | ssk = __mptcp_nmpc_sk(msk); | |
703 | if (IS_ERR(ssk)) { | |
704 | release_sock(sk); | |
705 | return PTR_ERR(ssk); | |
706 | } | |
707 | ||
708 | switch (optname) { | |
709 | case IP_FREEBIND: | |
710 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); | |
711 | break; | |
712 | case IP_TRANSPARENT: | |
713 | inet_assign_bit(TRANSPARENT, ssk, | |
714 | inet_test_bit(TRANSPARENT, sk)); | |
715 | break; | |
716 | case IP_BIND_ADDRESS_NO_PORT: | |
717 | inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, | |
718 | inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); | |
719 | break; | |
720 | case IP_LOCAL_PORT_RANGE: | |
721 | WRITE_ONCE(inet_sk(ssk)->local_port_range, | |
722 | READ_ONCE(inet_sk(sk)->local_port_range)); | |
723 | break; | |
724 | default: | |
725 | release_sock(sk); | |
726 | WARN_ON_ONCE(1); | |
727 | return -EOPNOTSUPP; | |
728 | } | |
729 | ||
730 | sockopt_seq_inc(msk); | |
731 | release_sock(sk); | |
732 | return 0; | |
733 | } | |
734 | ||
735 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, | |
736 | sockptr_t optval, unsigned int optlen) | |
737 | { | |
738 | struct mptcp_subflow_context *subflow; | |
739 | struct sock *sk = (struct sock *)msk; | |
740 | int err, val; | |
741 | ||
742 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
743 | ||
744 | if (err != 0) | |
745 | return err; | |
746 | ||
747 | lock_sock(sk); | |
748 | sockopt_seq_inc(msk); | |
749 | val = READ_ONCE(inet_sk(sk)->tos); | |
750 | mptcp_for_each_subflow(msk, subflow) { | |
751 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
752 | bool slow; | |
753 | ||
754 | slow = lock_sock_fast(ssk); | |
755 | __ip_sock_set_tos(ssk, val); | |
756 | unlock_sock_fast(ssk, slow); | |
757 | } | |
758 | release_sock(sk); | |
759 | ||
760 | return 0; | |
761 | } | |
762 | ||
763 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, | |
764 | sockptr_t optval, unsigned int optlen) | |
765 | { | |
766 | switch (optname) { | |
767 | case IP_FREEBIND: | |
768 | case IP_TRANSPARENT: | |
769 | case IP_BIND_ADDRESS_NO_PORT: | |
770 | case IP_LOCAL_PORT_RANGE: | |
771 | return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); | |
772 | case IP_TOS: | |
773 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); | |
774 | } | |
775 | ||
776 | return -EOPNOTSUPP; | |
777 | } | |
778 | ||
779 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
780 | sockptr_t optval, unsigned int optlen) | |
781 | { | |
782 | struct sock *sk = (struct sock *)msk; | |
783 | struct sock *ssk; | |
784 | int ret; | |
785 | ||
786 | /* Limit to first subflow, before the connection establishment */ | |
787 | lock_sock(sk); | |
788 | ssk = __mptcp_nmpc_sk(msk); | |
789 | if (IS_ERR(ssk)) { | |
790 | ret = PTR_ERR(ssk); | |
791 | goto unlock; | |
792 | } | |
793 | ||
794 | ret = tcp_setsockopt(ssk, level, optname, optval, optlen); | |
795 | ||
796 | unlock: | |
797 | release_sock(sk); | |
798 | return ret; | |
799 | } | |
800 | ||
801 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, | |
802 | sockptr_t optval, unsigned int optlen) | |
803 | { | |
804 | struct sock *sk = (void *)msk; | |
805 | int ret, val; | |
806 | ||
807 | switch (optname) { | |
808 | case TCP_ULP: | |
809 | return -EOPNOTSUPP; | |
810 | case TCP_CONGESTION: | |
811 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
812 | case TCP_DEFER_ACCEPT: | |
813 | /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ | |
814 | mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); | |
815 | return 0; | |
816 | case TCP_FASTOPEN: | |
817 | case TCP_FASTOPEN_CONNECT: | |
818 | case TCP_FASTOPEN_KEY: | |
819 | case TCP_FASTOPEN_NO_COOKIE: | |
820 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, | |
821 | optval, optlen); | |
822 | } | |
823 | ||
824 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
825 | if (ret) | |
826 | return ret; | |
827 | ||
828 | lock_sock(sk); | |
829 | switch (optname) { | |
830 | case TCP_INQ: | |
831 | if (val < 0 || val > 1) | |
832 | ret = -EINVAL; | |
833 | else | |
834 | msk->recvmsg_inq = !!val; | |
835 | break; | |
836 | case TCP_NOTSENT_LOWAT: | |
837 | WRITE_ONCE(msk->notsent_lowat, val); | |
838 | mptcp_write_space(sk); | |
839 | break; | |
840 | case TCP_CORK: | |
841 | ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); | |
842 | break; | |
843 | case TCP_NODELAY: | |
844 | ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); | |
845 | break; | |
846 | case TCP_KEEPIDLE: | |
847 | ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, | |
848 | &tcp_sock_set_keepidle_locked, | |
849 | &msk->keepalive_idle, val); | |
850 | break; | |
851 | case TCP_KEEPINTVL: | |
852 | ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, | |
853 | &tcp_sock_set_keepintvl, | |
854 | &msk->keepalive_intvl, val); | |
855 | break; | |
856 | case TCP_KEEPCNT: | |
857 | ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, | |
858 | &tcp_sock_set_keepcnt, | |
859 | &msk->keepalive_cnt, | |
860 | val); | |
861 | break; | |
862 | default: | |
863 | ret = -ENOPROTOOPT; | |
864 | } | |
865 | ||
866 | release_sock(sk); | |
867 | return ret; | |
868 | } | |
869 | ||
870 | int mptcp_setsockopt(struct sock *sk, int level, int optname, | |
871 | sockptr_t optval, unsigned int optlen) | |
872 | { | |
873 | struct mptcp_sock *msk = mptcp_sk(sk); | |
874 | struct sock *ssk; | |
875 | ||
876 | pr_debug("msk=%p\n", msk); | |
877 | ||
878 | if (level == SOL_SOCKET) | |
879 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
880 | ||
881 | if (!mptcp_supported_sockopt(level, optname)) | |
882 | return -ENOPROTOOPT; | |
883 | ||
884 | /* @@ the meaning of setsockopt() when the socket is connected and | |
885 | * there are multiple subflows is not yet defined. It is up to the | |
886 | * MPTCP-level socket to configure the subflows until the subflow | |
887 | * is in TCP fallback, when TCP socket options are passed through | |
888 | * to the one remaining subflow. | |
889 | */ | |
890 | lock_sock(sk); | |
891 | ssk = __mptcp_tcp_fallback(msk); | |
892 | release_sock(sk); | |
893 | if (ssk) | |
894 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
895 | ||
896 | if (level == SOL_IP) | |
897 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); | |
898 | ||
899 | if (level == SOL_IPV6) | |
900 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
901 | ||
902 | if (level == SOL_TCP) | |
903 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
904 | ||
905 | return -EOPNOTSUPP; | |
906 | } | |
907 | ||
908 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
909 | char __user *optval, int __user *optlen) | |
910 | { | |
911 | struct sock *sk = (struct sock *)msk; | |
912 | struct sock *ssk; | |
913 | int ret; | |
914 | ||
915 | lock_sock(sk); | |
916 | ssk = msk->first; | |
917 | if (ssk) { | |
918 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
919 | goto out; | |
920 | } | |
921 | ||
922 | ssk = __mptcp_nmpc_sk(msk); | |
923 | if (IS_ERR(ssk)) { | |
924 | ret = PTR_ERR(ssk); | |
925 | goto out; | |
926 | } | |
927 | ||
928 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
929 | ||
930 | out: | |
931 | release_sock(sk); | |
932 | return ret; | |
933 | } | |
934 | ||
935 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) | |
936 | { | |
937 | struct sock *sk = (struct sock *)msk; | |
938 | u32 flags = 0; | |
939 | bool slow; | |
940 | u32 now; | |
941 | ||
942 | memset(info, 0, sizeof(*info)); | |
943 | ||
944 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); | |
945 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); | |
946 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); | |
947 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); | |
948 | ||
949 | if (inet_sk_state_load(sk) == TCP_LISTEN) | |
950 | return; | |
951 | ||
952 | /* The following limits only make sense for the in-kernel PM */ | |
953 | if (mptcp_pm_is_kernel(msk)) { | |
954 | info->mptcpi_subflows_max = | |
955 | mptcp_pm_get_subflows_max(msk); | |
956 | info->mptcpi_add_addr_signal_max = | |
957 | mptcp_pm_get_add_addr_signal_max(msk); | |
958 | info->mptcpi_add_addr_accepted_max = | |
959 | mptcp_pm_get_add_addr_accept_max(msk); | |
960 | info->mptcpi_local_addr_max = | |
961 | mptcp_pm_get_local_addr_max(msk); | |
962 | } | |
963 | ||
964 | if (__mptcp_check_fallback(msk)) | |
965 | flags |= MPTCP_INFO_FLAG_FALLBACK; | |
966 | if (READ_ONCE(msk->can_ack)) | |
967 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; | |
968 | info->mptcpi_flags = flags; | |
969 | ||
970 | slow = lock_sock_fast(sk); | |
971 | info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); | |
972 | info->mptcpi_token = msk->token; | |
973 | info->mptcpi_write_seq = msk->write_seq; | |
974 | info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; | |
975 | info->mptcpi_bytes_sent = msk->bytes_sent; | |
976 | info->mptcpi_bytes_received = msk->bytes_received; | |
977 | info->mptcpi_bytes_retrans = msk->bytes_retrans; | |
978 | info->mptcpi_subflows_total = info->mptcpi_subflows + | |
979 | __mptcp_has_initial_subflow(msk); | |
980 | now = tcp_jiffies32; | |
981 | info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); | |
982 | info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); | |
983 | unlock_sock_fast(sk, slow); | |
984 | ||
985 | mptcp_data_lock(sk); | |
986 | info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); | |
987 | info->mptcpi_snd_una = msk->snd_una; | |
988 | info->mptcpi_rcv_nxt = msk->ack_seq; | |
989 | info->mptcpi_bytes_acked = msk->bytes_acked; | |
990 | mptcp_data_unlock(sk); | |
991 | } | |
992 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); | |
993 | ||
994 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) | |
995 | { | |
996 | struct mptcp_info m_info; | |
997 | int len; | |
998 | ||
999 | if (get_user(len, optlen)) | |
1000 | return -EFAULT; | |
1001 | ||
1002 | /* When used only to check if a fallback to TCP happened. */ | |
1003 | if (len == 0) | |
1004 | return 0; | |
1005 | ||
1006 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
1007 | ||
1008 | mptcp_diag_fill_info(msk, &m_info); | |
1009 | ||
1010 | if (put_user(len, optlen)) | |
1011 | return -EFAULT; | |
1012 | ||
1013 | if (copy_to_user(optval, &m_info, len)) | |
1014 | return -EFAULT; | |
1015 | ||
1016 | return 0; | |
1017 | } | |
1018 | ||
1019 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, | |
1020 | char __user *optval, | |
1021 | u32 copied, | |
1022 | int __user *optlen) | |
1023 | { | |
1024 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); | |
1025 | ||
1026 | if (copied) | |
1027 | copied += sfd->size_subflow_data; | |
1028 | else | |
1029 | copied = copylen; | |
1030 | ||
1031 | if (put_user(copied, optlen)) | |
1032 | return -EFAULT; | |
1033 | ||
1034 | if (copy_to_user(optval, sfd, copylen)) | |
1035 | return -EFAULT; | |
1036 | ||
1037 | return 0; | |
1038 | } | |
1039 | ||
1040 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, | |
1041 | char __user *optval, | |
1042 | int __user *optlen) | |
1043 | { | |
1044 | int len, copylen; | |
1045 | ||
1046 | if (get_user(len, optlen)) | |
1047 | return -EFAULT; | |
1048 | ||
1049 | /* if mptcp_subflow_data size is changed, need to adjust | |
1050 | * this function to deal with programs using old version. | |
1051 | */ | |
1052 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); | |
1053 | ||
1054 | if (len < MIN_INFO_OPTLEN_SIZE) | |
1055 | return -EINVAL; | |
1056 | ||
1057 | memset(sfd, 0, sizeof(*sfd)); | |
1058 | ||
1059 | copylen = min_t(unsigned int, len, sizeof(*sfd)); | |
1060 | if (copy_from_user(sfd, optval, copylen)) | |
1061 | return -EFAULT; | |
1062 | ||
1063 | /* size_subflow_data is u32, but len is signed */ | |
1064 | if (sfd->size_subflow_data > INT_MAX || | |
1065 | sfd->size_user > INT_MAX) | |
1066 | return -EINVAL; | |
1067 | ||
1068 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || | |
1069 | sfd->size_subflow_data > len) | |
1070 | return -EINVAL; | |
1071 | ||
1072 | if (sfd->num_subflows || sfd->size_kernel) | |
1073 | return -EINVAL; | |
1074 | ||
1075 | return len - sfd->size_subflow_data; | |
1076 | } | |
1077 | ||
1078 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, | |
1079 | int __user *optlen) | |
1080 | { | |
1081 | struct mptcp_subflow_context *subflow; | |
1082 | struct sock *sk = (struct sock *)msk; | |
1083 | unsigned int sfcount = 0, copied = 0; | |
1084 | struct mptcp_subflow_data sfd; | |
1085 | char __user *infoptr; | |
1086 | int len; | |
1087 | ||
1088 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1089 | if (len < 0) | |
1090 | return len; | |
1091 | ||
1092 | sfd.size_kernel = sizeof(struct tcp_info); | |
1093 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1094 | sizeof(struct tcp_info)); | |
1095 | ||
1096 | infoptr = optval + sfd.size_subflow_data; | |
1097 | ||
1098 | lock_sock(sk); | |
1099 | ||
1100 | mptcp_for_each_subflow(msk, subflow) { | |
1101 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1102 | ||
1103 | ++sfcount; | |
1104 | ||
1105 | if (len && len >= sfd.size_user) { | |
1106 | struct tcp_info info; | |
1107 | ||
1108 | tcp_get_info(ssk, &info); | |
1109 | ||
1110 | if (copy_to_user(infoptr, &info, sfd.size_user)) { | |
1111 | release_sock(sk); | |
1112 | return -EFAULT; | |
1113 | } | |
1114 | ||
1115 | infoptr += sfd.size_user; | |
1116 | copied += sfd.size_user; | |
1117 | len -= sfd.size_user; | |
1118 | } | |
1119 | } | |
1120 | ||
1121 | release_sock(sk); | |
1122 | ||
1123 | sfd.num_subflows = sfcount; | |
1124 | ||
1125 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1126 | return -EFAULT; | |
1127 | ||
1128 | return 0; | |
1129 | } | |
1130 | ||
1131 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) | |
1132 | { | |
1133 | const struct inet_sock *inet = inet_sk(sk); | |
1134 | ||
1135 | memset(a, 0, sizeof(*a)); | |
1136 | ||
1137 | if (sk->sk_family == AF_INET) { | |
1138 | a->sin_local.sin_family = AF_INET; | |
1139 | a->sin_local.sin_port = inet->inet_sport; | |
1140 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; | |
1141 | ||
1142 | if (!a->sin_local.sin_addr.s_addr) | |
1143 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; | |
1144 | ||
1145 | a->sin_remote.sin_family = AF_INET; | |
1146 | a->sin_remote.sin_port = inet->inet_dport; | |
1147 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; | |
1148 | #if IS_ENABLED(CONFIG_IPV6) | |
1149 | } else if (sk->sk_family == AF_INET6) { | |
1150 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
1151 | ||
1152 | if (WARN_ON_ONCE(!np)) | |
1153 | return; | |
1154 | ||
1155 | a->sin6_local.sin6_family = AF_INET6; | |
1156 | a->sin6_local.sin6_port = inet->inet_sport; | |
1157 | ||
1158 | if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) | |
1159 | a->sin6_local.sin6_addr = np->saddr; | |
1160 | else | |
1161 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; | |
1162 | ||
1163 | a->sin6_remote.sin6_family = AF_INET6; | |
1164 | a->sin6_remote.sin6_port = inet->inet_dport; | |
1165 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; | |
1166 | #endif | |
1167 | } | |
1168 | } | |
1169 | ||
1170 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, | |
1171 | int __user *optlen) | |
1172 | { | |
1173 | struct mptcp_subflow_context *subflow; | |
1174 | struct sock *sk = (struct sock *)msk; | |
1175 | unsigned int sfcount = 0, copied = 0; | |
1176 | struct mptcp_subflow_data sfd; | |
1177 | char __user *addrptr; | |
1178 | int len; | |
1179 | ||
1180 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1181 | if (len < 0) | |
1182 | return len; | |
1183 | ||
1184 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); | |
1185 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1186 | sizeof(struct mptcp_subflow_addrs)); | |
1187 | ||
1188 | addrptr = optval + sfd.size_subflow_data; | |
1189 | ||
1190 | lock_sock(sk); | |
1191 | ||
1192 | mptcp_for_each_subflow(msk, subflow) { | |
1193 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1194 | ||
1195 | ++sfcount; | |
1196 | ||
1197 | if (len && len >= sfd.size_user) { | |
1198 | struct mptcp_subflow_addrs a; | |
1199 | ||
1200 | mptcp_get_sub_addrs(ssk, &a); | |
1201 | ||
1202 | if (copy_to_user(addrptr, &a, sfd.size_user)) { | |
1203 | release_sock(sk); | |
1204 | return -EFAULT; | |
1205 | } | |
1206 | ||
1207 | addrptr += sfd.size_user; | |
1208 | copied += sfd.size_user; | |
1209 | len -= sfd.size_user; | |
1210 | } | |
1211 | } | |
1212 | ||
1213 | release_sock(sk); | |
1214 | ||
1215 | sfd.num_subflows = sfcount; | |
1216 | ||
1217 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1218 | return -EFAULT; | |
1219 | ||
1220 | return 0; | |
1221 | } | |
1222 | ||
1223 | static int mptcp_get_full_info(struct mptcp_full_info *mfi, | |
1224 | char __user *optval, | |
1225 | int __user *optlen) | |
1226 | { | |
1227 | int len; | |
1228 | ||
1229 | BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != | |
1230 | MIN_FULL_INFO_OPTLEN_SIZE); | |
1231 | ||
1232 | if (get_user(len, optlen)) | |
1233 | return -EFAULT; | |
1234 | ||
1235 | if (len < MIN_FULL_INFO_OPTLEN_SIZE) | |
1236 | return -EINVAL; | |
1237 | ||
1238 | memset(mfi, 0, sizeof(*mfi)); | |
1239 | if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) | |
1240 | return -EFAULT; | |
1241 | ||
1242 | if (mfi->size_tcpinfo_kernel || | |
1243 | mfi->size_sfinfo_kernel || | |
1244 | mfi->num_subflows) | |
1245 | return -EINVAL; | |
1246 | ||
1247 | if (mfi->size_sfinfo_user > INT_MAX || | |
1248 | mfi->size_tcpinfo_user > INT_MAX) | |
1249 | return -EINVAL; | |
1250 | ||
1251 | return len - MIN_FULL_INFO_OPTLEN_SIZE; | |
1252 | } | |
1253 | ||
1254 | static int mptcp_put_full_info(struct mptcp_full_info *mfi, | |
1255 | char __user *optval, | |
1256 | u32 copylen, | |
1257 | int __user *optlen) | |
1258 | { | |
1259 | copylen += MIN_FULL_INFO_OPTLEN_SIZE; | |
1260 | if (put_user(copylen, optlen)) | |
1261 | return -EFAULT; | |
1262 | ||
1263 | if (copy_to_user(optval, mfi, copylen)) | |
1264 | return -EFAULT; | |
1265 | return 0; | |
1266 | } | |
1267 | ||
1268 | static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, | |
1269 | int __user *optlen) | |
1270 | { | |
1271 | unsigned int sfcount = 0, copylen = 0; | |
1272 | struct mptcp_subflow_context *subflow; | |
1273 | struct sock *sk = (struct sock *)msk; | |
1274 | void __user *tcpinfoptr, *sfinfoptr; | |
1275 | struct mptcp_full_info mfi; | |
1276 | int len; | |
1277 | ||
1278 | len = mptcp_get_full_info(&mfi, optval, optlen); | |
1279 | if (len < 0) | |
1280 | return len; | |
1281 | ||
1282 | /* don't bother filling the mptcp info if there is not enough | |
1283 | * user-space-provided storage | |
1284 | */ | |
1285 | if (len > 0) { | |
1286 | mptcp_diag_fill_info(msk, &mfi.mptcp_info); | |
1287 | copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
1288 | } | |
1289 | ||
1290 | mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); | |
1291 | mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, | |
1292 | sizeof(struct tcp_info)); | |
1293 | sfinfoptr = u64_to_user_ptr(mfi.subflow_info); | |
1294 | mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); | |
1295 | mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, | |
1296 | sizeof(struct mptcp_subflow_info)); | |
1297 | tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); | |
1298 | ||
1299 | lock_sock(sk); | |
1300 | mptcp_for_each_subflow(msk, subflow) { | |
1301 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1302 | struct mptcp_subflow_info sfinfo; | |
1303 | struct tcp_info tcp_info; | |
1304 | ||
1305 | if (sfcount++ >= mfi.size_arrays_user) | |
1306 | continue; | |
1307 | ||
1308 | /* fetch addr/tcp_info only if the user space buffers | |
1309 | * are wide enough | |
1310 | */ | |
1311 | memset(&sfinfo, 0, sizeof(sfinfo)); | |
1312 | sfinfo.id = subflow->subflow_id; | |
1313 | if (mfi.size_sfinfo_user > | |
1314 | offsetof(struct mptcp_subflow_info, addrs)) | |
1315 | mptcp_get_sub_addrs(ssk, &sfinfo.addrs); | |
1316 | if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) | |
1317 | goto fail_release; | |
1318 | ||
1319 | if (mfi.size_tcpinfo_user) { | |
1320 | tcp_get_info(ssk, &tcp_info); | |
1321 | if (copy_to_user(tcpinfoptr, &tcp_info, | |
1322 | mfi.size_tcpinfo_user)) | |
1323 | goto fail_release; | |
1324 | } | |
1325 | ||
1326 | tcpinfoptr += mfi.size_tcpinfo_user; | |
1327 | sfinfoptr += mfi.size_sfinfo_user; | |
1328 | } | |
1329 | release_sock(sk); | |
1330 | ||
1331 | mfi.num_subflows = sfcount; | |
1332 | if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) | |
1333 | return -EFAULT; | |
1334 | ||
1335 | return 0; | |
1336 | ||
1337 | fail_release: | |
1338 | release_sock(sk); | |
1339 | return -EFAULT; | |
1340 | } | |
1341 | ||
1342 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, | |
1343 | int __user *optlen, int val) | |
1344 | { | |
1345 | int len; | |
1346 | ||
1347 | if (get_user(len, optlen)) | |
1348 | return -EFAULT; | |
1349 | if (len < 0) | |
1350 | return -EINVAL; | |
1351 | ||
1352 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { | |
1353 | unsigned char ucval = (unsigned char)val; | |
1354 | ||
1355 | len = 1; | |
1356 | if (put_user(len, optlen)) | |
1357 | return -EFAULT; | |
1358 | if (copy_to_user(optval, &ucval, 1)) | |
1359 | return -EFAULT; | |
1360 | } else { | |
1361 | len = min_t(unsigned int, len, sizeof(int)); | |
1362 | if (put_user(len, optlen)) | |
1363 | return -EFAULT; | |
1364 | if (copy_to_user(optval, &val, len)) | |
1365 | return -EFAULT; | |
1366 | } | |
1367 | ||
1368 | return 0; | |
1369 | } | |
1370 | ||
1371 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, | |
1372 | char __user *optval, int __user *optlen) | |
1373 | { | |
1374 | struct sock *sk = (void *)msk; | |
1375 | ||
1376 | switch (optname) { | |
1377 | case TCP_ULP: | |
1378 | case TCP_CONGESTION: | |
1379 | case TCP_INFO: | |
1380 | case TCP_CC_INFO: | |
1381 | case TCP_DEFER_ACCEPT: | |
1382 | case TCP_FASTOPEN: | |
1383 | case TCP_FASTOPEN_CONNECT: | |
1384 | case TCP_FASTOPEN_KEY: | |
1385 | case TCP_FASTOPEN_NO_COOKIE: | |
1386 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, | |
1387 | optval, optlen); | |
1388 | case TCP_INQ: | |
1389 | return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); | |
1390 | case TCP_CORK: | |
1391 | return mptcp_put_int_option(msk, optval, optlen, msk->cork); | |
1392 | case TCP_NODELAY: | |
1393 | return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); | |
1394 | case TCP_KEEPIDLE: | |
1395 | return mptcp_put_int_option(msk, optval, optlen, | |
1396 | msk->keepalive_idle ? : | |
1397 | READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); | |
1398 | case TCP_KEEPINTVL: | |
1399 | return mptcp_put_int_option(msk, optval, optlen, | |
1400 | msk->keepalive_intvl ? : | |
1401 | READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); | |
1402 | case TCP_KEEPCNT: | |
1403 | return mptcp_put_int_option(msk, optval, optlen, | |
1404 | msk->keepalive_cnt ? : | |
1405 | READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); | |
1406 | case TCP_NOTSENT_LOWAT: | |
1407 | return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); | |
1408 | case TCP_IS_MPTCP: | |
1409 | return mptcp_put_int_option(msk, optval, optlen, 1); | |
1410 | } | |
1411 | return -EOPNOTSUPP; | |
1412 | } | |
1413 | ||
1414 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, | |
1415 | char __user *optval, int __user *optlen) | |
1416 | { | |
1417 | struct sock *sk = (void *)msk; | |
1418 | ||
1419 | switch (optname) { | |
1420 | case IP_TOS: | |
1421 | return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); | |
1422 | case IP_FREEBIND: | |
1423 | return mptcp_put_int_option(msk, optval, optlen, | |
1424 | inet_test_bit(FREEBIND, sk)); | |
1425 | case IP_TRANSPARENT: | |
1426 | return mptcp_put_int_option(msk, optval, optlen, | |
1427 | inet_test_bit(TRANSPARENT, sk)); | |
1428 | case IP_BIND_ADDRESS_NO_PORT: | |
1429 | return mptcp_put_int_option(msk, optval, optlen, | |
1430 | inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); | |
1431 | case IP_LOCAL_PORT_RANGE: | |
1432 | return mptcp_put_int_option(msk, optval, optlen, | |
1433 | READ_ONCE(inet_sk(sk)->local_port_range)); | |
1434 | } | |
1435 | ||
1436 | return -EOPNOTSUPP; | |
1437 | } | |
1438 | ||
1439 | static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, | |
1440 | char __user *optval, int __user *optlen) | |
1441 | { | |
1442 | struct sock *sk = (void *)msk; | |
1443 | ||
1444 | switch (optname) { | |
1445 | case IPV6_V6ONLY: | |
1446 | return mptcp_put_int_option(msk, optval, optlen, | |
1447 | sk->sk_ipv6only); | |
1448 | case IPV6_TRANSPARENT: | |
1449 | return mptcp_put_int_option(msk, optval, optlen, | |
1450 | inet_test_bit(TRANSPARENT, sk)); | |
1451 | case IPV6_FREEBIND: | |
1452 | return mptcp_put_int_option(msk, optval, optlen, | |
1453 | inet_test_bit(FREEBIND, sk)); | |
1454 | } | |
1455 | ||
1456 | return -EOPNOTSUPP; | |
1457 | } | |
1458 | ||
1459 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, | |
1460 | char __user *optval, int __user *optlen) | |
1461 | { | |
1462 | switch (optname) { | |
1463 | case MPTCP_INFO: | |
1464 | return mptcp_getsockopt_info(msk, optval, optlen); | |
1465 | case MPTCP_FULL_INFO: | |
1466 | return mptcp_getsockopt_full_info(msk, optval, optlen); | |
1467 | case MPTCP_TCPINFO: | |
1468 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); | |
1469 | case MPTCP_SUBFLOW_ADDRS: | |
1470 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); | |
1471 | } | |
1472 | ||
1473 | return -EOPNOTSUPP; | |
1474 | } | |
1475 | ||
1476 | int mptcp_getsockopt(struct sock *sk, int level, int optname, | |
1477 | char __user *optval, int __user *option) | |
1478 | { | |
1479 | struct mptcp_sock *msk = mptcp_sk(sk); | |
1480 | struct sock *ssk; | |
1481 | ||
1482 | pr_debug("msk=%p\n", msk); | |
1483 | ||
1484 | /* @@ the meaning of setsockopt() when the socket is connected and | |
1485 | * there are multiple subflows is not yet defined. It is up to the | |
1486 | * MPTCP-level socket to configure the subflows until the subflow | |
1487 | * is in TCP fallback, when socket options are passed through | |
1488 | * to the one remaining subflow. | |
1489 | */ | |
1490 | lock_sock(sk); | |
1491 | ssk = __mptcp_tcp_fallback(msk); | |
1492 | release_sock(sk); | |
1493 | if (ssk) | |
1494 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
1495 | ||
1496 | if (level == SOL_IP) | |
1497 | return mptcp_getsockopt_v4(msk, optname, optval, option); | |
1498 | if (level == SOL_IPV6) | |
1499 | return mptcp_getsockopt_v6(msk, optname, optval, option); | |
1500 | if (level == SOL_TCP) | |
1501 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
1502 | if (level == SOL_MPTCP) | |
1503 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); | |
1504 | return -EOPNOTSUPP; | |
1505 | } | |
1506 | ||
1507 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) | |
1508 | { | |
1509 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; | |
1510 | struct sock *sk = (struct sock *)msk; | |
1511 | ||
1512 | if (ssk->sk_prot->keepalive) { | |
1513 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
1514 | ssk->sk_prot->keepalive(ssk, 1); | |
1515 | else | |
1516 | ssk->sk_prot->keepalive(ssk, 0); | |
1517 | } | |
1518 | ||
1519 | ssk->sk_priority = sk->sk_priority; | |
1520 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; | |
1521 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
1522 | ssk->sk_ipv6only = sk->sk_ipv6only; | |
1523 | __ip_sock_set_tos(ssk, inet_sk(sk)->tos); | |
1524 | ||
1525 | if (sk->sk_userlocks & tx_rx_locks) { | |
1526 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
1527 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { | |
1528 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
1529 | mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; | |
1530 | } | |
1531 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1532 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
1533 | } | |
1534 | ||
1535 | if (sock_flag(sk, SOCK_LINGER)) { | |
1536 | ssk->sk_lingertime = sk->sk_lingertime; | |
1537 | sock_set_flag(ssk, SOCK_LINGER); | |
1538 | } else { | |
1539 | sock_reset_flag(ssk, SOCK_LINGER); | |
1540 | } | |
1541 | ||
1542 | if (sk->sk_mark != ssk->sk_mark) { | |
1543 | ssk->sk_mark = sk->sk_mark; | |
1544 | sk_dst_reset(ssk); | |
1545 | } | |
1546 | ||
1547 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
1548 | ||
1549 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
1550 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); | |
1551 | __tcp_sock_set_cork(ssk, !!msk->cork); | |
1552 | __tcp_sock_set_nodelay(ssk, !!msk->nodelay); | |
1553 | tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); | |
1554 | tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); | |
1555 | tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); | |
1556 | ||
1557 | inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); | |
1558 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); | |
1559 | inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); | |
1560 | WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); | |
1561 | } | |
1562 | ||
1563 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) | |
1564 | { | |
1565 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); | |
1566 | ||
1567 | msk_owned_by_me(msk); | |
1568 | ||
1569 | ssk->sk_rcvlowat = 0; | |
1570 | ||
1571 | /* subflows must ignore any latency-related settings: will not affect | |
1572 | * the user-space - only the msk is relevant - but will foul the | |
1573 | * mptcp scheduler | |
1574 | */ | |
1575 | tcp_sk(ssk)->notsent_lowat = UINT_MAX; | |
1576 | ||
1577 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { | |
1578 | sync_socket_options(msk, ssk); | |
1579 | ||
1580 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
1581 | } | |
1582 | } | |
1583 | ||
1584 | /* unfortunately this is different enough from the tcp version so | |
1585 | * that we can't factor it out | |
1586 | */ | |
1587 | int mptcp_set_rcvlowat(struct sock *sk, int val) | |
1588 | { | |
1589 | struct mptcp_subflow_context *subflow; | |
1590 | int space, cap; | |
1591 | ||
1592 | /* bpf can land here with a wrong sk type */ | |
1593 | if (sk->sk_protocol == IPPROTO_TCP) | |
1594 | return -EINVAL; | |
1595 | ||
1596 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1597 | cap = sk->sk_rcvbuf >> 1; | |
1598 | else | |
1599 | cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; | |
1600 | val = min(val, cap); | |
1601 | WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); | |
1602 | ||
1603 | /* Check if we need to signal EPOLLIN right now */ | |
1604 | if (mptcp_epollin_ready(sk)) | |
1605 | sk->sk_data_ready(sk); | |
1606 | ||
1607 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1608 | return 0; | |
1609 | ||
1610 | space = mptcp_space_from_win(sk, val); | |
1611 | if (space <= sk->sk_rcvbuf) | |
1612 | return 0; | |
1613 | ||
1614 | /* propagate the rcvbuf changes to all the subflows */ | |
1615 | WRITE_ONCE(sk->sk_rcvbuf, space); | |
1616 | mptcp_for_each_subflow(mptcp_sk(sk), subflow) { | |
1617 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1618 | bool slow; | |
1619 | ||
1620 | slow = lock_sock_fast(ssk); | |
1621 | WRITE_ONCE(ssk->sk_rcvbuf, space); | |
1622 | WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); | |
1623 | unlock_sock_fast(ssk, slow); | |
1624 | } | |
1625 | return 0; | |
1626 | } |