]>
Commit | Line | Data |
---|---|---|
0abdde82 PA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Multipath TCP | |
3 | * | |
4 | * Copyright (c) 2021, Red Hat. | |
5 | */ | |
6 | ||
7 | #define pr_fmt(fmt) "MPTCP: " fmt | |
8 | ||
9 | #include <linux/kernel.h> | |
10 | #include <linux/module.h> | |
11 | #include <net/sock.h> | |
12 | #include <net/protocol.h> | |
13 | #include <net/tcp.h> | |
14 | #include <net/mptcp.h> | |
15 | #include "protocol.h" | |
16 | ||
49243207 PA |
17 | #define MIN_INFO_OPTLEN_SIZE 16 |
18 | #define MIN_FULL_INFO_OPTLEN_SIZE 40 | |
06f15cee | 19 | |
0abdde82 PA |
20 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
21 | { | |
109cdeb8 | 22 | msk_owned_by_me(msk); |
0abdde82 PA |
23 | |
24 | if (likely(!__mptcp_check_fallback(msk))) | |
25 | return NULL; | |
26 | ||
27 | return msk->first; | |
28 | } | |
29 | ||
df00b087 FW |
30 | static u32 sockopt_seq_reset(const struct sock *sk) |
31 | { | |
32 | sock_owned_by_me(sk); | |
33 | ||
34 | /* Highbits contain state. Allows to distinguish sockopt_seq | |
35 | * of listener and established: | |
36 | * s0 = new_listener() | |
37 | * sockopt(s0) - seq is 1 | |
38 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) | |
39 | * sockopt(s0) - seq increments to 2 on s0 | |
40 | * sockopt(s1) // seq increments to 2 on s1 (different option) | |
41 | * new ssk completes join, inherits options from s0 // seq 2 | |
42 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq | |
43 | * | |
44 | * Set High order bits to sk_state so ssk->seq == msk->seq test | |
45 | * will fail. | |
46 | */ | |
47 | ||
48 | return (u32)sk->sk_state << 24u; | |
49 | } | |
50 | ||
1b3e7ede FW |
51 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
52 | { | |
53 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; | |
54 | ||
55 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; | |
56 | } | |
57 | ||
58 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, | |
59 | unsigned int optlen, int *val) | |
60 | { | |
61 | if (optlen < sizeof(int)) | |
62 | return -EINVAL; | |
63 | ||
64 | if (copy_from_sockptr(val, optval, sizeof(*val))) | |
65 | return -EFAULT; | |
66 | ||
67 | return 0; | |
68 | } | |
69 | ||
70 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) | |
71 | { | |
72 | struct mptcp_subflow_context *subflow; | |
73 | struct sock *sk = (struct sock *)msk; | |
74 | ||
75 | lock_sock(sk); | |
76 | sockopt_seq_inc(msk); | |
77 | ||
78 | mptcp_for_each_subflow(msk, subflow) { | |
79 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
80 | bool slow = lock_sock_fast(ssk); | |
81 | ||
82 | switch (optname) { | |
a03c99b2 FW |
83 | case SO_DEBUG: |
84 | sock_valbool_flag(ssk, SOCK_DBG, !!val); | |
85 | break; | |
1b3e7ede FW |
86 | case SO_KEEPALIVE: |
87 | if (ssk->sk_prot->keepalive) | |
88 | ssk->sk_prot->keepalive(ssk, !!val); | |
89 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); | |
90 | break; | |
91 | case SO_PRIORITY: | |
10bbf165 | 92 | WRITE_ONCE(ssk->sk_priority, val); |
1b3e7ede | 93 | break; |
5d0a6bc8 FW |
94 | case SO_SNDBUF: |
95 | case SO_SNDBUFFORCE: | |
96 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; | |
97 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); | |
8005184f | 98 | mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; |
5d0a6bc8 FW |
99 | break; |
100 | case SO_RCVBUF: | |
101 | case SO_RCVBUFFORCE: | |
102 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; | |
103 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
104 | break; | |
36704413 FW |
105 | case SO_MARK: |
106 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { | |
3c5b4d69 | 107 | WRITE_ONCE(ssk->sk_mark, sk->sk_mark); |
36704413 FW |
108 | sk_dst_reset(ssk); |
109 | } | |
110 | break; | |
6f0d7198 FW |
111 | case SO_INCOMING_CPU: |
112 | WRITE_ONCE(ssk->sk_incoming_cpu, val); | |
113 | break; | |
1b3e7ede FW |
114 | } |
115 | ||
116 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
117 | unlock_sock_fast(ssk, slow); | |
118 | } | |
119 | ||
120 | release_sock(sk); | |
121 | } | |
122 | ||
123 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) | |
124 | { | |
125 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
126 | struct sock *sk = (struct sock *)msk; | |
127 | int ret; | |
128 | ||
129 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
130 | optval, sizeof(val)); | |
131 | if (ret) | |
132 | return ret; | |
133 | ||
134 | mptcp_sol_socket_sync_intval(msk, optname, val); | |
135 | return 0; | |
136 | } | |
137 | ||
6f0d7198 FW |
138 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
139 | { | |
140 | struct sock *sk = (struct sock *)msk; | |
141 | ||
142 | WRITE_ONCE(sk->sk_incoming_cpu, val); | |
143 | ||
144 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); | |
145 | } | |
146 | ||
9061f24b FW |
147 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
148 | { | |
149 | sockptr_t optval = KERNEL_SOCKPTR(&val); | |
150 | struct mptcp_subflow_context *subflow; | |
151 | struct sock *sk = (struct sock *)msk; | |
152 | int ret; | |
153 | ||
154 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
155 | optval, sizeof(val)); | |
156 | if (ret) | |
157 | return ret; | |
158 | ||
159 | lock_sock(sk); | |
160 | mptcp_for_each_subflow(msk, subflow) { | |
161 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
162 | bool slow = lock_sock_fast(ssk); | |
163 | ||
6c9a0a0f | 164 | sock_set_timestamp(sk, optname, !!val); |
9061f24b FW |
165 | unlock_sock_fast(ssk, slow); |
166 | } | |
167 | ||
168 | release_sock(sk); | |
169 | return 0; | |
170 | } | |
171 | ||
1b3e7ede | 172 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
6c9a0a0f YL |
173 | sockptr_t optval, |
174 | unsigned int optlen) | |
1b3e7ede FW |
175 | { |
176 | int val, ret; | |
177 | ||
178 | ret = mptcp_get_int_option(msk, optval, optlen, &val); | |
179 | if (ret) | |
180 | return ret; | |
181 | ||
182 | switch (optname) { | |
183 | case SO_KEEPALIVE: | |
a03c99b2 | 184 | case SO_DEBUG: |
36704413 | 185 | case SO_MARK: |
1b3e7ede | 186 | case SO_PRIORITY: |
5d0a6bc8 FW |
187 | case SO_SNDBUF: |
188 | case SO_SNDBUFFORCE: | |
189 | case SO_RCVBUF: | |
190 | case SO_RCVBUFFORCE: | |
1b3e7ede | 191 | return mptcp_sol_socket_intval(msk, optname, val); |
6f0d7198 FW |
192 | case SO_INCOMING_CPU: |
193 | mptcp_so_incoming_cpu(msk, val); | |
194 | return 0; | |
9061f24b FW |
195 | case SO_TIMESTAMP_OLD: |
196 | case SO_TIMESTAMP_NEW: | |
197 | case SO_TIMESTAMPNS_OLD: | |
198 | case SO_TIMESTAMPNS_NEW: | |
9061f24b | 199 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
1b3e7ede FW |
200 | } |
201 | ||
202 | return -ENOPROTOOPT; | |
203 | } | |
204 | ||
6c9a0a0f YL |
205 | static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, |
206 | int optname, | |
207 | sockptr_t optval, | |
208 | unsigned int optlen) | |
209 | { | |
210 | struct mptcp_subflow_context *subflow; | |
211 | struct sock *sk = (struct sock *)msk; | |
d463126e YL |
212 | struct so_timestamping timestamping; |
213 | int ret; | |
6c9a0a0f | 214 | |
d463126e YL |
215 | if (optlen == sizeof(timestamping)) { |
216 | if (copy_from_sockptr(×tamping, optval, | |
217 | sizeof(timestamping))) | |
218 | return -EFAULT; | |
219 | } else if (optlen == sizeof(int)) { | |
220 | memset(×tamping, 0, sizeof(timestamping)); | |
221 | ||
222 | if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) | |
223 | return -EFAULT; | |
224 | } else { | |
225 | return -EINVAL; | |
226 | } | |
6c9a0a0f YL |
227 | |
228 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, | |
d463126e YL |
229 | KERNEL_SOCKPTR(×tamping), |
230 | sizeof(timestamping)); | |
6c9a0a0f YL |
231 | if (ret) |
232 | return ret; | |
233 | ||
234 | lock_sock(sk); | |
235 | ||
236 | mptcp_for_each_subflow(msk, subflow) { | |
237 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
238 | bool slow = lock_sock_fast(ssk); | |
239 | ||
d463126e | 240 | sock_set_timestamping(sk, optname, timestamping); |
6c9a0a0f YL |
241 | unlock_sock_fast(ssk, slow); |
242 | } | |
243 | ||
244 | release_sock(sk); | |
245 | ||
246 | return 0; | |
247 | } | |
248 | ||
268b1238 FW |
249 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
250 | unsigned int optlen) | |
251 | { | |
252 | struct mptcp_subflow_context *subflow; | |
253 | struct sock *sk = (struct sock *)msk; | |
254 | struct linger ling; | |
255 | sockptr_t kopt; | |
256 | int ret; | |
257 | ||
258 | if (optlen < sizeof(ling)) | |
259 | return -EINVAL; | |
260 | ||
261 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) | |
262 | return -EFAULT; | |
263 | ||
264 | kopt = KERNEL_SOCKPTR(&ling); | |
265 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); | |
266 | if (ret) | |
267 | return ret; | |
268 | ||
269 | lock_sock(sk); | |
270 | sockopt_seq_inc(msk); | |
271 | mptcp_for_each_subflow(msk, subflow) { | |
272 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
273 | bool slow = lock_sock_fast(ssk); | |
274 | ||
275 | if (!ling.l_onoff) { | |
276 | sock_reset_flag(ssk, SOCK_LINGER); | |
277 | } else { | |
278 | ssk->sk_lingertime = sk->sk_lingertime; | |
279 | sock_set_flag(ssk, SOCK_LINGER); | |
280 | } | |
281 | ||
282 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
283 | unlock_sock_fast(ssk, slow); | |
284 | } | |
285 | ||
286 | release_sock(sk); | |
287 | return 0; | |
288 | } | |
289 | ||
0abdde82 PA |
290 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
291 | sockptr_t optval, unsigned int optlen) | |
292 | { | |
293 | struct sock *sk = (struct sock *)msk; | |
f0bc514b | 294 | struct sock *ssk; |
0abdde82 PA |
295 | int ret; |
296 | ||
297 | switch (optname) { | |
298 | case SO_REUSEPORT: | |
299 | case SO_REUSEADDR: | |
5d0a6bc8 FW |
300 | case SO_BINDTODEVICE: |
301 | case SO_BINDTOIFINDEX: | |
0abdde82 | 302 | lock_sock(sk); |
3f326a82 PA |
303 | ssk = __mptcp_nmpc_sk(msk); |
304 | if (IS_ERR(ssk)) { | |
0abdde82 | 305 | release_sock(sk); |
3f326a82 | 306 | return PTR_ERR(ssk); |
0abdde82 PA |
307 | } |
308 | ||
f0bc514b | 309 | ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); |
0abdde82 PA |
310 | if (ret == 0) { |
311 | if (optname == SO_REUSEPORT) | |
f0bc514b | 312 | sk->sk_reuseport = ssk->sk_reuseport; |
0abdde82 | 313 | else if (optname == SO_REUSEADDR) |
f0bc514b | 314 | sk->sk_reuse = ssk->sk_reuse; |
5d0a6bc8 | 315 | else if (optname == SO_BINDTODEVICE) |
f0bc514b | 316 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
5d0a6bc8 | 317 | else if (optname == SO_BINDTOIFINDEX) |
f0bc514b | 318 | sk->sk_bound_dev_if = ssk->sk_bound_dev_if; |
0abdde82 PA |
319 | } |
320 | release_sock(sk); | |
321 | return ret; | |
1b3e7ede FW |
322 | case SO_KEEPALIVE: |
323 | case SO_PRIORITY: | |
5d0a6bc8 FW |
324 | case SO_SNDBUF: |
325 | case SO_SNDBUFFORCE: | |
326 | case SO_RCVBUF: | |
327 | case SO_RCVBUFFORCE: | |
36704413 | 328 | case SO_MARK: |
6f0d7198 | 329 | case SO_INCOMING_CPU: |
a03c99b2 | 330 | case SO_DEBUG: |
9061f24b FW |
331 | case SO_TIMESTAMP_OLD: |
332 | case SO_TIMESTAMP_NEW: | |
333 | case SO_TIMESTAMPNS_OLD: | |
334 | case SO_TIMESTAMPNS_NEW: | |
6c9a0a0f YL |
335 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, |
336 | optlen); | |
9061f24b FW |
337 | case SO_TIMESTAMPING_OLD: |
338 | case SO_TIMESTAMPING_NEW: | |
6c9a0a0f YL |
339 | return mptcp_setsockopt_sol_socket_timestamping(msk, optname, |
340 | optval, optlen); | |
268b1238 FW |
341 | case SO_LINGER: |
342 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); | |
7a009a70 FW |
343 | case SO_RCVLOWAT: |
344 | case SO_RCVTIMEO_OLD: | |
345 | case SO_RCVTIMEO_NEW: | |
d6ab5ea2 GT |
346 | case SO_SNDTIMEO_OLD: |
347 | case SO_SNDTIMEO_NEW: | |
7a009a70 FW |
348 | case SO_BUSY_POLL: |
349 | case SO_PREFER_BUSY_POLL: | |
350 | case SO_BUSY_POLL_BUDGET: | |
351 | /* No need to copy: only relevant for msk */ | |
352 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); | |
a03c99b2 FW |
353 | case SO_NO_CHECK: |
354 | case SO_DONTROUTE: | |
355 | case SO_BROADCAST: | |
356 | case SO_BSDCOMPAT: | |
357 | case SO_PASSCRED: | |
5e2ff670 | 358 | case SO_PASSPIDFD: |
a03c99b2 FW |
359 | case SO_PASSSEC: |
360 | case SO_RXQ_OVFL: | |
361 | case SO_WIFI_STATUS: | |
362 | case SO_NOFCS: | |
363 | case SO_SELECT_ERR_QUEUE: | |
364 | return 0; | |
0abdde82 PA |
365 | } |
366 | ||
7a009a70 FW |
367 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
368 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, | |
369 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, | |
370 | * we must be careful with subflows | |
371 | * | |
372 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks | |
373 | * explicitly the sk_protocol field | |
374 | * | |
375 | * SO_PEEK_OFF is unsupported, as it is for plain TCP | |
376 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows | |
377 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, | |
378 | * but likely needs careful design | |
379 | * | |
380 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg | |
381 | * SO_TXTIME is currently unsupported | |
382 | */ | |
383 | ||
384 | return -EOPNOTSUPP; | |
0abdde82 PA |
385 | } |
386 | ||
387 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, | |
388 | sockptr_t optval, unsigned int optlen) | |
389 | { | |
390 | struct sock *sk = (struct sock *)msk; | |
391 | int ret = -EOPNOTSUPP; | |
f0bc514b | 392 | struct sock *ssk; |
0abdde82 PA |
393 | |
394 | switch (optname) { | |
395 | case IPV6_V6ONLY: | |
c9406a23 FW |
396 | case IPV6_TRANSPARENT: |
397 | case IPV6_FREEBIND: | |
0abdde82 | 398 | lock_sock(sk); |
3f326a82 PA |
399 | ssk = __mptcp_nmpc_sk(msk); |
400 | if (IS_ERR(ssk)) { | |
0abdde82 | 401 | release_sock(sk); |
3f326a82 | 402 | return PTR_ERR(ssk); |
0abdde82 PA |
403 | } |
404 | ||
f0bc514b | 405 | ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); |
c9406a23 FW |
406 | if (ret != 0) { |
407 | release_sock(sk); | |
408 | return ret; | |
409 | } | |
410 | ||
411 | sockopt_seq_inc(msk); | |
412 | ||
413 | switch (optname) { | |
414 | case IPV6_V6ONLY: | |
f0bc514b | 415 | sk->sk_ipv6only = ssk->sk_ipv6only; |
c9406a23 FW |
416 | break; |
417 | case IPV6_TRANSPARENT: | |
4bd0623f ED |
418 | inet_assign_bit(TRANSPARENT, sk, |
419 | inet_test_bit(TRANSPARENT, ssk)); | |
c9406a23 FW |
420 | break; |
421 | case IPV6_FREEBIND: | |
3f7e7532 ED |
422 | inet_assign_bit(FREEBIND, sk, |
423 | inet_test_bit(FREEBIND, ssk)); | |
c9406a23 FW |
424 | break; |
425 | } | |
0abdde82 PA |
426 | |
427 | release_sock(sk); | |
428 | break; | |
429 | } | |
430 | ||
431 | return ret; | |
432 | } | |
433 | ||
d9e4c129 PA |
434 | static bool mptcp_supported_sockopt(int level, int optname) |
435 | { | |
d9e4c129 PA |
436 | if (level == SOL_IP) { |
437 | switch (optname) { | |
438 | /* should work fine */ | |
439 | case IP_FREEBIND: | |
440 | case IP_TRANSPARENT: | |
c85636a2 MG |
441 | case IP_BIND_ADDRESS_NO_PORT: |
442 | case IP_LOCAL_PORT_RANGE: | |
d9e4c129 PA |
443 | |
444 | /* the following are control cmsg related */ | |
445 | case IP_PKTINFO: | |
446 | case IP_RECVTTL: | |
447 | case IP_RECVTOS: | |
448 | case IP_RECVOPTS: | |
449 | case IP_RETOPTS: | |
450 | case IP_PASSSEC: | |
451 | case IP_RECVORIGDSTADDR: | |
452 | case IP_CHECKSUM: | |
453 | case IP_RECVFRAGSIZE: | |
454 | ||
455 | /* common stuff that need some love */ | |
456 | case IP_TOS: | |
457 | case IP_TTL: | |
d9e4c129 PA |
458 | case IP_MTU_DISCOVER: |
459 | case IP_RECVERR: | |
460 | ||
461 | /* possibly less common may deserve some love */ | |
462 | case IP_MINTTL: | |
463 | ||
464 | /* the following is apparently a no-op for plain TCP */ | |
465 | case IP_RECVERR_RFC4884: | |
466 | return true; | |
467 | } | |
468 | ||
469 | /* IP_OPTIONS is not supported, needs subflow care */ | |
470 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ | |
471 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, | |
472 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, | |
473 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, | |
474 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, | |
475 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, | |
476 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal | |
477 | * with mcast stuff | |
478 | */ | |
479 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ | |
480 | return false; | |
481 | } | |
482 | if (level == SOL_IPV6) { | |
483 | switch (optname) { | |
484 | case IPV6_V6ONLY: | |
485 | ||
486 | /* the following are control cmsg related */ | |
487 | case IPV6_RECVPKTINFO: | |
488 | case IPV6_2292PKTINFO: | |
489 | case IPV6_RECVHOPLIMIT: | |
490 | case IPV6_2292HOPLIMIT: | |
491 | case IPV6_RECVRTHDR: | |
492 | case IPV6_2292RTHDR: | |
493 | case IPV6_RECVHOPOPTS: | |
494 | case IPV6_2292HOPOPTS: | |
495 | case IPV6_RECVDSTOPTS: | |
496 | case IPV6_2292DSTOPTS: | |
497 | case IPV6_RECVTCLASS: | |
498 | case IPV6_FLOWINFO: | |
499 | case IPV6_RECVPATHMTU: | |
500 | case IPV6_RECVORIGDSTADDR: | |
501 | case IPV6_RECVFRAGSIZE: | |
502 | ||
503 | /* the following ones need some love but are quite common */ | |
504 | case IPV6_TCLASS: | |
505 | case IPV6_TRANSPARENT: | |
506 | case IPV6_FREEBIND: | |
507 | case IPV6_PKTINFO: | |
508 | case IPV6_2292PKTOPTIONS: | |
509 | case IPV6_UNICAST_HOPS: | |
510 | case IPV6_MTU_DISCOVER: | |
511 | case IPV6_MTU: | |
512 | case IPV6_RECVERR: | |
513 | case IPV6_FLOWINFO_SEND: | |
514 | case IPV6_FLOWLABEL_MGR: | |
515 | case IPV6_MINHOPCOUNT: | |
516 | case IPV6_DONTFRAG: | |
517 | case IPV6_AUTOFLOWLABEL: | |
518 | ||
519 | /* the following one is a no-op for plain TCP */ | |
520 | case IPV6_RECVERR_RFC4884: | |
521 | return true; | |
522 | } | |
523 | ||
524 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are | |
525 | * not supported | |
526 | */ | |
527 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, | |
528 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, | |
529 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, | |
530 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, | |
531 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, | |
532 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER | |
533 | * are not supported better not deal with mcast | |
534 | */ | |
535 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ | |
536 | ||
537 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ | |
538 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ | |
539 | return false; | |
540 | } | |
541 | if (level == SOL_TCP) { | |
542 | switch (optname) { | |
543 | /* the following are no-op or should work just fine */ | |
544 | case TCP_THIN_DUPACK: | |
545 | case TCP_DEFER_ACCEPT: | |
546 | ||
547 | /* the following need some love */ | |
548 | case TCP_MAXSEG: | |
549 | case TCP_NODELAY: | |
550 | case TCP_THIN_LINEAR_TIMEOUTS: | |
551 | case TCP_CONGESTION: | |
d9e4c129 PA |
552 | case TCP_CORK: |
553 | case TCP_KEEPIDLE: | |
554 | case TCP_KEEPINTVL: | |
555 | case TCP_KEEPCNT: | |
556 | case TCP_SYNCNT: | |
557 | case TCP_SAVE_SYN: | |
558 | case TCP_LINGER2: | |
559 | case TCP_WINDOW_CLAMP: | |
560 | case TCP_QUICKACK: | |
561 | case TCP_USER_TIMEOUT: | |
562 | case TCP_TIMESTAMP: | |
563 | case TCP_NOTSENT_LOWAT: | |
564 | case TCP_TX_DELAY: | |
2c9e7765 | 565 | case TCP_INQ: |
4ffb0a02 | 566 | case TCP_FASTOPEN: |
54635bd0 | 567 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 568 | case TCP_FASTOPEN_KEY: |
e64d4deb | 569 | case TCP_FASTOPEN_NO_COOKIE: |
d9e4c129 PA |
570 | return true; |
571 | } | |
572 | ||
573 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ | |
574 | ||
575 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, | |
576 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess | |
577 | */ | |
d9e4c129 PA |
578 | } |
579 | return false; | |
580 | } | |
581 | ||
aa1fbd94 FW |
582 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
583 | unsigned int optlen) | |
584 | { | |
585 | struct mptcp_subflow_context *subflow; | |
586 | struct sock *sk = (struct sock *)msk; | |
587 | char name[TCP_CA_NAME_MAX]; | |
588 | bool cap_net_admin; | |
589 | int ret; | |
590 | ||
591 | if (optlen < 1) | |
592 | return -EINVAL; | |
593 | ||
594 | ret = strncpy_from_sockptr(name, optval, | |
595 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); | |
596 | if (ret < 0) | |
597 | return -EFAULT; | |
598 | ||
599 | name[ret] = 0; | |
600 | ||
601 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); | |
602 | ||
603 | ret = 0; | |
604 | lock_sock(sk); | |
605 | sockopt_seq_inc(msk); | |
606 | mptcp_for_each_subflow(msk, subflow) { | |
607 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
608 | int err; | |
609 | ||
610 | lock_sock(ssk); | |
611 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); | |
612 | if (err < 0 && ret == 0) | |
613 | ret = err; | |
614 | subflow->setsockopt_seq = msk->setsockopt_seq; | |
615 | release_sock(ssk); | |
616 | } | |
617 | ||
618 | if (ret == 0) | |
5eae7a82 | 619 | strscpy(msk->ca_name, name, sizeof(msk->ca_name)); |
aa1fbd94 FW |
620 | |
621 | release_sock(sk); | |
622 | return ret; | |
623 | } | |
624 | ||
bd11dc4f MBN |
625 | static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, |
626 | int (*set_val)(struct sock *, int), | |
627 | int *msk_val, int val) | |
628 | { | |
629 | struct mptcp_subflow_context *subflow; | |
630 | int err = 0; | |
631 | ||
632 | mptcp_for_each_subflow(msk, subflow) { | |
633 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
634 | int ret; | |
635 | ||
636 | lock_sock(ssk); | |
637 | ret = set_val(ssk, val); | |
638 | err = err ? : ret; | |
639 | release_sock(ssk); | |
640 | } | |
641 | ||
642 | if (!err) { | |
643 | *msk_val = val; | |
644 | sockopt_seq_inc(msk); | |
645 | } | |
646 | ||
647 | return err; | |
648 | } | |
649 | ||
7f71a337 | 650 | static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) |
4f6e14bd MG |
651 | { |
652 | struct mptcp_subflow_context *subflow; | |
653 | struct sock *sk = (struct sock *)msk; | |
4f6e14bd | 654 | |
4f6e14bd MG |
655 | sockopt_seq_inc(msk); |
656 | msk->cork = !!val; | |
657 | mptcp_for_each_subflow(msk, subflow) { | |
658 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
659 | ||
660 | lock_sock(ssk); | |
661 | __tcp_sock_set_cork(ssk, !!val); | |
662 | release_sock(ssk); | |
663 | } | |
664 | if (!val) | |
665 | mptcp_check_and_set_pending(sk); | |
4f6e14bd MG |
666 | |
667 | return 0; | |
668 | } | |
669 | ||
7f71a337 | 670 | static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) |
4f6e14bd MG |
671 | { |
672 | struct mptcp_subflow_context *subflow; | |
673 | struct sock *sk = (struct sock *)msk; | |
4f6e14bd | 674 | |
4f6e14bd MG |
675 | sockopt_seq_inc(msk); |
676 | msk->nodelay = !!val; | |
677 | mptcp_for_each_subflow(msk, subflow) { | |
678 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
679 | ||
680 | lock_sock(ssk); | |
681 | __tcp_sock_set_nodelay(ssk, !!val); | |
682 | release_sock(ssk); | |
683 | } | |
684 | if (val) | |
685 | mptcp_check_and_set_pending(sk); | |
4f6e14bd MG |
686 | return 0; |
687 | } | |
688 | ||
57d3117c MG |
689 | static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, |
690 | sockptr_t optval, unsigned int optlen) | |
c9406a23 FW |
691 | { |
692 | struct sock *sk = (struct sock *)msk; | |
3f326a82 | 693 | struct sock *ssk; |
c9406a23 FW |
694 | int err; |
695 | ||
696 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
697 | if (err != 0) | |
698 | return err; | |
699 | ||
700 | lock_sock(sk); | |
701 | ||
3f326a82 PA |
702 | ssk = __mptcp_nmpc_sk(msk); |
703 | if (IS_ERR(ssk)) { | |
c9406a23 | 704 | release_sock(sk); |
3f326a82 | 705 | return PTR_ERR(ssk); |
c9406a23 FW |
706 | } |
707 | ||
c9406a23 FW |
708 | switch (optname) { |
709 | case IP_FREEBIND: | |
3f7e7532 | 710 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
c9406a23 FW |
711 | break; |
712 | case IP_TRANSPARENT: | |
4bd0623f ED |
713 | inet_assign_bit(TRANSPARENT, ssk, |
714 | inet_test_bit(TRANSPARENT, sk)); | |
c9406a23 | 715 | break; |
c85636a2 MG |
716 | case IP_BIND_ADDRESS_NO_PORT: |
717 | inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, | |
718 | inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); | |
719 | break; | |
720 | case IP_LOCAL_PORT_RANGE: | |
721 | WRITE_ONCE(inet_sk(ssk)->local_port_range, | |
722 | READ_ONCE(inet_sk(sk)->local_port_range)); | |
723 | break; | |
c9406a23 FW |
724 | default: |
725 | release_sock(sk); | |
726 | WARN_ON_ONCE(1); | |
727 | return -EOPNOTSUPP; | |
728 | } | |
729 | ||
730 | sockopt_seq_inc(msk); | |
731 | release_sock(sk); | |
732 | return 0; | |
733 | } | |
734 | ||
ffcacff8 PS |
735 | static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, |
736 | sockptr_t optval, unsigned int optlen) | |
737 | { | |
738 | struct mptcp_subflow_context *subflow; | |
739 | struct sock *sk = (struct sock *)msk; | |
740 | int err, val; | |
741 | ||
742 | err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); | |
743 | ||
744 | if (err != 0) | |
745 | return err; | |
746 | ||
747 | lock_sock(sk); | |
748 | sockopt_seq_inc(msk); | |
e08d0b3d | 749 | val = READ_ONCE(inet_sk(sk)->tos); |
ffcacff8 PS |
750 | mptcp_for_each_subflow(msk, subflow) { |
751 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
7679d34f | 752 | bool slow; |
ffcacff8 | 753 | |
7679d34f | 754 | slow = lock_sock_fast(ssk); |
878d951c | 755 | __ip_sock_set_tos(ssk, val); |
7679d34f | 756 | unlock_sock_fast(ssk, slow); |
ffcacff8 PS |
757 | } |
758 | release_sock(sk); | |
759 | ||
03e7d28c | 760 | return 0; |
ffcacff8 PS |
761 | } |
762 | ||
763 | static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, | |
764 | sockptr_t optval, unsigned int optlen) | |
765 | { | |
766 | switch (optname) { | |
c9406a23 FW |
767 | case IP_FREEBIND: |
768 | case IP_TRANSPARENT: | |
c85636a2 MG |
769 | case IP_BIND_ADDRESS_NO_PORT: |
770 | case IP_LOCAL_PORT_RANGE: | |
57d3117c | 771 | return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); |
ffcacff8 PS |
772 | case IP_TOS: |
773 | return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); | |
774 | } | |
775 | ||
776 | return -EOPNOTSUPP; | |
777 | } | |
778 | ||
d3d42904 MB |
779 | static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
780 | sockptr_t optval, unsigned int optlen) | |
54635bd0 | 781 | { |
21e43569 | 782 | struct sock *sk = (struct sock *)msk; |
3f326a82 | 783 | struct sock *ssk; |
ddb1a072 | 784 | int ret; |
54635bd0 | 785 | |
d3d42904 | 786 | /* Limit to first subflow, before the connection establishment */ |
21e43569 | 787 | lock_sock(sk); |
3f326a82 PA |
788 | ssk = __mptcp_nmpc_sk(msk); |
789 | if (IS_ERR(ssk)) { | |
790 | ret = PTR_ERR(ssk); | |
21e43569 | 791 | goto unlock; |
ddb1a072 | 792 | } |
54635bd0 | 793 | |
3f326a82 | 794 | ret = tcp_setsockopt(ssk, level, optname, optval, optlen); |
21e43569 PA |
795 | |
796 | unlock: | |
797 | release_sock(sk); | |
798 | return ret; | |
54635bd0 BH |
799 | } |
800 | ||
aa1fbd94 FW |
801 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
802 | sockptr_t optval, unsigned int optlen) | |
803 | { | |
2c9e7765 FW |
804 | struct sock *sk = (void *)msk; |
805 | int ret, val; | |
806 | ||
aa1fbd94 FW |
807 | switch (optname) { |
808 | case TCP_ULP: | |
809 | return -EOPNOTSUPP; | |
810 | case TCP_CONGESTION: | |
811 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); | |
ea1e301d | 812 | case TCP_DEFER_ACCEPT: |
caea6467 MB |
813 | /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ |
814 | mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); | |
815 | return 0; | |
4ffb0a02 | 816 | case TCP_FASTOPEN: |
54635bd0 | 817 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 818 | case TCP_FASTOPEN_KEY: |
e64d4deb | 819 | case TCP_FASTOPEN_NO_COOKIE: |
d3d42904 MB |
820 | return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, |
821 | optval, optlen); | |
aa1fbd94 FW |
822 | } |
823 | ||
7f71a337 PA |
824 | ret = mptcp_get_int_option(msk, optval, optlen, &val); |
825 | if (ret) | |
826 | return ret; | |
827 | ||
828 | lock_sock(sk); | |
829 | switch (optname) { | |
830 | case TCP_INQ: | |
831 | if (val < 0 || val > 1) | |
832 | ret = -EINVAL; | |
833 | else | |
834 | msk->recvmsg_inq = !!val; | |
835 | break; | |
836 | case TCP_NOTSENT_LOWAT: | |
837 | WRITE_ONCE(msk->notsent_lowat, val); | |
838 | mptcp_write_space(sk); | |
839 | break; | |
840 | case TCP_CORK: | |
841 | ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); | |
842 | break; | |
843 | case TCP_NODELAY: | |
844 | ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); | |
845 | break; | |
bd11dc4f MBN |
846 | case TCP_KEEPIDLE: |
847 | ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, | |
848 | &tcp_sock_set_keepidle_locked, | |
849 | &msk->keepalive_idle, val); | |
850 | break; | |
851 | case TCP_KEEPINTVL: | |
852 | ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, | |
853 | &tcp_sock_set_keepintvl, | |
854 | &msk->keepalive_intvl, val); | |
855 | break; | |
856 | case TCP_KEEPCNT: | |
857 | ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, | |
858 | &tcp_sock_set_keepcnt, | |
859 | &msk->keepalive_cnt, | |
860 | val); | |
861 | break; | |
7f71a337 PA |
862 | default: |
863 | ret = -ENOPROTOOPT; | |
864 | } | |
865 | ||
866 | release_sock(sk); | |
867 | return ret; | |
aa1fbd94 FW |
868 | } |
869 | ||
0abdde82 PA |
870 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
871 | sockptr_t optval, unsigned int optlen) | |
872 | { | |
873 | struct mptcp_sock *msk = mptcp_sk(sk); | |
874 | struct sock *ssk; | |
875 | ||
876 | pr_debug("msk=%p", msk); | |
877 | ||
878 | if (level == SOL_SOCKET) | |
879 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); | |
880 | ||
7a009a70 FW |
881 | if (!mptcp_supported_sockopt(level, optname)) |
882 | return -ENOPROTOOPT; | |
883 | ||
0abdde82 PA |
884 | /* @@ the meaning of setsockopt() when the socket is connected and |
885 | * there are multiple subflows is not yet defined. It is up to the | |
886 | * MPTCP-level socket to configure the subflows until the subflow | |
887 | * is in TCP fallback, when TCP socket options are passed through | |
888 | * to the one remaining subflow. | |
889 | */ | |
890 | lock_sock(sk); | |
891 | ssk = __mptcp_tcp_fallback(msk); | |
892 | release_sock(sk); | |
893 | if (ssk) | |
894 | return tcp_setsockopt(ssk, level, optname, optval, optlen); | |
895 | ||
ffcacff8 PS |
896 | if (level == SOL_IP) |
897 | return mptcp_setsockopt_v4(msk, optname, optval, optlen); | |
898 | ||
0abdde82 PA |
899 | if (level == SOL_IPV6) |
900 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); | |
901 | ||
aa1fbd94 FW |
902 | if (level == SOL_TCP) |
903 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); | |
904 | ||
905 | return -EOPNOTSUPP; | |
906 | } | |
907 | ||
908 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, | |
909 | char __user *optval, int __user *optlen) | |
910 | { | |
911 | struct sock *sk = (struct sock *)msk; | |
aa1fbd94 | 912 | struct sock *ssk; |
f0bc514b | 913 | int ret; |
aa1fbd94 FW |
914 | |
915 | lock_sock(sk); | |
916 | ssk = msk->first; | |
917 | if (ssk) { | |
918 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); | |
919 | goto out; | |
920 | } | |
921 | ||
3f326a82 PA |
922 | ssk = __mptcp_nmpc_sk(msk); |
923 | if (IS_ERR(ssk)) { | |
924 | ret = PTR_ERR(ssk); | |
aa1fbd94 | 925 | goto out; |
ddb1a072 | 926 | } |
aa1fbd94 | 927 | |
f0bc514b | 928 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); |
aa1fbd94 FW |
929 | |
930 | out: | |
931 | release_sock(sk); | |
932 | return ret; | |
933 | } | |
934 | ||
61bc6e82 FW |
935 | void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) |
936 | { | |
38967f42 | 937 | struct sock *sk = (struct sock *)msk; |
61bc6e82 | 938 | u32 flags = 0; |
38967f42 | 939 | bool slow; |
18d82cde | 940 | u32 now; |
61bc6e82 | 941 | |
55c42fa7 FW |
942 | memset(info, 0, sizeof(*info)); |
943 | ||
61bc6e82 FW |
944 | info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); |
945 | info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); | |
946 | info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); | |
947 | info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); | |
e925a032 | 948 | |
38967f42 PA |
949 | if (inet_sk_state_load(sk) == TCP_LISTEN) |
950 | return; | |
951 | ||
e925a032 MB |
952 | /* The following limits only make sense for the in-kernel PM */ |
953 | if (mptcp_pm_is_kernel(msk)) { | |
954 | info->mptcpi_subflows_max = | |
955 | mptcp_pm_get_subflows_max(msk); | |
956 | info->mptcpi_add_addr_signal_max = | |
957 | mptcp_pm_get_add_addr_signal_max(msk); | |
958 | info->mptcpi_add_addr_accepted_max = | |
959 | mptcp_pm_get_add_addr_accept_max(msk); | |
960 | info->mptcpi_local_addr_max = | |
961 | mptcp_pm_get_local_addr_max(msk); | |
962 | } | |
963 | ||
83d580dd | 964 | if (__mptcp_check_fallback(msk)) |
61bc6e82 FW |
965 | flags |= MPTCP_INFO_FLAG_FALLBACK; |
966 | if (READ_ONCE(msk->can_ack)) | |
967 | flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; | |
968 | info->mptcpi_flags = flags; | |
38967f42 PA |
969 | |
970 | slow = lock_sock_fast(sk); | |
28e5c138 | 971 | info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); |
38967f42 PA |
972 | info->mptcpi_token = msk->token; |
973 | info->mptcpi_write_seq = msk->write_seq; | |
974 | info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; | |
975 | info->mptcpi_bytes_sent = msk->bytes_sent; | |
976 | info->mptcpi_bytes_received = msk->bytes_received; | |
977 | info->mptcpi_bytes_retrans = msk->bytes_retrans; | |
6ebf6f90 GT |
978 | info->mptcpi_subflows_total = info->mptcpi_subflows + |
979 | __mptcp_has_initial_subflow(msk); | |
18d82cde GT |
980 | now = tcp_jiffies32; |
981 | info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); | |
982 | info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); | |
38967f42 | 983 | unlock_sock_fast(sk, slow); |
18d82cde GT |
984 | |
985 | mptcp_data_lock(sk); | |
986 | info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); | |
987 | info->mptcpi_snd_una = msk->snd_una; | |
988 | info->mptcpi_rcv_nxt = msk->ack_seq; | |
989 | info->mptcpi_bytes_acked = msk->bytes_acked; | |
990 | mptcp_data_unlock(sk); | |
61bc6e82 FW |
991 | } |
992 | EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); | |
993 | ||
55c42fa7 FW |
994 | static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) |
995 | { | |
996 | struct mptcp_info m_info; | |
997 | int len; | |
998 | ||
999 | if (get_user(len, optlen)) | |
1000 | return -EFAULT; | |
1001 | ||
ce5f6f71 MBN |
1002 | /* When used only to check if a fallback to TCP happened. */ |
1003 | if (len == 0) | |
1004 | return 0; | |
1005 | ||
55c42fa7 FW |
1006 | len = min_t(unsigned int, len, sizeof(struct mptcp_info)); |
1007 | ||
1008 | mptcp_diag_fill_info(msk, &m_info); | |
1009 | ||
1010 | if (put_user(len, optlen)) | |
1011 | return -EFAULT; | |
1012 | ||
1013 | if (copy_to_user(optval, &m_info, len)) | |
1014 | return -EFAULT; | |
1015 | ||
1016 | return 0; | |
1017 | } | |
1018 | ||
06f15cee FW |
1019 | static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, |
1020 | char __user *optval, | |
1021 | u32 copied, | |
1022 | int __user *optlen) | |
1023 | { | |
1024 | u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); | |
1025 | ||
1026 | if (copied) | |
1027 | copied += sfd->size_subflow_data; | |
1028 | else | |
1029 | copied = copylen; | |
1030 | ||
1031 | if (put_user(copied, optlen)) | |
1032 | return -EFAULT; | |
1033 | ||
1034 | if (copy_to_user(optval, sfd, copylen)) | |
1035 | return -EFAULT; | |
1036 | ||
1037 | return 0; | |
1038 | } | |
1039 | ||
1040 | static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, | |
49243207 PA |
1041 | char __user *optval, |
1042 | int __user *optlen) | |
06f15cee FW |
1043 | { |
1044 | int len, copylen; | |
1045 | ||
1046 | if (get_user(len, optlen)) | |
1047 | return -EFAULT; | |
1048 | ||
1049 | /* if mptcp_subflow_data size is changed, need to adjust | |
1050 | * this function to deal with programs using old version. | |
1051 | */ | |
1052 | BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); | |
1053 | ||
1054 | if (len < MIN_INFO_OPTLEN_SIZE) | |
1055 | return -EINVAL; | |
1056 | ||
1057 | memset(sfd, 0, sizeof(*sfd)); | |
1058 | ||
1059 | copylen = min_t(unsigned int, len, sizeof(*sfd)); | |
1060 | if (copy_from_user(sfd, optval, copylen)) | |
1061 | return -EFAULT; | |
1062 | ||
1063 | /* size_subflow_data is u32, but len is signed */ | |
1064 | if (sfd->size_subflow_data > INT_MAX || | |
1065 | sfd->size_user > INT_MAX) | |
1066 | return -EINVAL; | |
1067 | ||
1068 | if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || | |
1069 | sfd->size_subflow_data > len) | |
1070 | return -EINVAL; | |
1071 | ||
1072 | if (sfd->num_subflows || sfd->size_kernel) | |
1073 | return -EINVAL; | |
1074 | ||
1075 | return len - sfd->size_subflow_data; | |
1076 | } | |
1077 | ||
1078 | static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, | |
1079 | int __user *optlen) | |
1080 | { | |
1081 | struct mptcp_subflow_context *subflow; | |
80638684 | 1082 | struct sock *sk = (struct sock *)msk; |
06f15cee FW |
1083 | unsigned int sfcount = 0, copied = 0; |
1084 | struct mptcp_subflow_data sfd; | |
1085 | char __user *infoptr; | |
1086 | int len; | |
1087 | ||
1088 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1089 | if (len < 0) | |
1090 | return len; | |
1091 | ||
1092 | sfd.size_kernel = sizeof(struct tcp_info); | |
1093 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1094 | sizeof(struct tcp_info)); | |
1095 | ||
1096 | infoptr = optval + sfd.size_subflow_data; | |
1097 | ||
1098 | lock_sock(sk); | |
1099 | ||
1100 | mptcp_for_each_subflow(msk, subflow) { | |
1101 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1102 | ||
1103 | ++sfcount; | |
1104 | ||
1105 | if (len && len >= sfd.size_user) { | |
1106 | struct tcp_info info; | |
1107 | ||
1108 | tcp_get_info(ssk, &info); | |
1109 | ||
1110 | if (copy_to_user(infoptr, &info, sfd.size_user)) { | |
1111 | release_sock(sk); | |
1112 | return -EFAULT; | |
1113 | } | |
1114 | ||
1115 | infoptr += sfd.size_user; | |
1116 | copied += sfd.size_user; | |
1117 | len -= sfd.size_user; | |
1118 | } | |
1119 | } | |
1120 | ||
1121 | release_sock(sk); | |
1122 | ||
1123 | sfd.num_subflows = sfcount; | |
1124 | ||
1125 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1126 | return -EFAULT; | |
1127 | ||
1128 | return 0; | |
1129 | } | |
1130 | ||
c11c5906 FW |
1131 | static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) |
1132 | { | |
abc17a11 | 1133 | const struct inet_sock *inet = inet_sk(sk); |
c11c5906 FW |
1134 | |
1135 | memset(a, 0, sizeof(*a)); | |
1136 | ||
1137 | if (sk->sk_family == AF_INET) { | |
1138 | a->sin_local.sin_family = AF_INET; | |
1139 | a->sin_local.sin_port = inet->inet_sport; | |
1140 | a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; | |
1141 | ||
1142 | if (!a->sin_local.sin_addr.s_addr) | |
1143 | a->sin_local.sin_addr.s_addr = inet->inet_saddr; | |
1144 | ||
1145 | a->sin_remote.sin_family = AF_INET; | |
1146 | a->sin_remote.sin_port = inet->inet_dport; | |
1147 | a->sin_remote.sin_addr.s_addr = inet->inet_daddr; | |
1148 | #if IS_ENABLED(CONFIG_IPV6) | |
1149 | } else if (sk->sk_family == AF_INET6) { | |
1150 | const struct ipv6_pinfo *np = inet6_sk(sk); | |
1151 | ||
29211e7d TG |
1152 | if (WARN_ON_ONCE(!np)) |
1153 | return; | |
1154 | ||
c11c5906 FW |
1155 | a->sin6_local.sin6_family = AF_INET6; |
1156 | a->sin6_local.sin6_port = inet->inet_sport; | |
1157 | ||
1158 | if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) | |
1159 | a->sin6_local.sin6_addr = np->saddr; | |
1160 | else | |
1161 | a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; | |
1162 | ||
1163 | a->sin6_remote.sin6_family = AF_INET6; | |
1164 | a->sin6_remote.sin6_port = inet->inet_dport; | |
1165 | a->sin6_remote.sin6_addr = sk->sk_v6_daddr; | |
1166 | #endif | |
1167 | } | |
1168 | } | |
1169 | ||
1170 | static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, | |
1171 | int __user *optlen) | |
1172 | { | |
c11c5906 | 1173 | struct mptcp_subflow_context *subflow; |
80638684 | 1174 | struct sock *sk = (struct sock *)msk; |
c11c5906 FW |
1175 | unsigned int sfcount = 0, copied = 0; |
1176 | struct mptcp_subflow_data sfd; | |
1177 | char __user *addrptr; | |
1178 | int len; | |
1179 | ||
1180 | len = mptcp_get_subflow_data(&sfd, optval, optlen); | |
1181 | if (len < 0) | |
1182 | return len; | |
1183 | ||
1184 | sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); | |
1185 | sfd.size_user = min_t(unsigned int, sfd.size_user, | |
1186 | sizeof(struct mptcp_subflow_addrs)); | |
1187 | ||
1188 | addrptr = optval + sfd.size_subflow_data; | |
1189 | ||
1190 | lock_sock(sk); | |
1191 | ||
1192 | mptcp_for_each_subflow(msk, subflow) { | |
1193 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1194 | ||
1195 | ++sfcount; | |
1196 | ||
1197 | if (len && len >= sfd.size_user) { | |
1198 | struct mptcp_subflow_addrs a; | |
1199 | ||
1200 | mptcp_get_sub_addrs(ssk, &a); | |
1201 | ||
1202 | if (copy_to_user(addrptr, &a, sfd.size_user)) { | |
1203 | release_sock(sk); | |
1204 | return -EFAULT; | |
1205 | } | |
1206 | ||
1207 | addrptr += sfd.size_user; | |
1208 | copied += sfd.size_user; | |
1209 | len -= sfd.size_user; | |
1210 | } | |
1211 | } | |
1212 | ||
1213 | release_sock(sk); | |
1214 | ||
1215 | sfd.num_subflows = sfcount; | |
1216 | ||
1217 | if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) | |
1218 | return -EFAULT; | |
1219 | ||
1220 | return 0; | |
1221 | } | |
1222 | ||
49243207 PA |
1223 | static int mptcp_get_full_info(struct mptcp_full_info *mfi, |
1224 | char __user *optval, | |
1225 | int __user *optlen) | |
1226 | { | |
1227 | int len; | |
1228 | ||
1229 | BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != | |
1230 | MIN_FULL_INFO_OPTLEN_SIZE); | |
1231 | ||
1232 | if (get_user(len, optlen)) | |
1233 | return -EFAULT; | |
1234 | ||
1235 | if (len < MIN_FULL_INFO_OPTLEN_SIZE) | |
1236 | return -EINVAL; | |
1237 | ||
1238 | memset(mfi, 0, sizeof(*mfi)); | |
1239 | if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) | |
1240 | return -EFAULT; | |
1241 | ||
1242 | if (mfi->size_tcpinfo_kernel || | |
1243 | mfi->size_sfinfo_kernel || | |
1244 | mfi->num_subflows) | |
1245 | return -EINVAL; | |
1246 | ||
1247 | if (mfi->size_sfinfo_user > INT_MAX || | |
1248 | mfi->size_tcpinfo_user > INT_MAX) | |
1249 | return -EINVAL; | |
1250 | ||
1251 | return len - MIN_FULL_INFO_OPTLEN_SIZE; | |
1252 | } | |
1253 | ||
1254 | static int mptcp_put_full_info(struct mptcp_full_info *mfi, | |
1255 | char __user *optval, | |
1256 | u32 copylen, | |
1257 | int __user *optlen) | |
1258 | { | |
1259 | copylen += MIN_FULL_INFO_OPTLEN_SIZE; | |
1260 | if (put_user(copylen, optlen)) | |
1261 | return -EFAULT; | |
1262 | ||
1263 | if (copy_to_user(optval, mfi, copylen)) | |
1264 | return -EFAULT; | |
1265 | return 0; | |
1266 | } | |
1267 | ||
1268 | static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, | |
1269 | int __user *optlen) | |
1270 | { | |
1271 | unsigned int sfcount = 0, copylen = 0; | |
1272 | struct mptcp_subflow_context *subflow; | |
1273 | struct sock *sk = (struct sock *)msk; | |
1274 | void __user *tcpinfoptr, *sfinfoptr; | |
1275 | struct mptcp_full_info mfi; | |
1276 | int len; | |
1277 | ||
1278 | len = mptcp_get_full_info(&mfi, optval, optlen); | |
1279 | if (len < 0) | |
1280 | return len; | |
1281 | ||
1282 | /* don't bother filling the mptcp info if there is not enough | |
1283 | * user-space-provided storage | |
1284 | */ | |
1285 | if (len > 0) { | |
1286 | mptcp_diag_fill_info(msk, &mfi.mptcp_info); | |
1287 | copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); | |
1288 | } | |
1289 | ||
1290 | mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); | |
1291 | mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, | |
1292 | sizeof(struct tcp_info)); | |
1293 | sfinfoptr = u64_to_user_ptr(mfi.subflow_info); | |
1294 | mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); | |
1295 | mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, | |
1296 | sizeof(struct mptcp_subflow_info)); | |
1297 | tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); | |
1298 | ||
1299 | lock_sock(sk); | |
1300 | mptcp_for_each_subflow(msk, subflow) { | |
1301 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1302 | struct mptcp_subflow_info sfinfo; | |
1303 | struct tcp_info tcp_info; | |
1304 | ||
1305 | if (sfcount++ >= mfi.size_arrays_user) | |
1306 | continue; | |
1307 | ||
1308 | /* fetch addr/tcp_info only if the user space buffers | |
1309 | * are wide enough | |
1310 | */ | |
1311 | memset(&sfinfo, 0, sizeof(sfinfo)); | |
1312 | sfinfo.id = subflow->subflow_id; | |
1313 | if (mfi.size_sfinfo_user > | |
1314 | offsetof(struct mptcp_subflow_info, addrs)) | |
1315 | mptcp_get_sub_addrs(ssk, &sfinfo.addrs); | |
1316 | if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) | |
1317 | goto fail_release; | |
1318 | ||
1319 | if (mfi.size_tcpinfo_user) { | |
1320 | tcp_get_info(ssk, &tcp_info); | |
1321 | if (copy_to_user(tcpinfoptr, &tcp_info, | |
1322 | mfi.size_tcpinfo_user)) | |
1323 | goto fail_release; | |
1324 | } | |
1325 | ||
1326 | tcpinfoptr += mfi.size_tcpinfo_user; | |
1327 | sfinfoptr += mfi.size_sfinfo_user; | |
1328 | } | |
1329 | release_sock(sk); | |
1330 | ||
1331 | mfi.num_subflows = sfcount; | |
1332 | if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) | |
1333 | return -EFAULT; | |
1334 | ||
1335 | return 0; | |
1336 | ||
1337 | fail_release: | |
1338 | release_sock(sk); | |
1339 | return -EFAULT; | |
1340 | } | |
1341 | ||
2c9e7765 FW |
1342 | static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, |
1343 | int __user *optlen, int val) | |
1344 | { | |
1345 | int len; | |
1346 | ||
1347 | if (get_user(len, optlen)) | |
1348 | return -EFAULT; | |
2c9e7765 FW |
1349 | if (len < 0) |
1350 | return -EINVAL; | |
1351 | ||
3b1e21eb FW |
1352 | if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { |
1353 | unsigned char ucval = (unsigned char)val; | |
1354 | ||
1355 | len = 1; | |
1356 | if (put_user(len, optlen)) | |
1357 | return -EFAULT; | |
1358 | if (copy_to_user(optval, &ucval, 1)) | |
1359 | return -EFAULT; | |
1360 | } else { | |
1361 | len = min_t(unsigned int, len, sizeof(int)); | |
1362 | if (put_user(len, optlen)) | |
1363 | return -EFAULT; | |
1364 | if (copy_to_user(optval, &val, len)) | |
1365 | return -EFAULT; | |
1366 | } | |
2c9e7765 FW |
1367 | |
1368 | return 0; | |
1369 | } | |
1370 | ||
aa1fbd94 FW |
1371 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
1372 | char __user *optval, int __user *optlen) | |
1373 | { | |
bd11dc4f MBN |
1374 | struct sock *sk = (void *)msk; |
1375 | ||
aa1fbd94 FW |
1376 | switch (optname) { |
1377 | case TCP_ULP: | |
1378 | case TCP_CONGESTION: | |
1379 | case TCP_INFO: | |
1380 | case TCP_CC_INFO: | |
ea1e301d | 1381 | case TCP_DEFER_ACCEPT: |
4ffb0a02 | 1382 | case TCP_FASTOPEN: |
54635bd0 | 1383 | case TCP_FASTOPEN_CONNECT: |
cb99816c | 1384 | case TCP_FASTOPEN_KEY: |
e64d4deb | 1385 | case TCP_FASTOPEN_NO_COOKIE: |
aa1fbd94 FW |
1386 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
1387 | optval, optlen); | |
2c9e7765 FW |
1388 | case TCP_INQ: |
1389 | return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); | |
4f6e14bd MG |
1390 | case TCP_CORK: |
1391 | return mptcp_put_int_option(msk, optval, optlen, msk->cork); | |
1392 | case TCP_NODELAY: | |
1393 | return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); | |
bd11dc4f MBN |
1394 | case TCP_KEEPIDLE: |
1395 | return mptcp_put_int_option(msk, optval, optlen, | |
1396 | msk->keepalive_idle ? : | |
1397 | READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); | |
1398 | case TCP_KEEPINTVL: | |
1399 | return mptcp_put_int_option(msk, optval, optlen, | |
1400 | msk->keepalive_intvl ? : | |
1401 | READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); | |
1402 | case TCP_KEEPCNT: | |
1403 | return mptcp_put_int_option(msk, optval, optlen, | |
1404 | msk->keepalive_cnt ? : | |
1405 | READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); | |
29b5e5ef PA |
1406 | case TCP_NOTSENT_LOWAT: |
1407 | return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); | |
c084ebd7 MBN |
1408 | case TCP_IS_MPTCP: |
1409 | return mptcp_put_int_option(msk, optval, optlen, 1); | |
aa1fbd94 | 1410 | } |
0abdde82 PA |
1411 | return -EOPNOTSUPP; |
1412 | } | |
1413 | ||
3b1e21eb FW |
1414 | static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, |
1415 | char __user *optval, int __user *optlen) | |
1416 | { | |
1417 | struct sock *sk = (void *)msk; | |
1418 | ||
1419 | switch (optname) { | |
1420 | case IP_TOS: | |
e08d0b3d | 1421 | return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); |
c85636a2 MG |
1422 | case IP_BIND_ADDRESS_NO_PORT: |
1423 | return mptcp_put_int_option(msk, optval, optlen, | |
1424 | inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); | |
1425 | case IP_LOCAL_PORT_RANGE: | |
1426 | return mptcp_put_int_option(msk, optval, optlen, | |
1427 | READ_ONCE(inet_sk(sk)->local_port_range)); | |
3b1e21eb FW |
1428 | } |
1429 | ||
1430 | return -EOPNOTSUPP; | |
1431 | } | |
1432 | ||
55c42fa7 FW |
1433 | static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, |
1434 | char __user *optval, int __user *optlen) | |
1435 | { | |
1436 | switch (optname) { | |
1437 | case MPTCP_INFO: | |
1438 | return mptcp_getsockopt_info(msk, optval, optlen); | |
49243207 PA |
1439 | case MPTCP_FULL_INFO: |
1440 | return mptcp_getsockopt_full_info(msk, optval, optlen); | |
06f15cee FW |
1441 | case MPTCP_TCPINFO: |
1442 | return mptcp_getsockopt_tcpinfo(msk, optval, optlen); | |
c11c5906 FW |
1443 | case MPTCP_SUBFLOW_ADDRS: |
1444 | return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); | |
55c42fa7 FW |
1445 | } |
1446 | ||
1447 | return -EOPNOTSUPP; | |
1448 | } | |
1449 | ||
0abdde82 PA |
1450 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
1451 | char __user *optval, int __user *option) | |
1452 | { | |
1453 | struct mptcp_sock *msk = mptcp_sk(sk); | |
1454 | struct sock *ssk; | |
1455 | ||
1456 | pr_debug("msk=%p", msk); | |
1457 | ||
1458 | /* @@ the meaning of setsockopt() when the socket is connected and | |
1459 | * there are multiple subflows is not yet defined. It is up to the | |
1460 | * MPTCP-level socket to configure the subflows until the subflow | |
1461 | * is in TCP fallback, when socket options are passed through | |
1462 | * to the one remaining subflow. | |
1463 | */ | |
1464 | lock_sock(sk); | |
1465 | ssk = __mptcp_tcp_fallback(msk); | |
1466 | release_sock(sk); | |
1467 | if (ssk) | |
1468 | return tcp_getsockopt(ssk, level, optname, optval, option); | |
1469 | ||
3b1e21eb FW |
1470 | if (level == SOL_IP) |
1471 | return mptcp_getsockopt_v4(msk, optname, optval, option); | |
aa1fbd94 FW |
1472 | if (level == SOL_TCP) |
1473 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); | |
55c42fa7 FW |
1474 | if (level == SOL_MPTCP) |
1475 | return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); | |
0abdde82 PA |
1476 | return -EOPNOTSUPP; |
1477 | } | |
1478 | ||
1b3e7ede FW |
1479 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
1480 | { | |
5d0a6bc8 | 1481 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
1b3e7ede FW |
1482 | struct sock *sk = (struct sock *)msk; |
1483 | ||
1484 | if (ssk->sk_prot->keepalive) { | |
1485 | if (sock_flag(sk, SOCK_KEEPOPEN)) | |
1486 | ssk->sk_prot->keepalive(ssk, 1); | |
1487 | else | |
1488 | ssk->sk_prot->keepalive(ssk, 0); | |
1489 | } | |
1490 | ||
1491 | ssk->sk_priority = sk->sk_priority; | |
5d0a6bc8 FW |
1492 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
1493 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; | |
7e9740e0 | 1494 | ssk->sk_ipv6only = sk->sk_ipv6only; |
878d951c | 1495 | __ip_sock_set_tos(ssk, inet_sk(sk)->tos); |
5d0a6bc8 FW |
1496 | |
1497 | if (sk->sk_userlocks & tx_rx_locks) { | |
1498 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; | |
8005184f | 1499 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { |
5d0a6bc8 | 1500 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
8005184f PA |
1501 | mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; |
1502 | } | |
5d0a6bc8 FW |
1503 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1504 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); | |
1505 | } | |
1506 | ||
1507 | if (sock_flag(sk, SOCK_LINGER)) { | |
1508 | ssk->sk_lingertime = sk->sk_lingertime; | |
1509 | sock_set_flag(ssk, SOCK_LINGER); | |
1510 | } else { | |
1511 | sock_reset_flag(ssk, SOCK_LINGER); | |
1512 | } | |
1513 | ||
1514 | if (sk->sk_mark != ssk->sk_mark) { | |
1515 | ssk->sk_mark = sk->sk_mark; | |
1516 | sk_dst_reset(ssk); | |
1517 | } | |
1518 | ||
1519 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); | |
1520 | ||
1521 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) | |
20b5759f | 1522 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); |
4f6e14bd MG |
1523 | __tcp_sock_set_cork(ssk, !!msk->cork); |
1524 | __tcp_sock_set_nodelay(ssk, !!msk->nodelay); | |
bd11dc4f MBN |
1525 | tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); |
1526 | tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); | |
1527 | tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); | |
c9406a23 | 1528 | |
4bd0623f | 1529 | inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); |
3f7e7532 | 1530 | inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); |
c85636a2 MG |
1531 | inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); |
1532 | WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); | |
1b3e7ede FW |
1533 | } |
1534 | ||
3e501490 | 1535 | void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) |
78962489 | 1536 | { |
3e501490 | 1537 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
78962489 | 1538 | |
3e501490 | 1539 | msk_owned_by_me(msk); |
78962489 | 1540 | |
5684ab1a PA |
1541 | ssk->sk_rcvlowat = 0; |
1542 | ||
9fdc7793 PA |
1543 | /* subflows must ignore any latency-related settings: will not affect |
1544 | * the user-space - only the msk is relevant - but will foul the | |
1545 | * mptcp scheduler | |
1546 | */ | |
1547 | tcp_sk(ssk)->notsent_lowat = UINT_MAX; | |
1548 | ||
3e501490 PA |
1549 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
1550 | sync_socket_options(msk, ssk); | |
78962489 | 1551 | |
3e501490 | 1552 | subflow->setsockopt_seq = msk->setsockopt_seq; |
78962489 FW |
1553 | } |
1554 | } | |
5684ab1a PA |
1555 | |
1556 | /* unfortunately this is different enough from the tcp version so | |
1557 | * that we can't factor it out | |
1558 | */ | |
1559 | int mptcp_set_rcvlowat(struct sock *sk, int val) | |
1560 | { | |
1561 | struct mptcp_subflow_context *subflow; | |
1562 | int space, cap; | |
1563 | ||
fcf4692f PA |
1564 | /* bpf can land here with a wrong sk type */ |
1565 | if (sk->sk_protocol == IPPROTO_TCP) | |
1566 | return -EINVAL; | |
1567 | ||
5684ab1a PA |
1568 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
1569 | cap = sk->sk_rcvbuf >> 1; | |
1570 | else | |
1571 | cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; | |
1572 | val = min(val, cap); | |
1573 | WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); | |
1574 | ||
1575 | /* Check if we need to signal EPOLLIN right now */ | |
1576 | if (mptcp_epollin_ready(sk)) | |
1577 | sk->sk_data_ready(sk); | |
1578 | ||
1579 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | |
1580 | return 0; | |
1581 | ||
1582 | space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); | |
1583 | if (space <= sk->sk_rcvbuf) | |
1584 | return 0; | |
1585 | ||
1586 | /* propagate the rcvbuf changes to all the subflows */ | |
1587 | WRITE_ONCE(sk->sk_rcvbuf, space); | |
1588 | mptcp_for_each_subflow(mptcp_sk(sk), subflow) { | |
1589 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); | |
1590 | bool slow; | |
1591 | ||
1592 | slow = lock_sock_fast(ssk); | |
1593 | WRITE_ONCE(ssk->sk_rcvbuf, space); | |
f410cbea | 1594 | WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); |
5684ab1a PA |
1595 | unlock_sock_fast(ssk, slow); |
1596 | } | |
1597 | return 0; | |
1598 | } |