]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/local-addresses.c
local-addresses: also save weight of multipath routes
[thirdparty/systemd.git] / src / shared / local-addresses.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
8041b5ba 2
54e6f97b
LP
3#include <net/if_arp.h>
4
1c4baffc 5#include "sd-netlink.h"
b5efdb8a
LP
6
7#include "alloc-util.h"
54e6f97b 8#include "fd-util.h"
e80af1bd 9#include "local-addresses.h"
cf0fbc49
TA
10#include "macro.h"
11#include "netlink-util.h"
760877e9 12#include "sort-util.h"
8041b5ba 13
93bab288
YW
14static int address_compare(const struct local_address *a, const struct local_address *b) {
15 int r;
5502f0d9
LP
16
17 /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
18
e9140aff
LP
19 if (a->family == AF_INET && b->family == AF_INET6)
20 return -1;
21 if (a->family == AF_INET6 && b->family == AF_INET)
22 return 1;
23
93bab288
YW
24 r = CMP(a->scope, b->scope);
25 if (r != 0)
26 return r;
5502f0d9 27
37359b1c 28 r = CMP(a->priority, b->priority);
93bab288
YW
29 if (r != 0)
30 return r;
5502f0d9 31
eb1f9ed6
YW
32 r = CMP(a->weight, b->weight);
33 if (r != 0)
34 return r;
35
93bab288
YW
36 r = CMP(a->ifindex, b->ifindex);
37 if (r != 0)
38 return r;
5502f0d9 39
00d75e57 40 return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
5502f0d9
LP
41}
42
54e6f97b
LP
43static void suppress_duplicates(struct local_address *list, size_t *n_list) {
44 size_t old_size, new_size;
45
46 /* Removes duplicate entries, assumes the list of addresses is already sorted. Updates in-place. */
47
48 if (*n_list < 2) /* list with less than two entries can't have duplicates */
49 return;
50
51 old_size = *n_list;
52 new_size = 1;
53
54 for (size_t i = 1; i < old_size; i++) {
55
56 if (address_compare(list + i, list + new_size - 1) == 0)
57 continue;
58
59 list[new_size++] = list[i];
60 }
61
62 *n_list = new_size;
63}
64
0b2c0c31
YW
65static int add_local_address_full(
66 struct local_address **list,
67 size_t *n_list,
68 int ifindex,
69 unsigned char scope,
70 uint32_t priority,
eb1f9ed6 71 uint32_t weight,
0b2c0c31
YW
72 int family,
73 const union in_addr_union *address) {
74
75 assert(list);
76 assert(n_list);
77 assert(ifindex > 0);
78 assert(IN_SET(family, AF_INET, AF_INET6));
79 assert(address);
80
81 if (!GREEDY_REALLOC(*list, *n_list + 1))
82 return -ENOMEM;
83
84 (*list)[(*n_list)++] = (struct local_address) {
85 .ifindex = ifindex,
86 .scope = scope,
87 .priority = priority,
eb1f9ed6 88 .weight = weight,
0b2c0c31
YW
89 .family = family,
90 .address = *address,
91 };
92
93 return 1;
94}
95
/* Appends an interface address to the array. Priority and weight are both recorded as 0.
 * Return value is that of add_local_address_full(). */
static int add_local_address(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                unsigned char scope,
                int family,
                const union in_addr_union *address) {

        const uint32_t priority = 0, weight = 0;

        return add_local_address_full(list, n_list, ifindex, scope, priority, weight, family, address);
}
106
54e6f97b
LP
107int local_addresses(
108 sd_netlink *context,
109 int ifindex,
110 int af,
111 struct local_address **ret) {
112
4afd3348
LP
113 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
114 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
e80af1bd 115 _cleanup_free_ struct local_address *list = NULL;
319a4f4b 116 size_t n_list = 0;
d1ca51b1 117 int r;
d73c3269 118
ee8c4568 119 if (context)
1c4baffc 120 rtnl = sd_netlink_ref(context);
ee8c4568 121 else {
1c4baffc 122 r = sd_netlink_open(&rtnl);
ee8c4568
LP
123 if (r < 0)
124 return r;
125 }
8041b5ba 126
6a28b78f 127 r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, ifindex, af);
d1ca51b1
TG
128 if (r < 0)
129 return r;
8041b5ba 130
24c0f385 131 r = sd_netlink_message_set_request_dump(req, true);
f318f643
YW
132 if (r < 0)
133 return r;
134
1c4baffc 135 r = sd_netlink_call(rtnl, req, 0, &reply);
d1ca51b1
TG
136 if (r < 0)
137 return r;
d1ca51b1 138
d856e1a7 139 for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
0b2c0c31
YW
140 union in_addr_union a;
141 unsigned char flags, scope;
5502f0d9 142 uint16_t type;
1d050e1e 143 int ifi, family;
d1ca51b1 144
1c4baffc 145 r = sd_netlink_message_get_errno(m);
d1ca51b1
TG
146 if (r < 0)
147 return r;
148
1c4baffc 149 r = sd_netlink_message_get_type(m, &type);
d1ca51b1
TG
150 if (r < 0)
151 return r;
d1ca51b1 152 if (type != RTM_NEWADDR)
8041b5ba
LP
153 continue;
154
ee8c4568
LP
155 r = sd_rtnl_message_addr_get_ifindex(m, &ifi);
156 if (r < 0)
157 return r;
1d050e1e
LP
158 if (ifindex > 0 && ifi != ifindex)
159 continue;
ee8c4568 160
1d050e1e
LP
161 r = sd_rtnl_message_addr_get_family(m, &family);
162 if (r < 0)
163 return r;
5cb56068
YW
164 if (!IN_SET(family, AF_INET, AF_INET6))
165 continue;
1d050e1e 166 if (af != AF_UNSPEC && af != family)
ee8c4568
LP
167 continue;
168
5502f0d9 169 r = sd_rtnl_message_addr_get_flags(m, &flags);
d1ca51b1
TG
170 if (r < 0)
171 return r;
e90863f2 172 if ((flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE)) != 0)
d73c3269 173 continue;
8041b5ba 174
0b2c0c31 175 r = sd_rtnl_message_addr_get_scope(m, &scope);
d1ca51b1
TG
176 if (r < 0)
177 return r;
8041b5ba 178
0b2c0c31 179 if (ifindex == 0 && IN_SET(scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE))
d73c3269 180 continue;
8041b5ba 181
1d050e1e 182 switch (family) {
5502f0d9 183
d1ca51b1 184 case AF_INET:
0b2c0c31 185 r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a.in);
d1ca51b1 186 if (r < 0) {
0b2c0c31 187 r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a.in);
d1ca51b1
TG
188 if (r < 0)
189 continue;
190 }
191 break;
5502f0d9 192
d1ca51b1 193 case AF_INET6:
0b2c0c31 194 r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a.in6);
d1ca51b1 195 if (r < 0) {
0b2c0c31 196 r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a.in6);
d1ca51b1
TG
197 if (r < 0)
198 continue;
199 }
200 break;
5502f0d9 201
d1ca51b1 202 default:
d73c3269 203 continue;
d73c3269 204 }
8041b5ba 205
0b2c0c31
YW
206 r = add_local_address(&list, &n_list, ifi, scope, family, &a);
207 if (r < 0)
208 return r;
5502f0d9 209 };
8041b5ba 210
a64f6041
YW
211 typesafe_qsort(list, n_list, address_compare);
212 suppress_duplicates(list, &n_list);
213
214 if (ret)
c3a8c6aa 215 *ret = TAKE_PTR(list);
e9140aff
LP
216
217 return (int) n_list;
218}
219
bff94a84
YW
/* Appends a gateway address to the array. The scope of the entry is always recorded as 0.
 * Return value is that of add_local_address_full(). */
static int add_local_gateway(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                uint32_t priority,
                uint32_t weight,
                int family,
                const union in_addr_union *address) {

        const unsigned char scope = 0;

        return add_local_address_full(list, n_list, ifindex, scope, priority, weight, family, address);
}
231
54e6f97b
LP
232int local_gateways(
233 sd_netlink *context,
234 int ifindex,
235 int af,
236 struct local_address **ret) {
237
4afd3348
LP
238 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
239 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
e9140aff 240 _cleanup_free_ struct local_address *list = NULL;
319a4f4b 241 size_t n_list = 0;
e9140aff
LP
242 int r;
243
5cb56068
YW
244 /* The RTA_VIA attribute is used only for IPv4 routes with an IPv6 gateway. If IPv4 gateways are
245 * requested (af == AF_INET), then we do not return IPv6 gateway addresses. Similary, if IPv6
246 * gateways are requested (af == AF_INET6), then we do not return gateway addresses for IPv4 routes.
247 * So, the RTA_VIA attribute is only parsed when af == AF_UNSPEC. */
248 bool allow_via = af == AF_UNSPEC;
249
e9140aff 250 if (context)
1c4baffc 251 rtnl = sd_netlink_ref(context);
e9140aff 252 else {
1c4baffc 253 r = sd_netlink_open(&rtnl);
e9140aff
LP
254 if (r < 0)
255 return r;
256 }
257
1d050e1e 258 r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC);
e9140aff
LP
259 if (r < 0)
260 return r;
261
3e0eeb8e
YW
262 r = sd_rtnl_message_route_set_type(req, RTN_UNICAST);
263 if (r < 0)
264 return r;
265
266 r = sd_rtnl_message_route_set_table(req, RT_TABLE_MAIN);
267 if (r < 0)
268 return r;
269
24c0f385 270 r = sd_netlink_message_set_request_dump(req, true);
e9140aff
LP
271 if (r < 0)
272 return r;
273
1c4baffc 274 r = sd_netlink_call(rtnl, req, 0, &reply);
e9140aff
LP
275 if (r < 0)
276 return r;
277
bff94a84 278 for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
e9140aff 279 uint16_t type;
d1b014df 280 unsigned char dst_len, src_len, table;
37359b1c 281 uint32_t ifi = 0, priority = 0;
1d050e1e 282 int family;
e9140aff 283
1c4baffc 284 r = sd_netlink_message_get_errno(m);
e9140aff
LP
285 if (r < 0)
286 return r;
287
1c4baffc 288 r = sd_netlink_message_get_type(m, &type);
e9140aff
LP
289 if (r < 0)
290 return r;
e9140aff
LP
291 if (type != RTM_NEWROUTE)
292 continue;
293
a98433c0 294 /* We only care for default routes */
584d0d2a 295 r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len);
e9140aff
LP
296 if (r < 0)
297 return r;
e9140aff
LP
298 if (dst_len != 0)
299 continue;
300
584d0d2a 301 r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len);
a98433c0
LP
302 if (r < 0)
303 return r;
304 if (src_len != 0)
305 continue;
306
d1b014df
LP
307 r = sd_rtnl_message_route_get_table(m, &table);
308 if (r < 0)
309 return r;
310 if (table != RT_TABLE_MAIN)
311 continue;
312
37359b1c 313 r = sd_netlink_message_read_u32(m, RTA_PRIORITY, &priority);
bff94a84 314 if (r < 0 && r != -ENODATA)
e9140aff 315 return r;
e9140aff 316
1d050e1e
LP
317 r = sd_rtnl_message_route_get_family(m, &family);
318 if (r < 0)
319 return r;
bff94a84 320 if (!IN_SET(family, AF_INET, AF_INET6))
1d050e1e 321 continue;
5cb56068
YW
322 if (af != AF_UNSPEC && af != family)
323 continue;
1d050e1e 324
bff94a84
YW
325 r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi);
326 if (r < 0 && r != -ENODATA)
327 return r;
328 if (r >= 0) {
329 if (ifi <= 0)
330 return -EINVAL;
331 if (ifindex > 0 && (int) ifi != ifindex)
332 continue;
e9140aff 333
4019bec8 334 union in_addr_union gateway;
bff94a84
YW
335 r = netlink_message_read_in_addr_union(m, RTA_GATEWAY, family, &gateway);
336 if (r < 0 && r != -ENODATA)
337 return r;
338 if (r >= 0) {
eb1f9ed6 339 r = add_local_gateway(&list, &n_list, ifi, priority, 0, family, &gateway);
bff94a84
YW
340 if (r < 0)
341 return r;
e9140aff 342
e9140aff 343 continue;
bff94a84 344 }
e9140aff 345
5cb56068
YW
346 if (!allow_via)
347 continue;
348
bff94a84 349 if (family != AF_INET)
e9140aff
LP
350 continue;
351
4019bec8 352 RouteVia via;
bff94a84
YW
353 r = sd_netlink_message_read(m, RTA_VIA, sizeof(via), &via);
354 if (r < 0 && r != -ENODATA)
355 return r;
356 if (r >= 0) {
5cb56068
YW
357 if (via.family != AF_INET6)
358 return -EBADMSG;
359
eb1f9ed6 360 r = add_local_gateway(&list, &n_list, ifi, priority, 0, via.family,
0b2c0c31 361 &(union in_addr_union) { .in6 = via.address.in6 });
bff94a84
YW
362 if (r < 0)
363 return r;
bff94a84 364 }
1305fe4e
YW
365
366 /* If the route has RTA_OIF, it does not have RTA_MULTIPATH. */
367 continue;
e9140aff
LP
368 }
369
4019bec8
YW
370 size_t rta_len;
371 _cleanup_free_ void *rta_multipath = NULL;
bff94a84
YW
372 r = sd_netlink_message_read_data(m, RTA_MULTIPATH, &rta_len, &rta_multipath);
373 if (r < 0 && r != -ENODATA)
374 return r;
375 if (r >= 0) {
4019bec8 376 _cleanup_ordered_set_free_free_ OrderedSet *multipath_routes = NULL;
bff94a84 377 MultipathRoute *mr;
e9140aff 378
bff94a84
YW
379 r = rtattr_read_nexthop(rta_multipath, rta_len, family, &multipath_routes);
380 if (r < 0)
381 return r;
e9140aff 382
bff94a84
YW
383 ORDERED_SET_FOREACH(mr, multipath_routes) {
384 if (ifindex > 0 && mr->ifindex != ifindex)
385 continue;
386
5cb56068
YW
387 if (!allow_via && family != mr->gateway.family)
388 continue;
389
0b2c0c31 390 union in_addr_union a = mr->gateway.address;
eb1f9ed6 391 r = add_local_gateway(&list, &n_list, ifi, priority, mr->weight, mr->gateway.family, &a);
bff94a84
YW
392 if (r < 0)
393 return r;
394 }
395 }
e9140aff
LP
396 }
397
a64f6041
YW
398 typesafe_qsort(list, n_list, address_compare);
399 suppress_duplicates(list, &n_list);
400
401 if (ret)
54e6f97b 402 *ret = TAKE_PTR(list);
54e6f97b
LP
403
404 return (int) n_list;
405}
406
0b2c0c31
YW
/* Appends an outbound (source) address to the array. Scope, priority and weight are all recorded
 * as 0. Return value is that of add_local_address_full(). */
static int add_local_outbound(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                int family,
                const union in_addr_union *address) {

        const unsigned char scope = 0;
        const uint32_t priority = 0, weight = 0;

        return add_local_address_full(list, n_list, ifindex, scope, priority, weight, family, address);
}
416
54e6f97b
LP
417int local_outbounds(
418 sd_netlink *context,
419 int ifindex,
420 int af,
421 struct local_address **ret) {
422
423 _cleanup_free_ struct local_address *list = NULL, *gateways = NULL;
319a4f4b 424 size_t n_list = 0;
54e6f97b
LP
425 int r, n_gateways;
426
427 /* Determines our default outbound addresses, i.e. the "primary" local addresses we use to talk to IP
428 * addresses behind the default routes. This is still an address of the local host (i.e. this doesn't
429 * resolve NAT or so), but it's the set of addresses the local IP stack most likely uses to talk to
430 * other hosts.
431 *
432 * This works by connect()ing a SOCK_DGRAM socket to the local gateways, and then reading the IP
433 * address off the socket that was chosen for the routing decision. */
434
435 n_gateways = local_gateways(context, ifindex, af, &gateways);
436 if (n_gateways < 0)
437 return n_gateways;
438 if (n_gateways == 0) {
439 /* No gateways? Then we have no outbound addresses either. */
440 if (ret)
441 *ret = NULL;
442
443 return 0;
444 }
445
446 for (int i = 0; i < n_gateways; i++) {
254d1313 447 _cleanup_close_ int fd = -EBADF;
54e6f97b
LP
448 union sockaddr_union sa;
449 socklen_t salen;
450
451 fd = socket(gateways[i].family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
452 if (fd < 0)
453 return -errno;
454
455 switch (gateways[i].family) {
456
457 case AF_INET:
458 sa.in = (struct sockaddr_in) {
459 .sin_family = AF_INET,
460 .sin_addr = gateways[i].address.in,
098d42b6
YW
461 .sin_port = htobe16(53), /* doesn't really matter which port we pick —
462 * we just care about the routing decision */
54e6f97b
LP
463 };
464
465 break;
466
467 case AF_INET6:
468 sa.in6 = (struct sockaddr_in6) {
469 .sin6_family = AF_INET6,
470 .sin6_addr = gateways[i].address.in6,
471 .sin6_port = htobe16(53),
472 .sin6_scope_id = gateways[i].ifindex,
473 };
474
475 break;
476
477 default:
04499a70 478 assert_not_reached();
54e6f97b
LP
479 }
480
481 /* So ideally we'd just use IP_UNICAST_IF here to pass the ifindex info to the kernel before
482 * connect()ing, sot that it influences the routing decision. However, on current kernels
483 * IP_UNICAST_IF doesn't actually influence the routing decision for UDP — which I think
484 * should probably just be considered a bug. Once that bug is fixed this is the best API to
485 * use, since it is the most lightweight. */
486 r = socket_set_unicast_if(fd, gateways[i].family, gateways[i].ifindex);
487 if (r < 0)
488 log_debug_errno(r, "Failed to set unicast interface index %i, ignoring: %m", gateways[i].ifindex);
489
490 /* We'll also use SO_BINDTOINDEX. This requires CAP_NET_RAW on old kernels, hence there's a
491 * good chance this fails. Since 5.7 this restriction was dropped and the first
492 * SO_BINDTOINDEX on a socket may be done without privileges. This one has the benefit of
493 * really influencing the routing decision, i.e. this one definitely works for us — as long
098d42b6 494 * as we have the privileges for it. */
54e6f97b
LP
495 r = socket_bind_to_ifindex(fd, gateways[i].ifindex);
496 if (r < 0)
497 log_debug_errno(r, "Failed to bind socket to interface %i, ignoring: %m", gateways[i].ifindex);
498
499 /* Let's now connect() to the UDP socket, forcing the kernel to make a routing decision and
500 * auto-bind the socket. We ignore failures on this, since that failure might happen for a
501 * multitude of reasons (policy/firewall issues, who knows?) and some of them might be
502 * *after* the routing decision and the auto-binding already took place. If so we can still
503 * make use of the binding and return it. Hence, let's not unnecessarily fail early here: we
504 * can still easily detect if the auto-binding worked or not, by comparing the bound IP
098d42b6 505 * address with zero — which we do below. */
54e6f97b
LP
506 if (connect(fd, &sa.sa, SOCKADDR_LEN(sa)) < 0)
507 log_debug_errno(errno, "Failed to connect SOCK_DGRAM socket to gateway, ignoring: %m");
508
509 /* Let's now read the socket address of the socket. A routing decision should have been
510 * made. Let's verify that and use the data. */
511 salen = SOCKADDR_LEN(sa);
512 if (getsockname(fd, &sa.sa, &salen) < 0)
513 return -errno;
514 assert(sa.sa.sa_family == gateways[i].family);
515 assert(salen == SOCKADDR_LEN(sa));
516
517 switch (gateways[i].family) {
518
519 case AF_INET:
520 if (in4_addr_is_null(&sa.in.sin_addr)) /* Auto-binding didn't work. :-( */
521 continue;
522
0b2c0c31
YW
523 r = add_local_outbound(&list, &n_list, gateways[i].ifindex, gateways[i].family,
524 &(union in_addr_union) { .in = sa.in.sin_addr });
525 if (r < 0)
526 return r;
54e6f97b
LP
527 break;
528
529 case AF_INET6:
530 if (in6_addr_is_null(&sa.in6.sin6_addr))
531 continue;
532
0b2c0c31
YW
533 r = add_local_outbound(&list, &n_list, gateways[i].ifindex, gateways[i].family,
534 &(union in_addr_union) { .in6 = sa.in6.sin6_addr });
535 if (r < 0)
536 return r;
54e6f97b
LP
537 break;
538
539 default:
04499a70 540 assert_not_reached();
54e6f97b
LP
541 }
542 }
543
a64f6041
YW
544 typesafe_qsort(list, n_list, address_compare);
545 suppress_duplicates(list, &n_list);
546
547 if (ret)
c3a8c6aa 548 *ret = TAKE_PTR(list);
d73c3269 549
e80af1bd 550 return (int) n_list;
8041b5ba 551}