]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/shared/local-addresses.c
man/systemd-sysext: list ephemeral/ephemeral-import in the list of options
[thirdparty/systemd.git] / src / shared / local-addresses.c
... / ...
CommitLineData
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include "sd-netlink.h"
4
5#include "alloc-util.h"
6#include "fd-util.h"
7#include "local-addresses.h"
8#include "log.h"
9#include "netlink-util.h"
10#include "socket-util.h"
11#include "sort-util.h"
12
13static int address_compare(const struct local_address *a, const struct local_address *b) {
14 int r;
15
16 /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
17
18 if (a->family == AF_INET && b->family == AF_INET6)
19 return -1;
20 if (a->family == AF_INET6 && b->family == AF_INET)
21 return 1;
22
23 r = CMP(a->scope, b->scope);
24 if (r != 0)
25 return r;
26
27 r = CMP(a->priority, b->priority);
28 if (r != 0)
29 return r;
30
31 r = CMP(a->weight, b->weight);
32 if (r != 0)
33 return r;
34
35 r = CMP(a->ifindex, b->ifindex);
36 if (r != 0)
37 return r;
38
39 return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
40}
41
42bool has_local_address(const struct local_address *addresses, size_t n_addresses, const struct local_address *needle) {
43 assert(addresses || n_addresses == 0);
44 assert(needle);
45
46 FOREACH_ARRAY(i, addresses, n_addresses)
47 if (address_compare(i, needle) == 0)
48 return true;
49
50 return false;
51}
52
53static void suppress_duplicates(struct local_address *list, size_t *n_list) {
54 size_t old_size, new_size;
55
56 /* Removes duplicate entries, assumes the list of addresses is already sorted. Updates in-place. */
57
58 if (*n_list < 2) /* list with less than two entries can't have duplicates */
59 return;
60
61 old_size = *n_list;
62 new_size = 1;
63
64 for (size_t i = 1; i < old_size; i++) {
65
66 if (address_compare(list + i, list + new_size - 1) == 0)
67 continue;
68
69 list[new_size++] = list[i];
70 }
71
72 *n_list = new_size;
73}
74
75static int add_local_address_full(
76 struct local_address **list,
77 size_t *n_list,
78 int ifindex,
79 unsigned char scope,
80 uint32_t priority,
81 uint32_t weight,
82 int family,
83 const union in_addr_union *address,
84 const union in_addr_union *prefsrc) {
85
86 assert(list);
87 assert(n_list);
88 assert(ifindex > 0);
89 assert(IN_SET(family, AF_INET, AF_INET6));
90 assert(address);
91
92 if (!GREEDY_REALLOC(*list, *n_list + 1))
93 return -ENOMEM;
94
95 (*list)[(*n_list)++] = (struct local_address) {
96 .ifindex = ifindex,
97 .scope = scope,
98 .priority = priority,
99 .weight = weight,
100 .family = family,
101 .address = *address,
102 .prefsrc = prefsrc ? *prefsrc : IN_ADDR_NULL,
103 };
104
105 return 1;
106}
107
/* Public convenience wrapper around add_local_address_full(): appends an address with the given scope,
 * with priority and weight both zero and without a preferred source address. Return value is passed
 * through unchanged. */
int add_local_address(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                unsigned char scope,
                int family,
                const union in_addr_union *address) {

        const uint32_t no_priority = 0, no_weight = 0;

        return add_local_address_full(
                        list, n_list, ifindex,
                        scope, no_priority, no_weight,
                        family, address, /* prefsrc = */ NULL);
}
121
122int local_addresses(
123 sd_netlink *context,
124 int ifindex,
125 int af,
126 struct local_address **ret) {
127
128 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
129 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
130 _cleanup_free_ struct local_address *list = NULL;
131 size_t n_list = 0;
132 int r;
133
134 if (context)
135 rtnl = sd_netlink_ref(context);
136 else {
137 r = sd_netlink_open(&rtnl);
138 if (r < 0)
139 return r;
140 }
141
142 r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, ifindex, af);
143 if (r < 0)
144 return r;
145
146 r = sd_netlink_message_set_request_dump(req, true);
147 if (r < 0)
148 return r;
149
150 r = sd_netlink_call(rtnl, req, 0, &reply);
151 if (r < 0)
152 return r;
153
154 for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
155 union in_addr_union a;
156 unsigned char scope;
157 uint16_t type;
158 int ifi, family;
159
160 r = sd_netlink_message_get_errno(m);
161 if (r < 0)
162 return r;
163
164 r = sd_netlink_message_get_type(m, &type);
165 if (r < 0)
166 return r;
167 if (type != RTM_NEWADDR)
168 continue;
169
170 r = sd_rtnl_message_addr_get_ifindex(m, &ifi);
171 if (r < 0)
172 return r;
173 if (ifindex > 0 && ifi != ifindex)
174 continue;
175
176 r = sd_rtnl_message_addr_get_family(m, &family);
177 if (r < 0)
178 return r;
179 if (!IN_SET(family, AF_INET, AF_INET6))
180 continue;
181 if (af != AF_UNSPEC && af != family)
182 continue;
183
184 uint32_t flags;
185 r = sd_netlink_message_read_u32(m, IFA_FLAGS, &flags);
186 if (r < 0)
187 return r;
188 if ((flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE)) != 0)
189 continue;
190
191 r = sd_rtnl_message_addr_get_scope(m, &scope);
192 if (r < 0)
193 return r;
194
195 if (ifindex == 0 && IN_SET(scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE))
196 continue;
197
198 switch (family) {
199
200 case AF_INET:
201 r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a.in);
202 if (r < 0) {
203 r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a.in);
204 if (r < 0)
205 continue;
206 }
207 break;
208
209 case AF_INET6:
210 r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a.in6);
211 if (r < 0) {
212 r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a.in6);
213 if (r < 0)
214 continue;
215 }
216 break;
217
218 default:
219 assert_not_reached();
220 }
221
222 r = add_local_address(&list, &n_list, ifi, scope, family, &a);
223 if (r < 0)
224 return r;
225 };
226
227 typesafe_qsort(list, n_list, address_compare);
228 suppress_duplicates(list, &n_list);
229
230 if (ret)
231 *ret = TAKE_PTR(list);
232
233 return (int) n_list;
234}
235
/* Appends a gateway entry: same as add_local_address_full(), but with the scope fixed to zero.
 * The return value is passed through unchanged. */
static int add_local_gateway(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                uint32_t priority,
                uint32_t weight,
                int family,
                const union in_addr_union *address,
                const union in_addr_union *prefsrc) {

        const unsigned char gateway_scope = 0;

        return add_local_address_full(
                        list, n_list, ifindex,
                        gateway_scope, priority, weight,
                        family, address, prefsrc);
}
252
253static int parse_nexthop_one(
254 struct local_address **list,
255 size_t *n_list,
256 bool allow_via,
257 int family,
258 uint32_t priority,
259 const union in_addr_union *prefsrc,
260 const struct rtnexthop *rtnh) {
261
262 bool has_gw = false;
263 int r;
264
265 assert(rtnh);
266
267 size_t len = rtnh->rtnh_len - sizeof(struct rtnexthop);
268 for (struct rtattr *attr = RTNH_DATA(rtnh); RTA_OK(attr, len); attr = RTA_NEXT(attr, len))
269
270 switch (attr->rta_type) {
271 case RTA_GATEWAY:
272 if (has_gw)
273 return -EBADMSG;
274
275 has_gw = true;
276
277 if (attr->rta_len != RTA_LENGTH(FAMILY_ADDRESS_SIZE(family)))
278 return -EBADMSG;
279
280 union in_addr_union a;
281 memcpy(&a, RTA_DATA(attr), FAMILY_ADDRESS_SIZE(family));
282 r = add_local_gateway(list, n_list, rtnh->rtnh_ifindex, priority, rtnh->rtnh_hops, family, &a, prefsrc);
283 if (r < 0)
284 return r;
285
286 break;
287
288 case RTA_VIA:
289 if (has_gw)
290 return -EBADMSG;
291
292 has_gw = true;
293
294 if (!allow_via)
295 continue;
296
297 if (family != AF_INET)
298 return -EBADMSG; /* RTA_VIA is only supported for IPv4 routes. */
299
300 if (attr->rta_len != RTA_LENGTH(sizeof(RouteVia)))
301 return -EBADMSG;
302
303 RouteVia *via = RTA_DATA(attr);
304 if (via->family != AF_INET6)
305 return -EBADMSG; /* gateway address should be always IPv6. */
306
307 r = add_local_gateway(list, n_list, rtnh->rtnh_ifindex, priority, rtnh->rtnh_hops, via->family,
308 &(union in_addr_union) { .in6 = via->address.in6 },
309 /* prefsrc = */ NULL);
310 if (r < 0)
311 return r;
312
313 break;
314 }
315
316 return 0;
317}
318
/* Walks a sequence of 'struct rtnexthop' records occupying 'size' bytes starting at 'rtnh' and feeds
 * each one to parse_nexthop_one().
 *
 * ifindex: if > 0, nexthops whose rtnh_ifindex differs are skipped.
 * The remaining parameters are forwarded to parse_nexthop_one() unchanged.
 *
 * Returns 0 on success, -EBADMSG if the buffer is too small for a single nexthop header or a record's
 * length field is inconsistent with the buffer, or the error returned by parse_nexthop_one(). */
static int parse_nexthops(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                bool allow_via,
                int family,
                uint32_t priority,
                const union in_addr_union *prefsrc,
                const struct rtnexthop *rtnh,
                size_t size) {

        int r;

        assert(list);
        assert(n_list);
        assert(IN_SET(family, AF_INET, AF_INET6));
        assert(rtnh || size == 0);

        if (size < sizeof(struct rtnexthop))
                return -EBADMSG;

        while (size >= sizeof(struct rtnexthop)) {
                /* The aligned record must fit into what is left of the buffer ... */
                size_t step = NLMSG_ALIGN(rtnh->rtnh_len);
                if (step > size)
                        return -EBADMSG;

                /* ... and must at least cover its own header. */
                if (rtnh->rtnh_len < sizeof(struct rtnexthop))
                        return -EBADMSG;

                if (ifindex <= 0 || rtnh->rtnh_ifindex == ifindex) {
                        r = parse_nexthop_one(list, n_list, allow_via, family, priority, prefsrc, rtnh);
                        if (r < 0)
                                return r;
                }

                size -= step;
                rtnh = RTNH_NEXT(rtnh);
        }

        return 0;
}
361
362int local_gateways(
363 sd_netlink *context,
364 int ifindex,
365 int af,
366 struct local_address **ret) {
367
368 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
369 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
370 _cleanup_free_ struct local_address *list = NULL;
371 size_t n_list = 0;
372 int r;
373
374 /* The RTA_VIA attribute is used only for IPv4 routes with an IPv6 gateway. If IPv4 gateways are
375 * requested (af == AF_INET), then we do not return IPv6 gateway addresses. Similarly, if IPv6
376 * gateways are requested (af == AF_INET6), then we do not return gateway addresses for IPv4 routes.
377 * So, the RTA_VIA attribute is only parsed when af == AF_UNSPEC. */
378 bool allow_via = af == AF_UNSPEC;
379
380 if (context)
381 rtnl = sd_netlink_ref(context);
382 else {
383 r = sd_netlink_open(&rtnl);
384 if (r < 0)
385 return r;
386 }
387
388 r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC);
389 if (r < 0)
390 return r;
391
392 r = sd_rtnl_message_route_set_type(req, RTN_UNICAST);
393 if (r < 0)
394 return r;
395
396 r = sd_rtnl_message_route_set_table(req, RT_TABLE_MAIN);
397 if (r < 0)
398 return r;
399
400 r = sd_netlink_message_set_request_dump(req, true);
401 if (r < 0)
402 return r;
403
404 r = sd_netlink_call(rtnl, req, 0, &reply);
405 if (r < 0)
406 return r;
407
408 for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
409 union in_addr_union prefsrc = IN_ADDR_NULL;
410 uint16_t type;
411 unsigned char dst_len, src_len, table;
412 uint32_t ifi = 0, priority = 0;
413 int family;
414
415 r = sd_netlink_message_get_errno(m);
416 if (r < 0)
417 return r;
418
419 r = sd_netlink_message_get_type(m, &type);
420 if (r < 0)
421 return r;
422 if (type != RTM_NEWROUTE)
423 continue;
424
425 /* We only care for default routes */
426 r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len);
427 if (r < 0)
428 return r;
429 if (dst_len != 0)
430 continue;
431
432 r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len);
433 if (r < 0)
434 return r;
435 if (src_len != 0)
436 continue;
437
438 r = sd_rtnl_message_route_get_table(m, &table);
439 if (r < 0)
440 return r;
441 if (table != RT_TABLE_MAIN)
442 continue;
443
444 r = sd_netlink_message_read_u32(m, RTA_PRIORITY, &priority);
445 if (r < 0 && r != -ENODATA)
446 return r;
447
448 r = sd_rtnl_message_route_get_family(m, &family);
449 if (r < 0)
450 return r;
451 if (!IN_SET(family, AF_INET, AF_INET6))
452 continue;
453 if (af != AF_UNSPEC && af != family)
454 continue;
455
456 r = netlink_message_read_in_addr_union(m, RTA_PREFSRC, family, &prefsrc);
457 if (r < 0 && r != -ENODATA)
458 return r;
459
460 r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi);
461 if (r < 0 && r != -ENODATA)
462 return r;
463 if (r >= 0) {
464 if (ifi <= 0)
465 return -EINVAL;
466 if (ifindex > 0 && (int) ifi != ifindex)
467 continue;
468
469 union in_addr_union gateway;
470 r = netlink_message_read_in_addr_union(m, RTA_GATEWAY, family, &gateway);
471 if (r < 0 && r != -ENODATA)
472 return r;
473 if (r >= 0) {
474 r = add_local_gateway(&list, &n_list, ifi, priority, 0, family, &gateway, &prefsrc);
475 if (r < 0)
476 return r;
477
478 continue;
479 }
480
481 if (!allow_via)
482 continue;
483
484 if (family != AF_INET)
485 continue;
486
487 RouteVia via;
488 r = sd_netlink_message_read(m, RTA_VIA, sizeof(via), &via);
489 if (r < 0 && r != -ENODATA)
490 return r;
491 if (r >= 0) {
492 if (via.family != AF_INET6)
493 return -EBADMSG;
494
495 /* Ignore prefsrc, and let's take the source address by socket command, if necessary. */
496 r = add_local_gateway(&list, &n_list, ifi, priority, 0, via.family,
497 &(union in_addr_union) { .in6 = via.address.in6 },
498 /* prefsrc = */ NULL);
499 if (r < 0)
500 return r;
501 }
502
503 /* If the route has RTA_OIF, it does not have RTA_MULTIPATH. */
504 continue;
505 }
506
507 size_t rta_len;
508 _cleanup_free_ void *rta_multipath = NULL;
509 r = sd_netlink_message_read_data(m, RTA_MULTIPATH, &rta_len, &rta_multipath);
510 if (r < 0 && r != -ENODATA)
511 return r;
512 if (r >= 0) {
513 r = parse_nexthops(&list, &n_list, ifindex, allow_via, family, priority, &prefsrc, rta_multipath, rta_len);
514 if (r < 0)
515 return r;
516 }
517 }
518
519 typesafe_qsort(list, n_list, address_compare);
520 suppress_duplicates(list, &n_list);
521
522 if (ret)
523 *ret = TAKE_PTR(list);
524
525 return (int) n_list;
526}
527
/* Appends an outbound address entry: same as add_local_address_full(), with scope, priority and weight
 * all zero and without a preferred source address. The return value is passed through unchanged. */
static int add_local_outbound(
                struct local_address **list,
                size_t *n_list,
                int ifindex,
                int family,
                const union in_addr_union *address) {

        const unsigned char no_scope = 0;
        const uint32_t no_priority = 0, no_weight = 0;

        return add_local_address_full(
                        list, n_list, ifindex,
                        no_scope, no_priority, no_weight,
                        family, address, /* prefsrc = */ NULL);
}
540
541static int add_local_outbound_by_prefsrc(
542 struct local_address **list,
543 size_t *n_list,
544 const struct local_address *gateway,
545 const struct local_address *addresses,
546 size_t n_addresses) {
547
548 int r;
549
550 assert(list);
551 assert(n_list);
552 assert(gateway);
553
554 if (!in_addr_is_set(gateway->family, &gateway->prefsrc))
555 return 0;
556
557 /* If the gateway has prefsrc, then let's honor the field. But, check if the address is assigned to
558 * the same interface, like we do with SO_BINDTOINDEX. */
559
560 bool found = false;
561 FOREACH_ARRAY(a, addresses, n_addresses) {
562 if (a->ifindex != gateway->ifindex)
563 continue;
564 if (a->family != gateway->family)
565 continue;
566 if (in_addr_equal(a->family, &a->address, &gateway->prefsrc) <= 0)
567 continue;
568
569 found = true;
570 break;
571 }
572 if (!found)
573 return -EHOSTUNREACH;
574
575 r = add_local_outbound(list, n_list, gateway->ifindex, gateway->family, &gateway->prefsrc);
576 if (r < 0)
577 return r;
578
579 return 1;
580}
581
582int local_outbounds(
583 sd_netlink *context,
584 int ifindex,
585 int af,
586 struct local_address **ret) {
587
588 _cleanup_free_ struct local_address *list = NULL, *gateways = NULL, *addresses = NULL;
589 size_t n_list = 0;
590 int r, n_gateways, n_addresses;
591
592 /* Determines our default outbound addresses, i.e. the "primary" local addresses we use to talk to IP
593 * addresses behind the default routes. This is still an address of the local host (i.e. this doesn't
594 * resolve NAT or so), but it's the set of addresses the local IP stack most likely uses to talk to
595 * other hosts.
596 *
597 * This works by connect()ing a SOCK_DGRAM socket to the local gateways, and then reading the IP
598 * address off the socket that was chosen for the routing decision. */
599
600 n_gateways = local_gateways(context, ifindex, af, &gateways);
601 if (n_gateways < 0)
602 return n_gateways;
603 if (n_gateways == 0) {
604 /* No gateways? Then we have no outbound addresses either. */
605 if (ret)
606 *ret = NULL;
607
608 return 0;
609 }
610
611 n_addresses = local_addresses(context, ifindex, af, &addresses);
612 if (n_addresses < 0)
613 return n_addresses;
614
615 FOREACH_ARRAY(i, gateways, n_gateways) {
616 _cleanup_close_ int fd = -EBADF;
617 union sockaddr_union sa;
618 socklen_t salen;
619
620 r = add_local_outbound_by_prefsrc(&list, &n_list, i, addresses, n_addresses);
621 if (r > 0 || r == -EHOSTUNREACH)
622 continue;
623 if (r < 0)
624 return r;
625
626 fd = socket(i->family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
627 if (fd < 0)
628 return -errno;
629
630 switch (i->family) {
631
632 case AF_INET:
633 sa.in = (struct sockaddr_in) {
634 .sin_family = AF_INET,
635 .sin_addr = i->address.in,
636 .sin_port = htobe16(53), /* doesn't really matter which port we pick —
637 * we just care about the routing decision */
638 };
639
640 break;
641
642 case AF_INET6:
643 sa.in6 = (struct sockaddr_in6) {
644 .sin6_family = AF_INET6,
645 .sin6_addr = i->address.in6,
646 .sin6_port = htobe16(53),
647 .sin6_scope_id = i->ifindex,
648 };
649
650 break;
651
652 default:
653 assert_not_reached();
654 }
655
656 /* So ideally we'd just use IP_UNICAST_IF here to pass the ifindex info to the kernel before
657 * connect()ing, sot that it influences the routing decision. However, on current kernels
658 * IP_UNICAST_IF doesn't actually influence the routing decision for UDP — which I think
659 * should probably just be considered a bug. Once that bug is fixed this is the best API to
660 * use, since it is the most lightweight. */
661 r = socket_set_unicast_if(fd, i->family, i->ifindex);
662 if (r < 0)
663 log_debug_errno(r, "Failed to set unicast interface index %i, ignoring: %m", i->ifindex);
664
665 /* We'll also use SO_BINDTOINDEX. This requires CAP_NET_RAW on old kernels, hence there's a
666 * good chance this fails. Since 5.7 this restriction was dropped and the first
667 * SO_BINDTOINDEX on a socket may be done without privileges. This one has the benefit of
668 * really influencing the routing decision, i.e. this one definitely works for us — as long
669 * as we have the privileges for it. */
670 r = socket_bind_to_ifindex(fd, i->ifindex);
671 if (r < 0)
672 log_debug_errno(r, "Failed to bind socket to interface %i, ignoring: %m", i->ifindex);
673
674 /* Let's now connect() to the UDP socket, forcing the kernel to make a routing decision and
675 * auto-bind the socket. We ignore failures on this, since that failure might happen for a
676 * multitude of reasons (policy/firewall issues, who knows?) and some of them might be
677 * *after* the routing decision and the auto-binding already took place. If so we can still
678 * make use of the binding and return it. Hence, let's not unnecessarily fail early here: we
679 * can still easily detect if the auto-binding worked or not, by comparing the bound IP
680 * address with zero — which we do below. */
681 if (connect(fd, &sa.sa, sockaddr_len(&sa)) < 0)
682 log_debug_errno(errno, "Failed to connect SOCK_DGRAM socket to gateway, ignoring: %m");
683
684 /* Let's now read the socket address of the socket. A routing decision should have been
685 * made. Let's verify that and use the data. */
686 salen = sockaddr_len(&sa);
687 if (getsockname(fd, &sa.sa, &salen) < 0)
688 return -errno;
689 assert(sa.sa.sa_family == i->family);
690 assert(salen == sockaddr_len(&sa));
691
692 switch (i->family) {
693
694 case AF_INET:
695 if (in4_addr_is_null(&sa.in.sin_addr)) /* Auto-binding didn't work. :-( */
696 continue;
697
698 r = add_local_outbound(&list, &n_list, i->ifindex, i->family,
699 &(union in_addr_union) { .in = sa.in.sin_addr });
700 if (r < 0)
701 return r;
702 break;
703
704 case AF_INET6:
705 if (in6_addr_is_null(&sa.in6.sin6_addr))
706 continue;
707
708 r = add_local_outbound(&list, &n_list, i->ifindex, i->family,
709 &(union in_addr_union) { .in6 = sa.in6.sin6_addr });
710 if (r < 0)
711 return r;
712 break;
713
714 default:
715 assert_not_reached();
716 }
717 }
718
719 typesafe_qsort(list, n_list, address_compare);
720 suppress_duplicates(list, &n_list);
721
722 if (ret)
723 *ret = TAKE_PTR(list);
724
725 return (int) n_list;
726}