1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include "sd-netlink.h"
5 #include "alloc-util.h"
7 #include "local-addresses.h"
9 #include "netlink-util.h"
10 #include "socket-util.h"
11 #include "sort-util.h"
/* Three-way comparison of two local_address entries. In this file it is passed to typesafe_qsort() and
 * used as an equality test (result == 0) by has_local_address() and suppress_duplicates().
 *
 * The visible checks run in this order: address family (AF_INET vs. AF_INET6), then scope, priority,
 * weight and ifindex via CMP(), and finally a memcmp() of the raw address bytes.
 *
 * NOTE(review): the statements that act on each intermediate result are not visible in this view;
 * presumably each non-zero result is returned immediately. Confirm against the full source. */
13 static int address_compare(const struct local_address
*a
, const struct local_address
*b
) {
16 /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
/* The family check comes first in the code, i.e. before the scope comparison. */
18 if (a
->family
== AF_INET
&& b
->family
== AF_INET6
)
20 if (a
->family
== AF_INET6
&& b
->family
== AF_INET
)
23 r
= CMP(a
->scope
, b
->scope
);
27 r
= CMP(a
->priority
, b
->priority
);
31 r
= CMP(a
->weight
, b
->weight
);
35 r
= CMP(a
->ifindex
, b
->ifindex
);
/* Final tie-breaker: compare the address bytes, using the length implied by a's family. */
39 return memcmp(&a
->address
, &b
->address
, FAMILY_ADDRESS_SIZE(a
->family
));
/* Linear search for 'needle' in the array 'addresses' of 'n_addresses' entries. Two entries count as
 * equal when address_compare() yields 0 for them.
 *
 * 'addresses' may be NULL only if 'n_addresses' is 0 (asserted below).
 *
 * NOTE(review): the return statements are not visible in this view; presumably true is returned on the
 * first match and false otherwise. Confirm against the full source. */
42 bool has_local_address(const struct local_address
*addresses
, size_t n_addresses
, const struct local_address
*needle
) {
43 assert(addresses
|| n_addresses
== 0);
46 FOREACH_ARRAY(i
, addresses
, n_addresses
)
47 if (address_compare(i
, needle
) == 0)
/* In-place de-duplication of a sorted array of local_address entries.
 *
 * 'list' is the array to compact; 'n_list' points to its element count. Equality is decided by
 * address_compare() == 0, so the array has to be sorted with that same comparator beforehand.
 *
 * NOTE(review): the initialisation of old_size/new_size and the final write-back to *n_list are not
 * visible in this view. Confirm against the full source. */
53 static void suppress_duplicates(struct local_address
*list
, size_t *n_list
) {
54 size_t old_size
, new_size
;
56 /* Removes duplicate entries, assumes the list of addresses is already sorted. Updates in-place. */
58 if (*n_list
< 2) /* A list with fewer than two entries can't have duplicates */
/* Start at the second element; each element is compared against the most recently kept one. */
64 for (size_t i
= 1; i
< old_size
; i
++) {
66 if (address_compare(list
+ i
, list
+ new_size
- 1) == 0)
/* Keep this element by moving it to the end of the compacted prefix. */
69 list
[new_size
++] = list
[i
];
/* Appends one entry to the dynamically grown array *list, whose element count is *n_list.
 *
 * 'address' is the address to store. 'prefsrc' is optional: when it is NULL the entry's prefsrc field
 * is set to IN_ADDR_NULL. Only AF_INET and AF_INET6 are accepted as family (asserted below).
 *
 * NOTE(review): several parameters and most fields of the initialiser are not visible in this view,
 * nor is the value returned on allocation failure or success. Confirm against the full source. */
75 static int add_local_address_full(
76 struct local_address
**list
,
83 const union in_addr_union
*address
,
84 const union in_addr_union
*prefsrc
) {
89 assert(IN_SET(family
, AF_INET
, AF_INET6
));
/* Make room for one more element before writing to it. */
92 if (!GREEDY_REALLOC(*list
, *n_list
+ 1))
/* Store the new entry in the next free slot and bump the element count. */
95 (*list
)[(*n_list
)++] = (struct local_address
) {
102 .prefsrc
= prefsrc
? *prefsrc
: IN_ADDR_NULL
,
/* Convenience wrapper around add_local_address_full(): forwards list, n_list, ifindex, scope, family
 * and address unchanged, and passes 0 for priority, 0 for weight and NULL for prefsrc.
 * Returns whatever add_local_address_full() returns. */
108 int add_local_address(
109 struct local_address
**list
,
114 const union in_addr_union
*address
) {
116 return add_local_address_full(
117 list
, n_list
, ifindex
,
118 scope
, /* priority = */ 0, /* weight = */ 0,
119 family
, address
, /* prefsrc = */ NULL
);
126 struct local_address
**ret
) {
128 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*req
= NULL
, *reply
= NULL
;
129 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
130 _cleanup_free_
struct local_address
*list
= NULL
;
135 rtnl
= sd_netlink_ref(context
);
137 r
= sd_netlink_open(&rtnl
);
142 r
= sd_rtnl_message_new_addr(rtnl
, &req
, RTM_GETADDR
, ifindex
, af
);
146 r
= sd_netlink_message_set_request_dump(req
, true);
150 r
= sd_netlink_call(rtnl
, req
, 0, &reply
);
154 for (sd_netlink_message
*m
= reply
; m
; m
= sd_netlink_message_next(m
)) {
155 union in_addr_union a
;
160 r
= sd_netlink_message_get_errno(m
);
164 r
= sd_netlink_message_get_type(m
, &type
);
167 if (type
!= RTM_NEWADDR
)
170 r
= sd_rtnl_message_addr_get_ifindex(m
, &ifi
);
173 if (ifindex
> 0 && ifi
!= ifindex
)
176 r
= sd_rtnl_message_addr_get_family(m
, &family
);
179 if (!IN_SET(family
, AF_INET
, AF_INET6
))
181 if (af
!= AF_UNSPEC
&& af
!= family
)
185 r
= sd_netlink_message_read_u32(m
, IFA_FLAGS
, &flags
);
188 if ((flags
& (IFA_F_DEPRECATED
|IFA_F_TENTATIVE
)) != 0)
191 r
= sd_rtnl_message_addr_get_scope(m
, &scope
);
195 if (ifindex
== 0 && IN_SET(scope
, RT_SCOPE_HOST
, RT_SCOPE_NOWHERE
))
201 r
= sd_netlink_message_read_in_addr(m
, IFA_LOCAL
, &a
.in
);
203 r
= sd_netlink_message_read_in_addr(m
, IFA_ADDRESS
, &a
.in
);
210 r
= sd_netlink_message_read_in6_addr(m
, IFA_LOCAL
, &a
.in6
);
212 r
= sd_netlink_message_read_in6_addr(m
, IFA_ADDRESS
, &a
.in6
);
219 assert_not_reached();
222 r
= add_local_address(&list
, &n_list
, ifi
, scope
, family
, &a
);
227 typesafe_qsort(list
, n_list
, address_compare
);
228 suppress_duplicates(list
, &n_list
);
231 *ret
= TAKE_PTR(list
);
/* Convenience wrapper around add_local_address_full() for gateway entries: the scope is fixed to 0,
 * while priority, weight, family, address and prefsrc are forwarded from the caller.
 * Returns whatever add_local_address_full() returns.
 *
 * NOTE(review): the leading arguments of the forwarded call are not visible in this view. */
236 static int add_local_gateway(
237 struct local_address
**list
,
243 const union in_addr_union
*address
,
244 const union in_addr_union
*prefsrc
) {
246 return add_local_address_full(
249 /* scope = */ 0, priority
, weight
,
250 family
, address
, prefsrc
);
/* Walks the rtattr entries attached to a single struct rtnexthop and adds the gateway found there to
 * *list via add_local_gateway().
 *
 * Two kinds of attribute are handled in the visible code:
 *  - one whose payload must be exactly FAMILY_ADDRESS_SIZE(family) bytes; it is copied into an
 *    in_addr_union and added together with the caller's prefsrc;
 *  - one whose payload must be exactly a RouteVia; it is accepted only for family == AF_INET with
 *    via->family == AF_INET6 (otherwise -EBADMSG), and is added without prefsrc.
 *
 * NOTE(review): the case labels are not visible in this view; going by the existing comment the second
 * one is RTA_VIA, and the first is presumably RTA_GATEWAY. The handling of a payload size mismatch and
 * of the add_local_gateway() result is not visible either. Confirm against the full source. */
253 static int parse_nexthop_one(
254 struct local_address
**list
,
259 const union in_addr_union
*prefsrc
,
260 const struct rtnexthop
*rtnh
) {
/* Number of attribute bytes following the rtnexthop header. parse_nexthops() checks
 * rtnh_len >= sizeof(struct rtnexthop) before calling this function, so this does not underflow there. */
267 size_t len
= rtnh
->rtnh_len
- sizeof(struct rtnexthop
);
268 for (struct rtattr
*attr
= RTNH_DATA(rtnh
); RTA_OK(attr
, len
); attr
= RTA_NEXT(attr
, len
))
270 switch (attr
->rta_type
) {
/* Validate the payload size before copying the address out of the attribute. */
277 if (attr
->rta_len
!= RTA_LENGTH(FAMILY_ADDRESS_SIZE(family
)))
280 union in_addr_union a
;
281 memcpy(&a
, RTA_DATA(attr
), FAMILY_ADDRESS_SIZE(family
));
/* rtnh_hops is passed in the argument slot that follows priority — presumably the weight. */
282 r
= add_local_gateway(list
, n_list
, rtnh
->rtnh_ifindex
, priority
, rtnh
->rtnh_hops
, family
, &a
, prefsrc
);
297 if (family
!= AF_INET
)
298 return -EBADMSG
; /* RTA_VIA is only supported for IPv4 routes. */
300 if (attr
->rta_len
!= RTA_LENGTH(sizeof(RouteVia
)))
303 RouteVia
*via
= RTA_DATA(attr
);
304 if (via
->family
!= AF_INET6
)
305 return -EBADMSG
; /* The gateway address should always be IPv6. */
/* The entry is recorded with the gateway's own family (via->family), and without prefsrc. */
307 r
= add_local_gateway(list
, n_list
, rtnh
->rtnh_ifindex
, priority
, rtnh
->rtnh_hops
, via
->family
,
308 &(union in_addr_union
) { .in6
= via
->address
.in6
},
309 /* prefsrc = */ NULL
);
/* Iterates over a buffer of 'size' bytes holding consecutive struct rtnexthop records, starting at
 * 'rtnh', and hands each record to parse_nexthop_one().
 *
 * 'rtnh' may be NULL only if 'size' is 0, and family must be AF_INET or AF_INET6 (both asserted).
 * If 'ifindex' is positive, records whose rtnh_ifindex differs from it get separate handling
 * (presumably they are skipped).
 *
 * NOTE(review): the bodies of the checks below and the handling of the parse_nexthop_one() result are
 * not visible in this view. Confirm against the full source. */
319 static int parse_nexthops(
320 struct local_address
**list
,
326 const union in_addr_union
*prefsrc
,
327 const struct rtnexthop
*rtnh
,
334 assert(IN_SET(family
, AF_INET
, AF_INET6
));
335 assert(rtnh
|| size
== 0);
/* A buffer smaller than one rtnexthop header cannot contain any record. */
337 if (size
< sizeof(struct rtnexthop
))
340 for (; size
>= sizeof(struct rtnexthop
); ) {
/* Sanity checks on the record length: it must fit into the remaining buffer ... */
341 if (NLMSG_ALIGN(rtnh
->rtnh_len
) > size
)
/* ... and must at least cover the rtnexthop header itself. */
344 if (rtnh
->rtnh_len
< sizeof(struct rtnexthop
))
347 if (ifindex
> 0 && rtnh
->rtnh_ifindex
!= ifindex
)
350 r
= parse_nexthop_one(list
, n_list
, allow_via
, family
, priority
, prefsrc
, rtnh
);
/* Advance to the next record; the aligned length was checked against 'size' above. */
355 size
-= NLMSG_ALIGN(rtnh
->rtnh_len
);
356 rtnh
= RTNH_NEXT(rtnh
);
366 struct local_address
**ret
) {
368 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*req
= NULL
, *reply
= NULL
;
369 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
370 _cleanup_free_
struct local_address
*list
= NULL
;
374 /* The RTA_VIA attribute is used only for IPv4 routes with an IPv6 gateway. If IPv4 gateways are
375 * requested (af == AF_INET), then we do not return IPv6 gateway addresses. Similarly, if IPv6
376 * gateways are requested (af == AF_INET6), then we do not return gateway addresses for IPv4 routes.
377 * So, the RTA_VIA attribute is only parsed when af == AF_UNSPEC. */
378 bool allow_via
= af
== AF_UNSPEC
;
381 rtnl
= sd_netlink_ref(context
);
383 r
= sd_netlink_open(&rtnl
);
388 r
= sd_rtnl_message_new_route(rtnl
, &req
, RTM_GETROUTE
, af
, RTPROT_UNSPEC
);
392 r
= sd_rtnl_message_route_set_type(req
, RTN_UNICAST
);
396 r
= sd_rtnl_message_route_set_table(req
, RT_TABLE_MAIN
);
400 r
= sd_netlink_message_set_request_dump(req
, true);
404 r
= sd_netlink_call(rtnl
, req
, 0, &reply
);
408 for (sd_netlink_message
*m
= reply
; m
; m
= sd_netlink_message_next(m
)) {
409 union in_addr_union prefsrc
= IN_ADDR_NULL
;
411 unsigned char dst_len
, src_len
, table
;
412 uint32_t ifi
= 0, priority
= 0;
415 r
= sd_netlink_message_get_errno(m
);
419 r
= sd_netlink_message_get_type(m
, &type
);
422 if (type
!= RTM_NEWROUTE
)
425 /* We only care for default routes */
426 r
= sd_rtnl_message_route_get_dst_prefixlen(m
, &dst_len
);
432 r
= sd_rtnl_message_route_get_src_prefixlen(m
, &src_len
);
438 r
= sd_rtnl_message_route_get_table(m
, &table
);
441 if (table
!= RT_TABLE_MAIN
)
444 r
= sd_netlink_message_read_u32(m
, RTA_PRIORITY
, &priority
);
445 if (r
< 0 && r
!= -ENODATA
)
448 r
= sd_rtnl_message_route_get_family(m
, &family
);
451 if (!IN_SET(family
, AF_INET
, AF_INET6
))
453 if (af
!= AF_UNSPEC
&& af
!= family
)
456 r
= netlink_message_read_in_addr_union(m
, RTA_PREFSRC
, family
, &prefsrc
);
457 if (r
< 0 && r
!= -ENODATA
)
460 r
= sd_netlink_message_read_u32(m
, RTA_OIF
, &ifi
);
461 if (r
< 0 && r
!= -ENODATA
)
466 if (ifindex
> 0 && (int) ifi
!= ifindex
)
469 union in_addr_union gateway
;
470 r
= netlink_message_read_in_addr_union(m
, RTA_GATEWAY
, family
, &gateway
);
471 if (r
< 0 && r
!= -ENODATA
)
474 r
= add_local_gateway(&list
, &n_list
, ifi
, priority
, 0, family
, &gateway
, &prefsrc
);
484 if (family
!= AF_INET
)
488 r
= sd_netlink_message_read(m
, RTA_VIA
, sizeof(via
), &via
);
489 if (r
< 0 && r
!= -ENODATA
)
492 if (via
.family
!= AF_INET6
)
495 /* Ignore prefsrc, and let's take the source address by socket command, if necessary. */
496 r
= add_local_gateway(&list
, &n_list
, ifi
, priority
, 0, via
.family
,
497 &(union in_addr_union
) { .in6
= via
.address
.in6
},
498 /* prefsrc = */ NULL
);
503 /* If the route has RTA_OIF, it does not have RTA_MULTIPATH. */
508 _cleanup_free_
void *rta_multipath
= NULL
;
509 r
= sd_netlink_message_read_data(m
, RTA_MULTIPATH
, &rta_len
, &rta_multipath
);
510 if (r
< 0 && r
!= -ENODATA
)
513 r
= parse_nexthops(&list
, &n_list
, ifindex
, allow_via
, family
, priority
, &prefsrc
, rta_multipath
, rta_len
);
519 typesafe_qsort(list
, n_list
, address_compare
);
520 suppress_duplicates(list
, &n_list
);
523 *ret
= TAKE_PTR(list
);
/* Convenience wrapper around add_local_address_full() for outbound entries: forwards list, n_list,
 * ifindex, family and address, and passes 0 for scope, priority and weight and NULL for prefsrc.
 * Returns whatever add_local_address_full() returns. */
528 static int add_local_outbound(
529 struct local_address
**list
,
533 const union in_addr_union
*address
) {
535 return add_local_address_full(
536 list
, n_list
, ifindex
,
537 /* scope = */ 0, /* priority = */ 0, /* weight = */ 0,
538 family
, address
, /* prefsrc = */ NULL
);
/* Tries to derive an outbound address for 'gateway' from its prefsrc field.
 *
 * The visible logic: first test whether gateway->prefsrc is set; then search 'addresses' (an array of
 * 'n_addresses' entries) for one with the same ifindex and family whose address equals
 * gateway->prefsrc; -EHOSTUNREACH is one possible return value; finally gateway->prefsrc is added to
 * *list through add_local_outbound(), using the gateway's ifindex and family.
 *
 * NOTE(review): the bodies of the individual checks and the final return value are not visible in this
 * view. Presumably 0 is returned when prefsrc is unset, -EHOSTUNREACH when no matching local address
 * exists, and a positive value once an entry was added. Confirm against the full source. */
541 static int add_local_outbound_by_prefsrc(
542 struct local_address
**list
,
544 const struct local_address
*gateway
,
545 const struct local_address
*addresses
,
546 size_t n_addresses
) {
554 if (!in_addr_is_set(gateway
->family
, &gateway
->prefsrc
))
557 /* If the gateway has prefsrc, then let's honor the field. But, check if the address is assigned to
558 * the same interface, like we do with SO_BINDTOINDEX. */
561 FOREACH_ARRAY(a
, addresses
, n_addresses
) {
562 if (a
->ifindex
!= gateway
->ifindex
)
564 if (a
->family
!= gateway
->family
)
/* A result <= 0 covers both "not equal" and an error from in_addr_equal(). */
566 if (in_addr_equal(a
->family
, &a
->address
, &gateway
->prefsrc
) <= 0)
573 return -EHOSTUNREACH
;
575 r
= add_local_outbound(list
, n_list
, gateway
->ifindex
, gateway
->family
, &gateway
->prefsrc
);
586 struct local_address
**ret
) {
588 _cleanup_free_
struct local_address
*list
= NULL
, *gateways
= NULL
, *addresses
= NULL
;
590 int r
, n_gateways
, n_addresses
;
592 /* Determines our default outbound addresses, i.e. the "primary" local addresses we use to talk to IP
593 * addresses behind the default routes. This is still an address of the local host (i.e. this doesn't
594 * resolve NAT or so), but it's the set of addresses the local IP stack most likely uses to talk to
597 * This works by connect()ing a SOCK_DGRAM socket to the local gateways, and then reading the IP
598 * address off the socket that was chosen for the routing decision. */
600 n_gateways
= local_gateways(context
, ifindex
, af
, &gateways
);
603 if (n_gateways
== 0) {
604 /* No gateways? Then we have no outbound addresses either. */
611 n_addresses
= local_addresses(context
, ifindex
, af
, &addresses
);
615 FOREACH_ARRAY(i
, gateways
, n_gateways
) {
616 _cleanup_close_
int fd
= -EBADF
;
617 union sockaddr_union sa
;
620 r
= add_local_outbound_by_prefsrc(&list
, &n_list
, i
, addresses
, n_addresses
);
621 if (r
> 0 || r
== -EHOSTUNREACH
)
626 fd
= socket(i
->family
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
633 sa
.in
= (struct sockaddr_in
) {
634 .sin_family
= AF_INET
,
635 .sin_addr
= i
->address
.in
,
636 .sin_port
= htobe16(53), /* doesn't really matter which port we pick —
637 * we just care about the routing decision */
643 sa
.in6
= (struct sockaddr_in6
) {
644 .sin6_family
= AF_INET6
,
645 .sin6_addr
= i
->address
.in6
,
646 .sin6_port
= htobe16(53),
647 .sin6_scope_id
= i
->ifindex
,
653 assert_not_reached();
656 /* So ideally we'd just use IP_UNICAST_IF here to pass the ifindex info to the kernel before
657 * connect()ing, so that it influences the routing decision. However, on current kernels
658 * IP_UNICAST_IF doesn't actually influence the routing decision for UDP — which I think
659 * should probably just be considered a bug. Once that bug is fixed this is the best API to
660 * use, since it is the most lightweight. */
661 r
= socket_set_unicast_if(fd
, i
->family
, i
->ifindex
);
663 log_debug_errno(r
, "Failed to set unicast interface index %i, ignoring: %m", i
->ifindex
);
665 /* We'll also use SO_BINDTOINDEX. This requires CAP_NET_RAW on old kernels, hence there's a
666 * good chance this fails. Since 5.7 this restriction was dropped and the first
667 * SO_BINDTOINDEX on a socket may be done without privileges. This one has the benefit of
668 * really influencing the routing decision, i.e. this one definitely works for us — as long
669 * as we have the privileges for it. */
670 r
= socket_bind_to_ifindex(fd
, i
->ifindex
);
672 log_debug_errno(r
, "Failed to bind socket to interface %i, ignoring: %m", i
->ifindex
);
674 /* Let's now connect() to the UDP socket, forcing the kernel to make a routing decision and
675 * auto-bind the socket. We ignore failures on this, since that failure might happen for a
676 * multitude of reasons (policy/firewall issues, who knows?) and some of them might be
677 * *after* the routing decision and the auto-binding already took place. If so we can still
678 * make use of the binding and return it. Hence, let's not unnecessarily fail early here: we
679 * can still easily detect if the auto-binding worked or not, by comparing the bound IP
680 * address with zero — which we do below. */
681 if (connect(fd
, &sa
.sa
, sockaddr_len(&sa
)) < 0)
682 log_debug_errno(errno
, "Failed to connect SOCK_DGRAM socket to gateway, ignoring: %m");
684 /* Let's now read the socket address of the socket. A routing decision should have been
685 * made. Let's verify that and use the data. */
686 salen
= sockaddr_len(&sa
);
687 if (getsockname(fd
, &sa
.sa
, &salen
) < 0)
689 assert(sa
.sa
.sa_family
== i
->family
);
690 assert(salen
== sockaddr_len(&sa
));
695 if (in4_addr_is_null(&sa
.in
.sin_addr
)) /* Auto-binding didn't work. :-( */
698 r
= add_local_outbound(&list
, &n_list
, i
->ifindex
, i
->family
,
699 &(union in_addr_union
) { .in
= sa
.in
.sin_addr
});
705 if (in6_addr_is_null(&sa
.in6
.sin6_addr
))
708 r
= add_local_outbound(&list
, &n_list
, i
->ifindex
, i
->family
,
709 &(union in_addr_union
) { .in6
= sa
.in6
.sin6_addr
});
715 assert_not_reached();
719 typesafe_qsort(list
, n_list
, address_compare
);
720 suppress_duplicates(list
, &n_list
);
723 *ret
= TAKE_PTR(list
);