]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/local-addresses.c
Merge pull request #19438 from poettering/nspawn-uidmap
[thirdparty/systemd.git] / src / shared / local-addresses.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
8041b5ba 2
54e6f97b
LP
3#include <net/if_arp.h>
4
1c4baffc 5#include "sd-netlink.h"
b5efdb8a
LP
6
7#include "alloc-util.h"
54e6f97b 8#include "fd-util.h"
e80af1bd 9#include "local-addresses.h"
cf0fbc49
TA
10#include "macro.h"
11#include "netlink-util.h"
760877e9 12#include "sort-util.h"
8041b5ba 13
93bab288
YW
14static int address_compare(const struct local_address *a, const struct local_address *b) {
15 int r;
5502f0d9
LP
16
17 /* Order lowest scope first, IPv4 before IPv6, lowest interface index first */
18
e9140aff
LP
19 if (a->family == AF_INET && b->family == AF_INET6)
20 return -1;
21 if (a->family == AF_INET6 && b->family == AF_INET)
22 return 1;
23
93bab288
YW
24 r = CMP(a->scope, b->scope);
25 if (r != 0)
26 return r;
5502f0d9 27
93bab288
YW
28 r = CMP(a->metric, b->metric);
29 if (r != 0)
30 return r;
5502f0d9 31
93bab288
YW
32 r = CMP(a->ifindex, b->ifindex);
33 if (r != 0)
34 return r;
5502f0d9 35
00d75e57 36 return memcmp(&a->address, &b->address, FAMILY_ADDRESS_SIZE(a->family));
5502f0d9
LP
37}
38
54e6f97b
LP
39static void suppress_duplicates(struct local_address *list, size_t *n_list) {
40 size_t old_size, new_size;
41
42 /* Removes duplicate entries, assumes the list of addresses is already sorted. Updates in-place. */
43
44 if (*n_list < 2) /* list with less than two entries can't have duplicates */
45 return;
46
47 old_size = *n_list;
48 new_size = 1;
49
50 for (size_t i = 1; i < old_size; i++) {
51
52 if (address_compare(list + i, list + new_size - 1) == 0)
53 continue;
54
55 list[new_size++] = list[i];
56 }
57
58 *n_list = new_size;
59}
60
61int local_addresses(
62 sd_netlink *context,
63 int ifindex,
64 int af,
65 struct local_address **ret) {
66
4afd3348
LP
67 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
68 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
e80af1bd 69 _cleanup_free_ struct local_address *list = NULL;
5502f0d9 70 size_t n_list = 0, n_allocated = 0;
1c4baffc 71 sd_netlink_message *m;
d1ca51b1 72 int r;
d73c3269 73
ee8c4568 74 if (context)
1c4baffc 75 rtnl = sd_netlink_ref(context);
ee8c4568 76 else {
1c4baffc 77 r = sd_netlink_open(&rtnl);
ee8c4568
LP
78 if (r < 0)
79 return r;
80 }
8041b5ba 81
1d050e1e 82 r = sd_rtnl_message_new_addr(rtnl, &req, RTM_GETADDR, 0, af);
d1ca51b1
TG
83 if (r < 0)
84 return r;
8041b5ba 85
1c4baffc 86 r = sd_netlink_call(rtnl, req, 0, &reply);
d1ca51b1
TG
87 if (r < 0)
88 return r;
d1ca51b1 89
1c4baffc 90 for (m = reply; m; m = sd_netlink_message_next(m)) {
e80af1bd 91 struct local_address *a;
d1ca51b1 92 unsigned char flags;
5502f0d9 93 uint16_t type;
1d050e1e 94 int ifi, family;
d1ca51b1 95
1c4baffc 96 r = sd_netlink_message_get_errno(m);
d1ca51b1
TG
97 if (r < 0)
98 return r;
99
1c4baffc 100 r = sd_netlink_message_get_type(m, &type);
d1ca51b1
TG
101 if (r < 0)
102 return r;
d1ca51b1 103 if (type != RTM_NEWADDR)
8041b5ba
LP
104 continue;
105
ee8c4568
LP
106 r = sd_rtnl_message_addr_get_ifindex(m, &ifi);
107 if (r < 0)
108 return r;
1d050e1e
LP
109 if (ifindex > 0 && ifi != ifindex)
110 continue;
ee8c4568 111
1d050e1e
LP
112 r = sd_rtnl_message_addr_get_family(m, &family);
113 if (r < 0)
114 return r;
115 if (af != AF_UNSPEC && af != family)
ee8c4568
LP
116 continue;
117
5502f0d9 118 r = sd_rtnl_message_addr_get_flags(m, &flags);
d1ca51b1
TG
119 if (r < 0)
120 return r;
5502f0d9 121 if (flags & IFA_F_DEPRECATED)
d73c3269 122 continue;
8041b5ba 123
e9140aff 124 if (!GREEDY_REALLOC0(list, n_allocated, n_list+1))
5502f0d9
LP
125 return -ENOMEM;
126
127 a = list + n_list;
128
129 r = sd_rtnl_message_addr_get_scope(m, &a->scope);
d1ca51b1
TG
130 if (r < 0)
131 return r;
8041b5ba 132
945c2931 133 if (ifindex == 0 && IN_SET(a->scope, RT_SCOPE_HOST, RT_SCOPE_NOWHERE))
d73c3269 134 continue;
8041b5ba 135
1d050e1e 136 switch (family) {
5502f0d9 137
d1ca51b1 138 case AF_INET:
1c4baffc 139 r = sd_netlink_message_read_in_addr(m, IFA_LOCAL, &a->address.in);
d1ca51b1 140 if (r < 0) {
1c4baffc 141 r = sd_netlink_message_read_in_addr(m, IFA_ADDRESS, &a->address.in);
d1ca51b1
TG
142 if (r < 0)
143 continue;
144 }
145 break;
5502f0d9 146
d1ca51b1 147 case AF_INET6:
1c4baffc 148 r = sd_netlink_message_read_in6_addr(m, IFA_LOCAL, &a->address.in6);
d1ca51b1 149 if (r < 0) {
1c4baffc 150 r = sd_netlink_message_read_in6_addr(m, IFA_ADDRESS, &a->address.in6);
d1ca51b1
TG
151 if (r < 0)
152 continue;
153 }
154 break;
5502f0d9 155
d1ca51b1 156 default:
d73c3269 157 continue;
d73c3269 158 }
8041b5ba 159
ee8c4568 160 a->ifindex = ifi;
1d050e1e 161 a->family = family;
8041b5ba 162
d1ca51b1 163 n_list++;
5502f0d9 164 };
8041b5ba 165
c3a8c6aa
LP
166 if (ret) {
167 typesafe_qsort(list, n_list, address_compare);
54e6f97b 168 suppress_duplicates(list, &n_list);
c3a8c6aa
LP
169 *ret = TAKE_PTR(list);
170 }
e9140aff
LP
171
172 return (int) n_list;
173}
174
bff94a84
YW
175static int add_local_gateway(
176 struct local_address **list,
177 size_t *n_list,
178 size_t *n_allocated,
179 int af,
180 int ifindex,
181 uint32_t metric,
182 const RouteVia *via) {
183
184 assert(list);
185 assert(n_list);
186 assert(n_allocated);
187 assert(via);
188
189 if (af != AF_UNSPEC && af != via->family)
190 return 0;
191
192 if (!GREEDY_REALLOC(*list, *n_allocated, *n_list + 1))
193 return -ENOMEM;
194
195 (*list)[(*n_list)++] = (struct local_address) {
196 .ifindex = ifindex,
197 .metric = metric,
198 .family = via->family,
199 .address = via->address,
200 };
201
202 return 0;
203}
204
54e6f97b
LP
205int local_gateways(
206 sd_netlink *context,
207 int ifindex,
208 int af,
209 struct local_address **ret) {
210
4afd3348
LP
211 _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *req = NULL, *reply = NULL;
212 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
e9140aff 213 _cleanup_free_ struct local_address *list = NULL;
e9140aff
LP
214 size_t n_list = 0, n_allocated = 0;
215 int r;
216
e9140aff 217 if (context)
1c4baffc 218 rtnl = sd_netlink_ref(context);
e9140aff 219 else {
1c4baffc 220 r = sd_netlink_open(&rtnl);
e9140aff
LP
221 if (r < 0)
222 return r;
223 }
224
1d050e1e 225 r = sd_rtnl_message_new_route(rtnl, &req, RTM_GETROUTE, af, RTPROT_UNSPEC);
e9140aff
LP
226 if (r < 0)
227 return r;
228
1c4baffc 229 r = sd_netlink_message_request_dump(req, true);
e9140aff
LP
230 if (r < 0)
231 return r;
232
1c4baffc 233 r = sd_netlink_call(rtnl, req, 0, &reply);
e9140aff
LP
234 if (r < 0)
235 return r;
236
bff94a84
YW
237 for (sd_netlink_message *m = reply; m; m = sd_netlink_message_next(m)) {
238 _cleanup_ordered_set_free_free_ OrderedSet *multipath_routes = NULL;
239 _cleanup_free_ void *rta_multipath = NULL;
240 union in_addr_union gateway;
e9140aff 241 uint16_t type;
d1b014df 242 unsigned char dst_len, src_len, table;
d2f4a948 243 uint32_t ifi = 0, metric = 0;
bff94a84 244 size_t rta_len;
1d050e1e 245 int family;
bff94a84 246 RouteVia via;
e9140aff 247
1c4baffc 248 r = sd_netlink_message_get_errno(m);
e9140aff
LP
249 if (r < 0)
250 return r;
251
1c4baffc 252 r = sd_netlink_message_get_type(m, &type);
e9140aff
LP
253 if (r < 0)
254 return r;
e9140aff
LP
255 if (type != RTM_NEWROUTE)
256 continue;
257
a98433c0 258 /* We only care for default routes */
584d0d2a 259 r = sd_rtnl_message_route_get_dst_prefixlen(m, &dst_len);
e9140aff
LP
260 if (r < 0)
261 return r;
e9140aff
LP
262 if (dst_len != 0)
263 continue;
264
584d0d2a 265 r = sd_rtnl_message_route_get_src_prefixlen(m, &src_len);
a98433c0
LP
266 if (r < 0)
267 return r;
268 if (src_len != 0)
269 continue;
270
d1b014df
LP
271 r = sd_rtnl_message_route_get_table(m, &table);
272 if (r < 0)
273 return r;
274 if (table != RT_TABLE_MAIN)
275 continue;
276
bff94a84
YW
277 r = sd_netlink_message_read_u32(m, RTA_PRIORITY, &metric);
278 if (r < 0 && r != -ENODATA)
e9140aff 279 return r;
e9140aff 280
1d050e1e
LP
281 r = sd_rtnl_message_route_get_family(m, &family);
282 if (r < 0)
283 return r;
bff94a84 284 if (!IN_SET(family, AF_INET, AF_INET6))
1d050e1e
LP
285 continue;
286
bff94a84
YW
287 r = sd_netlink_message_read_u32(m, RTA_OIF, &ifi);
288 if (r < 0 && r != -ENODATA)
289 return r;
290 if (r >= 0) {
291 if (ifi <= 0)
292 return -EINVAL;
293 if (ifindex > 0 && (int) ifi != ifindex)
294 continue;
e9140aff 295
bff94a84
YW
296 r = netlink_message_read_in_addr_union(m, RTA_GATEWAY, family, &gateway);
297 if (r < 0 && r != -ENODATA)
298 return r;
299 if (r >= 0) {
300 via.family = family;
301 via.address = gateway;
302 r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via);
303 if (r < 0)
304 return r;
e9140aff 305
e9140aff 306 continue;
bff94a84 307 }
e9140aff 308
bff94a84 309 if (family != AF_INET)
e9140aff
LP
310 continue;
311
bff94a84
YW
312 r = sd_netlink_message_read(m, RTA_VIA, sizeof(via), &via);
313 if (r < 0 && r != -ENODATA)
314 return r;
315 if (r >= 0) {
316 r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &via);
317 if (r < 0)
318 return r;
319
320 continue;
321 }
e9140aff
LP
322 }
323
bff94a84
YW
324 r = sd_netlink_message_read_data(m, RTA_MULTIPATH, &rta_len, &rta_multipath);
325 if (r < 0 && r != -ENODATA)
326 return r;
327 if (r >= 0) {
328 MultipathRoute *mr;
e9140aff 329
bff94a84
YW
330 r = rtattr_read_nexthop(rta_multipath, rta_len, family, &multipath_routes);
331 if (r < 0)
332 return r;
e9140aff 333
bff94a84
YW
334 ORDERED_SET_FOREACH(mr, multipath_routes) {
335 if (ifindex > 0 && mr->ifindex != ifindex)
336 continue;
337
338 r = add_local_gateway(&list, &n_list, &n_allocated, af, ifi, metric, &mr->gateway);
339 if (r < 0)
340 return r;
341 }
342 }
e9140aff
LP
343 }
344
c3a8c6aa
LP
345 if (ret) {
346 typesafe_qsort(list, n_list, address_compare);
54e6f97b
LP
347 suppress_duplicates(list, &n_list);
348 *ret = TAKE_PTR(list);
349 }
350
351 return (int) n_list;
352}
353
354int local_outbounds(
355 sd_netlink *context,
356 int ifindex,
357 int af,
358 struct local_address **ret) {
359
360 _cleanup_free_ struct local_address *list = NULL, *gateways = NULL;
361 size_t n_list = 0, n_allocated = 0;
362 int r, n_gateways;
363
364 /* Determines our default outbound addresses, i.e. the "primary" local addresses we use to talk to IP
365 * addresses behind the default routes. This is still an address of the local host (i.e. this doesn't
366 * resolve NAT or so), but it's the set of addresses the local IP stack most likely uses to talk to
367 * other hosts.
368 *
369 * This works by connect()ing a SOCK_DGRAM socket to the local gateways, and then reading the IP
370 * address off the socket that was chosen for the routing decision. */
371
372 n_gateways = local_gateways(context, ifindex, af, &gateways);
373 if (n_gateways < 0)
374 return n_gateways;
375 if (n_gateways == 0) {
376 /* No gateways? Then we have no outbound addresses either. */
377 if (ret)
378 *ret = NULL;
379
380 return 0;
381 }
382
383 for (int i = 0; i < n_gateways; i++) {
384 _cleanup_close_ int fd = -1;
385 union sockaddr_union sa;
386 socklen_t salen;
387
388 fd = socket(gateways[i].family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
389 if (fd < 0)
390 return -errno;
391
392 switch (gateways[i].family) {
393
394 case AF_INET:
395 sa.in = (struct sockaddr_in) {
396 .sin_family = AF_INET,
397 .sin_addr = gateways[i].address.in,
398 .sin_port = htobe16(53), /* doesn't really matter which port we pick — we just care about the routing decision */
399 };
400
401 break;
402
403 case AF_INET6:
404 sa.in6 = (struct sockaddr_in6) {
405 .sin6_family = AF_INET6,
406 .sin6_addr = gateways[i].address.in6,
407 .sin6_port = htobe16(53),
408 .sin6_scope_id = gateways[i].ifindex,
409 };
410
411 break;
412
413 default:
414 assert_not_reached("Unexpected protocol");
415 }
416
417 /* So ideally we'd just use IP_UNICAST_IF here to pass the ifindex info to the kernel before
418 * connect()ing, sot that it influences the routing decision. However, on current kernels
419 * IP_UNICAST_IF doesn't actually influence the routing decision for UDP — which I think
420 * should probably just be considered a bug. Once that bug is fixed this is the best API to
421 * use, since it is the most lightweight. */
422 r = socket_set_unicast_if(fd, gateways[i].family, gateways[i].ifindex);
423 if (r < 0)
424 log_debug_errno(r, "Failed to set unicast interface index %i, ignoring: %m", gateways[i].ifindex);
425
426 /* We'll also use SO_BINDTOINDEX. This requires CAP_NET_RAW on old kernels, hence there's a
427 * good chance this fails. Since 5.7 this restriction was dropped and the first
428 * SO_BINDTOINDEX on a socket may be done without privileges. This one has the benefit of
429 * really influencing the routing decision, i.e. this one definitely works for us — as long
430 * as we have the privileges for it.*/
431 r = socket_bind_to_ifindex(fd, gateways[i].ifindex);
432 if (r < 0)
433 log_debug_errno(r, "Failed to bind socket to interface %i, ignoring: %m", gateways[i].ifindex);
434
435 /* Let's now connect() to the UDP socket, forcing the kernel to make a routing decision and
436 * auto-bind the socket. We ignore failures on this, since that failure might happen for a
437 * multitude of reasons (policy/firewall issues, who knows?) and some of them might be
438 * *after* the routing decision and the auto-binding already took place. If so we can still
439 * make use of the binding and return it. Hence, let's not unnecessarily fail early here: we
440 * can still easily detect if the auto-binding worked or not, by comparing the bound IP
441 * address with zero — which we do below. */
442 if (connect(fd, &sa.sa, SOCKADDR_LEN(sa)) < 0)
443 log_debug_errno(errno, "Failed to connect SOCK_DGRAM socket to gateway, ignoring: %m");
444
445 /* Let's now read the socket address of the socket. A routing decision should have been
446 * made. Let's verify that and use the data. */
447 salen = SOCKADDR_LEN(sa);
448 if (getsockname(fd, &sa.sa, &salen) < 0)
449 return -errno;
450 assert(sa.sa.sa_family == gateways[i].family);
451 assert(salen == SOCKADDR_LEN(sa));
452
453 switch (gateways[i].family) {
454
455 case AF_INET:
456 if (in4_addr_is_null(&sa.in.sin_addr)) /* Auto-binding didn't work. :-( */
457 continue;
458
459 if (!GREEDY_REALLOC(list, n_allocated, n_list+1))
460 return -ENOMEM;
461
462 list[n_list++] = (struct local_address) {
463 .family = gateways[i].family,
464 .ifindex = gateways[i].ifindex,
465 .address.in = sa.in.sin_addr,
466 };
467
468 break;
469
470 case AF_INET6:
471 if (in6_addr_is_null(&sa.in6.sin6_addr))
472 continue;
473
474 if (!GREEDY_REALLOC(list, n_allocated, n_list+1))
475 return -ENOMEM;
476
477 list[n_list++] = (struct local_address) {
478 .family = gateways[i].family,
479 .ifindex = gateways[i].ifindex,
480 .address.in6 = sa.in6.sin6_addr,
481 };
482 break;
483
484 default:
485 assert_not_reached("Unexpected protocol");
486 }
487 }
488
489 if (ret) {
490 typesafe_qsort(list, n_list, address_compare);
491 suppress_duplicates(list, &n_list);
c3a8c6aa
LP
492 *ret = TAKE_PTR(list);
493 }
d73c3269 494
e80af1bd 495 return (int) n_list;
8041b5ba 496}