]> git.ipfire.org Git - thirdparty/strongswan.git/blame - src/libcharon/plugins/kernel_netlink/kernel_netlink_net.c
Merge branch 'testing-route-based'
[thirdparty/strongswan.git] / src / libcharon / plugins / kernel_netlink / kernel_netlink_net.c
CommitLineData
507f26f6 1/*
66e9165b 2 * Copyright (C) 2008-2016 Tobias Brunner
ce5b1708 3 * Copyright (C) 2005-2008 Martin Willi
66e9165b 4 * HSR Hochschule fuer Technik Rapperswil
507f26f6
TB
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
507f26f6
TB
15 */
16
d266e895
TE
17/*
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
20 *
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37 * THE SOFTWARE.
38 */
39
507f26f6 40#include <sys/socket.h>
7beb31aa 41#include <sys/utsname.h>
507f26f6
TB
42#include <linux/netlink.h>
43#include <linux/rtnetlink.h>
507f26f6
TB
44#include <unistd.h>
45#include <errno.h>
46#include <net/if.h>
8e8e97d1 47#ifdef HAVE_LINUX_FIB_RULES_H
51fefe46 48#include <linux/fib_rules.h>
8e8e97d1 49#endif
507f26f6
TB
50
51#include "kernel_netlink_net.h"
52#include "kernel_netlink_shared.h"
53
8394ea2a 54#include <daemon.h>
f05b4272 55#include <utils/debug.h>
eba64cef 56#include <threading/mutex.h>
a25d536e
TB
57#include <threading/rwlock.h>
58#include <threading/rwlock_condvar.h>
4134108c 59#include <threading/spinlock.h>
12642a68
TB
60#include <collections/hashtable.h>
61#include <collections/linked_list.h>
507f26f6 62#include <processing/jobs/callback_job.h>
507f26f6 63
ba26508d 64/** delay before firing roam events (ms) */
507f26f6
TB
65#define ROAM_DELAY 100
66
f834249c
TB
67/** delay before reinstalling routes (ms) */
68#define ROUTE_DELAY 100
69
cbd52e7d
TB
70/** maximum recursion when searching for addresses in get_route() */
71#define MAX_ROUTE_RECURSION 2
72
0b9ce21b
TB
73#ifndef ROUTING_TABLE
74#define ROUTING_TABLE 0
75#endif
76
77#ifndef ROUTING_TABLE_PRIO
78#define ROUTING_TABLE_PRIO 0
79#endif
80
12c0bde6
MW
81ENUM(rt_msg_names, RTM_NEWLINK, RTM_GETRULE,
82 "RTM_NEWLINK",
83 "RTM_DELLINK",
84 "RTM_GETLINK",
85 "RTM_SETLINK",
86 "RTM_NEWADDR",
87 "RTM_DELADDR",
88 "RTM_GETADDR",
89 "31",
90 "RTM_NEWROUTE",
91 "RTM_DELROUTE",
92 "RTM_GETROUTE",
93 "35",
94 "RTM_NEWNEIGH",
95 "RTM_DELNEIGH",
96 "RTM_GETNEIGH",
97 "RTM_NEWRULE",
98 "RTM_DELRULE",
99 "RTM_GETRULE",
100);
101
507f26f6
TB
102typedef struct addr_entry_t addr_entry_t;
103
104/**
c6b40158 105 * IP address in an iface_entry_t
507f26f6
TB
106 */
107struct addr_entry_t {
7daf5226 108
c6b40158 109 /** the ip address */
507f26f6 110 host_t *ip;
7daf5226 111
3bf98189
TB
112 /** address flags */
113 u_char flags;
114
507f26f6
TB
115 /** scope of the address */
116 u_char scope;
7daf5226 117
e8e9048f 118 /** number of times this IP is used, if virtual (i.e. managed by us) */
507f26f6 119 u_int refcount;
c6b40158
TB
120
121 /** TRUE once it is installed, if virtual */
122 bool installed;
507f26f6
TB
123};
124
125/**
126 * destroy a addr_entry_t object
127 */
128static void addr_entry_destroy(addr_entry_t *this)
129{
130 this->ip->destroy(this->ip);
131 free(this);
132}
133
134typedef struct iface_entry_t iface_entry_t;
135
136/**
137 * A network interface on this system, containing addr_entry_t's
138 */
139struct iface_entry_t {
7daf5226 140
507f26f6
TB
141 /** interface index */
142 int ifindex;
7daf5226 143
507f26f6
TB
144 /** name of the interface */
145 char ifname[IFNAMSIZ];
7daf5226 146
507f26f6
TB
147 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
148 u_int flags;
7daf5226 149
507f26f6
TB
150 /** list of addresses as host_t */
151 linked_list_t *addrs;
940e1b0f
TB
152
153 /** TRUE if usable by config */
154 bool usable;
507f26f6
TB
155};
156
157/**
158 * destroy an interface entry
159 */
160static void iface_entry_destroy(iface_entry_t *this)
161{
162 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
163 free(this);
164}
165
2e4d110d
TB
166CALLBACK(iface_entry_by_index, bool,
167 iface_entry_t *this, va_list args)
940e1b0f 168{
2e4d110d
TB
169 int ifindex;
170
171 VA_ARGS_VGET(args, ifindex);
172 return this->ifindex == ifindex;
940e1b0f
TB
173}
174
2e4d110d
TB
175CALLBACK(iface_entry_by_name, bool,
176 iface_entry_t *this, va_list args)
c6b40158 177{
2e4d110d
TB
178 char *ifname;
179
180 VA_ARGS_VGET(args, ifname);
c6b40158
TB
181 return streq(this->ifname, ifname);
182}
183
1f97e1aa
TB
184/**
185 * check if an interface is up
186 */
187static inline bool iface_entry_up(iface_entry_t *iface)
188{
189 return (iface->flags & IFF_UP) == IFF_UP;
190}
191
940e1b0f
TB
192/**
193 * check if an interface is up and usable
194 */
195static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
196{
1f97e1aa
TB
197 return iface->usable && iface_entry_up(iface);
198}
199
200typedef struct addr_map_entry_t addr_map_entry_t;
201
202/**
203 * Entry that maps an IP address to an interface entry
204 */
205struct addr_map_entry_t {
206 /** The IP address */
207 host_t *ip;
208
c6b40158
TB
209 /** The address entry for this IP address */
210 addr_entry_t *addr;
211
1f97e1aa
TB
212 /** The interface this address is installed on */
213 iface_entry_t *iface;
214};
215
216/**
217 * Hash a addr_map_entry_t object, all entries with the same IP address
218 * are stored in the same bucket
219 */
220static u_int addr_map_entry_hash(addr_map_entry_t *this)
221{
222 return chunk_hash(this->ip->get_address(this->ip));
223}
224
225/**
226 * Compare two addr_map_entry_t objects, two entries are equal if they are
227 * installed on the same interface
228 */
229static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
230{
231 return a->iface->ifindex == b->iface->ifindex &&
232 a->ip->ip_equals(a->ip, b->ip);
233}
234
235/**
236 * Used with get_match this finds an address entry if it is installed on
237 * an up and usable interface
238 */
239static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
240 addr_map_entry_t *b)
241{
242 return iface_entry_up_and_usable(b->iface) &&
243 a->ip->ip_equals(a->ip, b->ip);
244}
245
246/**
247 * Used with get_match this finds an address entry if it is installed on
248 * any active local interface
249 */
250static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
251{
252 return iface_entry_up(b->iface) && a->ip->ip_equals(a->ip, b->ip);
940e1b0f
TB
253}
254
c6b40158
TB
255/**
256 * Used with get_match this finds an address entry if it is installed on
257 * any local interface
258 */
259static bool addr_map_entry_match(addr_map_entry_t *a, addr_map_entry_t *b)
260{
261 return a->ip->ip_equals(a->ip, b->ip);
262}
263
74ba22c9
TB
264typedef struct route_entry_t route_entry_t;
265
266/**
267 * Installed routing entry
268 */
269struct route_entry_t {
270 /** Name of the interface the route is bound to */
271 char *if_name;
272
273 /** Source ip of the route */
274 host_t *src_ip;
275
276 /** Gateway for this route */
277 host_t *gateway;
278
279 /** Destination net */
280 chunk_t dst_net;
281
282 /** Destination net prefixlen */
b12c53ce 283 uint8_t prefixlen;
74ba22c9
TB
284};
285
286/**
287 * Clone a route_entry_t object.
288 */
289static route_entry_t *route_entry_clone(route_entry_t *this)
290{
291 route_entry_t *route;
292
293 INIT(route,
294 .if_name = strdup(this->if_name),
295 .src_ip = this->src_ip->clone(this->src_ip),
3cd7ba49 296 .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
74ba22c9
TB
297 .dst_net = chunk_clone(this->dst_net),
298 .prefixlen = this->prefixlen,
299 );
300 return route;
301}
302
303/**
304 * Destroy a route_entry_t object
305 */
306static void route_entry_destroy(route_entry_t *this)
307{
308 free(this->if_name);
309 DESTROY_IF(this->src_ip);
310 DESTROY_IF(this->gateway);
311 chunk_free(&this->dst_net);
312 free(this);
313}
314
315/**
316 * Hash a route_entry_t object
317 */
318static u_int route_entry_hash(route_entry_t *this)
319{
320 return chunk_hash_inc(chunk_from_thing(this->prefixlen),
321 chunk_hash(this->dst_net));
322}
323
324/**
325 * Compare two route_entry_t objects
326 */
327static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
328{
3cd7ba49
TB
329 if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
330 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
331 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
332 {
333 return (!a->gateway && !b->gateway) || (a->gateway && b->gateway &&
334 a->gateway->ip_equals(a->gateway, b->gateway));
335 }
336 return FALSE;
74ba22c9
TB
337}
338
f834249c
TB
339typedef struct net_change_t net_change_t;
340
/**
 * A network change queued for later processing
 */
struct net_change_t {
	/** name of the interface that was activated or got a new IP address */
	char *if_name;
};
348
349/**
350 * Destroy a net_change_t object
351 */
352static void net_change_destroy(net_change_t *this)
353{
f834249c
TB
354 free(this->if_name);
355 free(this);
356}
357
358/**
359 * Hash a net_change_t object
360 */
361static u_int net_change_hash(net_change_t *this)
362{
f834249c
TB
363 return chunk_hash(chunk_create(this->if_name, strlen(this->if_name)));
364}
365
366/**
367 * Compare two net_change_t objects
368 */
369static bool net_change_equals(net_change_t *a, net_change_t *b)
370{
c732e220 371 return streq(a->if_name, b->if_name);
f834249c
TB
372}
373
507f26f6
TB
374typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t;
375
376/**
377 * Private variables and functions of kernel_netlink_net class.
378 */
379struct private_kernel_netlink_net_t {
380 /**
381 * Public part of the kernel_netlink_net_t object.
382 */
383 kernel_netlink_net_t public;
7daf5226 384
507f26f6 385 /**
a25d536e 386 * lock to access various lists and maps
507f26f6 387 */
a25d536e 388 rwlock_t *lock;
7daf5226 389
507f26f6
TB
390 /**
391 * condition variable to signal virtual IP add/removal
392 */
a25d536e 393 rwlock_condvar_t *condvar;
7daf5226 394
507f26f6
TB
395 /**
396 * Cached list of interfaces and its addresses (iface_entry_t)
397 */
398 linked_list_t *ifaces;
7daf5226 399
1f97e1aa
TB
400 /**
401 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
402 */
403 hashtable_t *addrs;
404
c6b40158
TB
405 /**
406 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
407 */
408 hashtable_t *vips;
409
507f26f6
TB
410 /**
411 * netlink rt socket (routing)
412 */
413 netlink_socket_t *socket;
7daf5226 414
507f26f6
TB
415 /**
416 * Netlink rt socket to receive address change events
417 */
418 int socket_events;
7daf5226 419
507f26f6 420 /**
4134108c 421 * earliest time of the next roam event
507f26f6 422 */
4134108c
TB
423 timeval_t next_roam;
424
77d4a028
TB
425 /**
426 * roam event due to address change
427 */
428 bool roam_address;
429
4134108c
TB
430 /**
431 * lock to check and update roam event time
432 */
433 spinlock_t *roam_lock;
7daf5226 434
507f26f6
TB
435 /**
436 * routing table to install routes
437 */
438 int routing_table;
7daf5226 439
507f26f6
TB
440 /**
441 * priority of used routing table
442 */
443 int routing_table_prio;
7daf5226 444
74ba22c9
TB
445 /**
446 * installed routes
447 */
448 hashtable_t *routes;
449
16d62305
TB
450 /**
451 * mutex for routes
452 */
453 mutex_t *routes_lock;
454
f834249c 455 /**
c732e220 456 * interface changes which may trigger route reinstallation
f834249c
TB
457 */
458 hashtable_t *net_changes;
459
460 /**
461 * mutex for route reinstallation triggers
462 */
463 mutex_t *net_changes_lock;
464
465 /**
466 * time of last route reinstallation
467 */
468 timeval_t last_route_reinstall;
469
507f26f6
TB
470 /**
471 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
472 */
473 bool process_route;
7daf5226 474
37873f99
TB
475 /**
476 * whether to trigger roam events
477 */
478 bool roam_events;
479
558691b3
MW
480 /**
481 * whether to install IPsec policy routes
482 */
483 bool install_routes;
484
9474a0d9
MW
485 /**
486 * whether to actually install virtual IPs
487 */
488 bool install_virtual_ip;
d266e895 489
e8e9048f
TB
490 /**
491 * the name of the interface virtual IP addresses are installed on
492 */
493 char *install_virtual_ip_on;
494
7beb31aa
TB
495 /**
496 * whether preferred source addresses can be specified for IPv6 routes
497 */
498 bool rta_prefsrc_for_ipv6;
499
6bd1216e
TB
500 /**
501 * whether marks can be used in route lookups
502 */
503 bool rta_mark;
504
505 /**
506 * the mark excluded from the routing rule used for virtual IPs
507 */
508 mark_t routing_mark;
509
3bf98189
TB
510 /**
511 * whether to prefer temporary IPv6 addresses over public ones
512 */
513 bool prefer_temporary_addrs;
514
d266e895
TE
515 /**
516 * list with routing tables to be excluded from route lookup
517 */
518 linked_list_t *rt_exclude;
c1adf7e0
TB
519
520 /**
521 * MTU to set on installed routes
522 */
b12c53ce 523 uint32_t mtu;
47a0e289
TB
524
525 /**
526 * MSS to set on installed routes
527 */
b12c53ce 528 uint32_t mss;
507f26f6
TB
529};
530
f834249c
TB
531/**
532 * Forward declaration
533 */
534static status_t manage_srcroute(private_kernel_netlink_net_t *this,
535 int nlmsg_type, int flags, chunk_t dst_net,
b12c53ce 536 uint8_t prefixlen, host_t *gateway,
f834249c
TB
537 host_t *src_ip, char *if_name);
538
539/**
540 * Clear the queued network changes.
541 */
542static void net_changes_clear(private_kernel_netlink_net_t *this)
543{
544 enumerator_t *enumerator;
545 net_change_t *change;
546
547 enumerator = this->net_changes->create_enumerator(this->net_changes);
548 while (enumerator->enumerate(enumerator, NULL, (void**)&change))
549 {
550 this->net_changes->remove_at(this->net_changes, enumerator);
551 net_change_destroy(change);
552 }
553 enumerator->destroy(enumerator);
554}
555
556/**
557 * Act upon queued network changes.
558 */
559static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
560{
561 enumerator_t *enumerator;
562 route_entry_t *route;
563
564 this->net_changes_lock->lock(this->net_changes_lock);
16d62305 565 this->routes_lock->lock(this->routes_lock);
f834249c
TB
566
567 enumerator = this->routes->create_enumerator(this->routes);
568 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
569 {
570 net_change_t *change, lookup = {
571 .if_name = route->if_name,
572 };
c732e220 573 /* check if a change for the outgoing interface is queued */
f834249c
TB
574 change = this->net_changes->get(this->net_changes, &lookup);
575 if (!change)
c732e220 576 { /* in case src_ip is not on the outgoing interface */
9ba36c0f
TB
577 if (this->public.interface.get_interface(&this->public.interface,
578 route->src_ip, &lookup.if_name))
c732e220 579 {
9ba36c0f
TB
580 if (!streq(lookup.if_name, route->if_name))
581 {
582 change = this->net_changes->get(this->net_changes, &lookup);
583 }
584 free(lookup.if_name);
c732e220 585 }
f834249c
TB
586 }
587 if (change)
588 {
589 manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
590 route->dst_net, route->prefixlen, route->gateway,
591 route->src_ip, route->if_name);
592 }
593 }
594 enumerator->destroy(enumerator);
16d62305 595 this->routes_lock->unlock(this->routes_lock);
f834249c
TB
596
597 net_changes_clear(this);
598 this->net_changes_lock->unlock(this->net_changes_lock);
599 return JOB_REQUEUE_NONE;
600}
601
602/**
603 * Queue route reinstallation caused by network changes for a given interface.
f834249c
TB
604 *
605 * The route reinstallation is delayed for a while and only done once for
606 * several calls during this delay, in order to avoid doing it too often.
c732e220 607 * The interface name is freed.
f834249c
TB
608 */
609static void queue_route_reinstall(private_kernel_netlink_net_t *this,
c732e220 610 char *if_name)
f834249c
TB
611{
612 net_change_t *update, *found;
613 timeval_t now;
614 job_t *job;
615
616 INIT(update,
c732e220 617 .if_name = if_name
f834249c
TB
618 );
619
620 this->net_changes_lock->lock(this->net_changes_lock);
c732e220 621 found = this->net_changes->put(this->net_changes, update, update);
f834249c
TB
622 if (found)
623 {
c732e220 624 net_change_destroy(found);
f834249c
TB
625 }
626 time_monotonic(&now);
627 if (timercmp(&now, &this->last_route_reinstall, >))
628 {
eecd41e3 629 timeval_add_ms(&now, ROUTE_DELAY);
f834249c
TB
630 this->last_route_reinstall = now;
631
632 job = (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
633 this, NULL, NULL);
634 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROUTE_DELAY);
635 }
636 this->net_changes_lock->unlock(this->net_changes_lock);
637}
638
507f26f6 639/**
c6b40158
TB
640 * check if the given IP is known as virtual IP and currently installed
641 *
642 * this function will also return TRUE if the virtual IP entry disappeared.
643 * in that case the returned entry will be NULL.
644 *
a25d536e 645 * this->lock must be held when calling this function
507f26f6 646 */
c6b40158
TB
647static bool is_vip_installed_or_gone(private_kernel_netlink_net_t *this,
648 host_t *ip, addr_map_entry_t **entry)
507f26f6 649{
c6b40158
TB
650 addr_map_entry_t lookup = {
651 .ip = ip,
652 };
7daf5226 653
c6b40158
TB
654 *entry = this->vips->get_match(this->vips, &lookup,
655 (void*)addr_map_entry_match);
656 if (*entry == NULL)
657 { /* the virtual IP disappeared */
658 return TRUE;
507f26f6 659 }
c6b40158
TB
660 return (*entry)->addr->installed;
661}
7daf5226 662
c6b40158
TB
663/**
664 * check if the given IP is known as virtual IP
665 *
a25d536e 666 * this->lock must be held when calling this function
c6b40158
TB
667 */
668static bool is_known_vip(private_kernel_netlink_net_t *this, host_t *ip)
669{
670 addr_map_entry_t lookup = {
671 .ip = ip,
672 };
673
674 return this->vips->get_match(this->vips, &lookup,
675 (void*)addr_map_entry_match) != NULL;
507f26f6
TB
676}
677
1f97e1aa
TB
678/**
679 * Add an address map entry
680 */
c6b40158
TB
681static void addr_map_entry_add(hashtable_t *map, addr_entry_t *addr,
682 iface_entry_t *iface)
1f97e1aa
TB
683{
684 addr_map_entry_t *entry;
685
1f97e1aa
TB
686 INIT(entry,
687 .ip = addr->ip,
c6b40158 688 .addr = addr,
1f97e1aa
TB
689 .iface = iface,
690 );
c6b40158 691 entry = map->put(map, entry, entry);
1f97e1aa
TB
692 free(entry);
693}
694
695/**
c6b40158 696 * Remove an address map entry
1f97e1aa 697 */
c6b40158
TB
698static void addr_map_entry_remove(hashtable_t *map, addr_entry_t *addr,
699 iface_entry_t *iface)
1f97e1aa
TB
700{
701 addr_map_entry_t *entry, lookup = {
702 .ip = addr->ip,
c6b40158 703 .addr = addr,
1f97e1aa
TB
704 .iface = iface,
705 };
706
c6b40158 707 entry = map->remove(map, &lookup);
1f97e1aa
TB
708 free(entry);
709}
710
bfc595a3
TB
711/**
712 * Check if an address or net (addr with prefix net bits) is in
713 * subnet (net with net_len net bits)
714 */
715static bool addr_in_subnet(chunk_t addr, int prefix, chunk_t net, int net_len)
716{
717 static const u_char mask[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
718 int byte = 0;
719
720 if (net_len == 0)
721 { /* any address matches a /0 network */
722 return TRUE;
723 }
724 if (addr.len != net.len || net_len > 8 * net.len || prefix < net_len)
725 {
726 return FALSE;
727 }
728 /* scan through all bytes in network order */
729 while (net_len > 0)
730 {
731 if (net_len < 8)
732 {
733 return (mask[net_len] & addr.ptr[byte]) == (mask[net_len] & net.ptr[byte]);
734 }
735 else
736 {
737 if (addr.ptr[byte] != net.ptr[byte])
738 {
739 return FALSE;
740 }
741 byte++;
742 net_len -= 8;
743 }
744 }
745 return TRUE;
746}
747
748/**
749 * Check if the given address is in subnet (net with net_len net bits)
750 */
751static bool host_in_subnet(host_t *host, chunk_t net, int net_len)
752{
753 chunk_t addr;
754
755 addr = host->get_address(host);
756 return addr_in_subnet(addr, addr.len * 8, net, net_len);
757}
758
29607690 759/**
3bf98189
TB
760 * Determine the type or scope of the given unicast IP address. This is not
761 * the same thing returned in rtm_scope/ifa_scope.
762 *
763 * We use return values as defined in RFC 6724 (referring to RFC 4291).
764 */
765static u_char get_scope(host_t *ip)
766{
767 chunk_t addr;
768
769 addr = ip->get_address(ip);
770 switch (addr.len)
771 {
772 case 4:
773 /* we use the mapping defined in RFC 6724, 3.2 */
774 if (addr.ptr[0] == 127)
775 { /* link-local, same as the IPv6 loopback address */
776 return 2;
777 }
778 if (addr.ptr[0] == 169 && addr.ptr[1] == 254)
779 { /* link-local */
780 return 2;
781 }
782 break;
783 case 16:
cd6b2af3 784 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr*)addr.ptr))
3bf98189
TB
785 { /* link-local, according to RFC 4291, 2.5.3 */
786 return 2;
787 }
cd6b2af3 788 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr*)addr.ptr))
3bf98189
TB
789 {
790 return 2;
791 }
cd6b2af3 792 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr*)addr.ptr))
3bf98189
TB
793 { /* deprecated, according to RFC 4291, 2.5.7 */
794 return 5;
795 }
796 break;
797 default:
798 break;
799 }
800 /* global */
801 return 14;
802}
803
7a40162c
TB
804/**
805 * Determine the label of the given unicast IP address.
806 *
807 * We currently only support the default table given in RFC 6724:
808 *
809 * Prefix Precedence Label
810 * ::1/128 50 0
811 * ::/0 40 1
812 * ::ffff:0:0/96 35 4
813 * 2002::/16 30 2
814 * 2001::/32 5 5
815 * fc00::/7 3 13
816 * ::/96 1 3
817 * fec0::/10 1 11
818 * 3ffe::/16 1 12
819 */
820static u_char get_label(host_t *ip)
821{
822 struct {
823 chunk_t net;
824 u_char prefix;
825 u_char label;
826 } priorities[] = {
827 /* priority table ordered by prefix */
828 /* ::1/128 */
829 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
830 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), 128, 0 },
831 /* ::ffff:0:0/96 */
832 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
833 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), 96, 4 },
834 /* ::/96 */
835 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
836 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 96, 3 },
837 /* 2001::/32 */
838 { chunk_from_chars(0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
839 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 32, 5 },
840 /* 2002::/16 */
841 { chunk_from_chars(0x20, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
842 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 2 },
843 /* 3ffe::/16 */
844 { chunk_from_chars(0x3f, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
845 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 12 },
846 /* fec0::/10 */
847 { chunk_from_chars(0xfe, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
848 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 10, 11 },
849 /* fc00::/7 */
850 { chunk_from_chars(0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
851 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 7, 13 },
852 };
853 int i;
854
855 for (i = 0; i < countof(priorities); i++)
856 {
857 if (host_in_subnet(ip, priorities[i].net, priorities[i].prefix))
858 {
859 return priorities[i].label;
860 }
861 }
862 /* ::/0 */
863 return 1;
864}
865
3bf98189
TB
866/**
867 * Returns the length of the common prefix in bits up to the length of a's
868 * prefix, defined by RFC 6724 as the portion of the address not including the
869 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
870 */
871static u_char common_prefix(host_t *a, host_t *b)
872{
873 chunk_t aa, ba;
874 u_char byte, bits = 0, match;
875
876 aa = a->get_address(a);
877 ba = b->get_address(b);
878 for (byte = 0; byte < 8; byte++)
879 {
880 if (aa.ptr[byte] != ba.ptr[byte])
881 {
882 match = aa.ptr[byte] ^ ba.ptr[byte];
883 for (bits = 8; match; match >>= 1)
884 {
885 bits--;
886 }
887 break;
888 }
889 }
890 return byte * 8 + bits;
891}
892
893/**
894 * Compare two IP addresses and return TRUE if the second address is the better
895 * choice of the two to reach the destination.
896 * For IPv6 we approximately follow RFC 6724.
897 */
898static bool is_address_better(private_kernel_netlink_net_t *this,
899 addr_entry_t *a, addr_entry_t *b, host_t *d)
900{
7a40162c 901 u_char sa, sb, sd, la, lb, ld, pa, pb;
3bf98189
TB
902
903 /* rule 2: prefer appropriate scope */
904 if (d)
905 {
906 sa = get_scope(a->ip);
907 sb = get_scope(b->ip);
908 sd = get_scope(d);
909 if (sa < sb)
910 {
911 return sa < sd;
912 }
913 else if (sb < sa)
914 {
915 return sb >= sd;
916 }
917 }
918 if (a->ip->get_family(a->ip) == AF_INET)
919 { /* stop here for IPv4, default to addresses found earlier */
920 return FALSE;
921 }
922 /* rule 3: avoid deprecated addresses (RFC 4862) */
923 if ((a->flags & IFA_F_DEPRECATED) != (b->flags & IFA_F_DEPRECATED))
924 {
925 return a->flags & IFA_F_DEPRECATED;
926 }
927 /* rule 4 is not applicable as we don't know if an address is a home or
928 * care-of addresses.
929 * rule 5 does not apply as we only compare addresses from one interface
3bf98189 930 */
7a40162c
TB
931 /* rule 6: prefer matching label */
932 if (d)
933 {
934 la = get_label(a->ip);
935 lb = get_label(b->ip);
936 ld = get_label(d);
937 if (la == ld && lb != ld)
938 {
939 return FALSE;
940 }
941 else if (lb == ld && la != ld)
942 {
943 return TRUE;
944 }
945 }
3bf98189
TB
946 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
947 if ((a->flags & IFA_F_TEMPORARY) != (b->flags & IFA_F_TEMPORARY))
948 {
949 if (this->prefer_temporary_addrs)
950 {
951 return b->flags & IFA_F_TEMPORARY;
952 }
953 return a->flags & IFA_F_TEMPORARY;
954 }
955 /* rule 8: use longest matching prefix */
956 if (d)
957 {
958 pa = common_prefix(a->ip, d);
959 pb = common_prefix(b->ip, d);
960 if (pa != pb)
961 {
962 return pb > pa;
963 }
964 }
965 /* default to addresses found earlier */
966 return FALSE;
967}
968
969/**
bfc595a3
TB
970 * Get a non-virtual IP address on the given interfaces and optionally in a
971 * given subnet.
3bf98189
TB
972 *
973 * If a candidate address is given, we first search for that address and if not
da6d86dd 974 * found return the address as above.
3bf98189 975 * Returned host is a clone, has to be freed by caller.
a25d536e 976 *
3bf98189 977 * this->lock must be held when calling this function.
29607690 978 */
bfc595a3
TB
979static host_t *get_matching_address(private_kernel_netlink_net_t *this,
980 int *ifindex, int family, chunk_t net,
981 uint8_t mask, host_t *dest,
982 host_t *candidate)
29607690 983{
bfc595a3 984 enumerator_t *ifaces, *addrs;
29607690 985 iface_entry_t *iface;
3bf98189 986 addr_entry_t *addr, *best = NULL;
bfc595a3 987 bool candidate_matched = FALSE;
29607690 988
bfc595a3
TB
989 ifaces = this->ifaces->create_enumerator(this->ifaces);
990 while (ifaces->enumerate(ifaces, &iface))
29607690 991 {
bfc595a3
TB
992 if (iface->usable && (!ifindex || iface->ifindex == *ifindex))
993 { /* only use matching interfaces not excluded by config */
29607690
TB
994 addrs = iface->addrs->create_enumerator(iface->addrs);
995 while (addrs->enumerate(addrs, &addr))
996 {
3bf98189
TB
997 if (addr->refcount ||
998 addr->ip->get_family(addr->ip) != family)
999 { /* ignore virtual IP addresses and ensure family matches */
da6d86dd
TB
1000 continue;
1001 }
bfc595a3
TB
1002 if (net.ptr && !host_in_subnet(addr->ip, net, mask))
1003 { /* optionally match a subnet */
1004 continue;
1005 }
3bf98189
TB
1006 if (candidate && candidate->ip_equals(candidate, addr->ip))
1007 { /* stop if we find the candidate */
1008 best = addr;
bfc595a3 1009 candidate_matched = TRUE;
3bf98189
TB
1010 break;
1011 }
1012 else if (!best || is_address_better(this, best, addr, dest))
da6d86dd 1013 {
3bf98189 1014 best = addr;
29607690
TB
1015 }
1016 }
1017 addrs->destroy(addrs);
bfc595a3
TB
1018 if (ifindex || candidate_matched)
1019 {
1020 break;
1021 }
29607690
TB
1022 }
1023 }
bfc595a3 1024 ifaces->destroy(ifaces);
3bf98189 1025 return best ? best->ip->clone(best->ip) : NULL;
29607690
TB
1026}
1027
bfc595a3
TB
1028/**
1029 * Get a non-virtual IP address on the given interface.
1030 *
1031 * If a candidate address is given, we first search for that address and if not
1032 * found return the address as above.
1033 * Returned host is a clone, has to be freed by caller.
1034 *
1035 * this->lock must be held when calling this function.
1036 */
1037static host_t *get_interface_address(private_kernel_netlink_net_t *this,
1038 int ifindex, int family, host_t *dest,
1039 host_t *candidate)
1040{
1041 return get_matching_address(this, &ifindex, family, chunk_empty, 0, dest,
1042 candidate);
1043}
1044
1045/**
1046 * Get a non-virtual IP address in the given subnet.
1047 *
1048 * If a candidate address is given, we first search for that address and if not
1049 * found return the address as above.
1050 * Returned host is a clone, has to be freed by caller.
1051 *
1052 * this->lock must be held when calling this function.
1053 */
1054static host_t *get_subnet_address(private_kernel_netlink_net_t *this,
1055 int family, chunk_t net, uint8_t mask,
1056 host_t *dest, host_t *candidate)
1057{
1058 return get_matching_address(this, NULL, family, net, mask, dest, candidate);
1059}
1060
507f26f6 1061/**
ba26508d 1062 * callback function that raises the delayed roam event
507f26f6 1063 */
77d4a028 1064static job_requeue_t roam_event(private_kernel_netlink_net_t *this)
ba26508d 1065{
77d4a028
TB
1066 bool address;
1067
1068 this->roam_lock->lock(this->roam_lock);
1069 address = this->roam_address;
1070 this->roam_address = FALSE;
1071 this->roam_lock->unlock(this->roam_lock);
8394ea2a 1072 charon->kernel->roam(charon->kernel, address);
ba26508d
TB
1073 return JOB_REQUEUE_NONE;
1074}
1075
1076/**
1077 * fire a roaming event. we delay it for a bit and fire only one event
1078 * for multiple calls. otherwise we would create too many events.
1079 */
1080static void fire_roam_event(private_kernel_netlink_net_t *this, bool address)
507f26f6 1081{
de578445 1082 timeval_t now;
ba26508d 1083 job_t *job;
7daf5226 1084
37873f99
TB
1085 if (!this->roam_events)
1086 {
1087 return;
1088 }
1089
de578445 1090 time_monotonic(&now);
4134108c 1091 this->roam_lock->lock(this->roam_lock);
11f46853 1092 this->roam_address |= address;
4134108c 1093 if (!timercmp(&now, &this->next_roam, >))
507f26f6 1094 {
4134108c
TB
1095 this->roam_lock->unlock(this->roam_lock);
1096 return;
507f26f6 1097 }
eecd41e3 1098 timeval_add_ms(&now, ROAM_DELAY);
4134108c
TB
1099 this->next_roam = now;
1100 this->roam_lock->unlock(this->roam_lock);
1101
1102 job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
77d4a028 1103 this, NULL, NULL);
4134108c 1104 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
507f26f6
TB
1105}
1106
940e1b0f
TB
1107/**
1108 * check if an interface with a given index is up and usable
c6b40158 1109 *
a25d536e 1110 * this->lock must be locked when calling this function
940e1b0f
TB
1111 */
1112static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
1113 int index)
1114{
1115 iface_entry_t *iface;
1116
2e4d110d
TB
1117 if (this->ifaces->find_first(this->ifaces, iface_entry_by_index,
1118 (void**)&iface, index))
940e1b0f
TB
1119 {
1120 return iface_entry_up_and_usable(iface);
1121 }
1122 return FALSE;
1123}
1124
c6b40158
TB
1125/**
1126 * unregister the current addr_entry_t from the hashtable it is stored in
1127 *
a25d536e 1128 * this->lock must be locked when calling this function
c6b40158 1129 */
8a2e4d4a
TB
1130CALLBACK(addr_entry_unregister, void,
1131 addr_entry_t *addr, va_list args)
c6b40158 1132{
8a2e4d4a
TB
1133 private_kernel_netlink_net_t *this;
1134 iface_entry_t *iface;
1135
1136 VA_ARGS_VGET(args, iface, this);
c6b40158
TB
1137 if (addr->refcount)
1138 {
1139 addr_map_entry_remove(this->vips, addr, iface);
1140 this->condvar->broadcast(this->condvar);
1141 return;
1142 }
1143 addr_map_entry_remove(this->addrs, addr, iface);
1144}
1145
507f26f6
TB
1146/**
1147 * process RTM_NEWLINK/RTM_DELLINK from kernel
1148 */
1149static void process_link(private_kernel_netlink_net_t *this,
1150 struct nlmsghdr *hdr, bool event)
1151{
4c438cf0 1152 struct ifinfomsg* msg = NLMSG_DATA(hdr);
507f26f6
TB
1153 struct rtattr *rta = IFLA_RTA(msg);
1154 size_t rtasize = IFLA_PAYLOAD (hdr);
e13389a7 1155 enumerator_t *enumerator;
507f26f6
TB
1156 iface_entry_t *current, *entry = NULL;
1157 char *name = NULL;
f834249c 1158 bool update = FALSE, update_routes = FALSE;
7daf5226 1159
f834249c 1160 while (RTA_OK(rta, rtasize))
507f26f6
TB
1161 {
1162 switch (rta->rta_type)
1163 {
1164 case IFLA_IFNAME:
1165 name = RTA_DATA(rta);
1166 break;
1167 }
1168 rta = RTA_NEXT(rta, rtasize);
1169 }
1170 if (!name)
1171 {
1172 name = "(unknown)";
1173 }
7daf5226 1174
a25d536e 1175 this->lock->write_lock(this->lock);
507f26f6
TB
1176 switch (hdr->nlmsg_type)
1177 {
1178 case RTM_NEWLINK:
1179 {
2e4d110d
TB
1180 if (!this->ifaces->find_first(this->ifaces, iface_entry_by_index,
1181 (void**)&entry, msg->ifi_index))
507f26f6 1182 {
c6b40158
TB
1183 INIT(entry,
1184 .ifindex = msg->ifi_index,
1185 .addrs = linked_list_create(),
c6b40158 1186 );
507f26f6
TB
1187 this->ifaces->insert_last(this->ifaces, entry);
1188 }
f526b35c 1189 strncpy(entry->ifname, name, IFNAMSIZ);
507f26f6 1190 entry->ifname[IFNAMSIZ-1] = '\0';
062a34e7
TB
1191 entry->usable = charon->kernel->is_interface_usable(charon->kernel,
1192 name);
940e1b0f 1193 if (event && entry->usable)
507f26f6
TB
1194 {
1195 if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
1196 {
f834249c 1197 update = update_routes = TRUE;
507f26f6
TB
1198 DBG1(DBG_KNL, "interface %s activated", name);
1199 }
1200 if ((entry->flags & IFF_UP) && !(msg->ifi_flags & IFF_UP))
1201 {
1202 update = TRUE;
1203 DBG1(DBG_KNL, "interface %s deactivated", name);
1204 }
1205 }
1206 entry->flags = msg->ifi_flags;
507f26f6
TB
1207 break;
1208 }
1209 case RTM_DELLINK:
1210 {
e13389a7
MW
1211 enumerator = this->ifaces->create_enumerator(this->ifaces);
1212 while (enumerator->enumerate(enumerator, &current))
507f26f6
TB
1213 {
1214 if (current->ifindex == msg->ifi_index)
1215 {
940e1b0f 1216 if (event && current->usable)
7b218736
MP
1217 {
1218 update = TRUE;
1219 DBG1(DBG_KNL, "interface %s deleted", current->ifname);
1220 }
c6b40158
TB
1221 /* TODO: move virtual IPs installed on this interface to
1222 * another interface? */
7b218736 1223 this->ifaces->remove_at(this->ifaces, enumerator);
1f97e1aa 1224 current->addrs->invoke_function(current->addrs,
8a2e4d4a 1225 addr_entry_unregister, current, this);
7b218736 1226 iface_entry_destroy(current);
507f26f6
TB
1227 break;
1228 }
1229 }
e13389a7 1230 enumerator->destroy(enumerator);
507f26f6
TB
1231 break;
1232 }
1233 }
a25d536e 1234 this->lock->unlock(this->lock);
7daf5226 1235
f834249c
TB
1236 if (update_routes && event)
1237 {
c732e220 1238 queue_route_reinstall(this, strdup(name));
f834249c
TB
1239 }
1240
507f26f6
TB
1241 if (update && event)
1242 {
ba26508d 1243 fire_roam_event(this, TRUE);
507f26f6
TB
1244 }
1245}
1246
1247/**
1248 * process RTM_NEWADDR/RTM_DELADDR from kernel
1249 */
1250static void process_addr(private_kernel_netlink_net_t *this,
1251 struct nlmsghdr *hdr, bool event)
1252{
4c438cf0 1253 struct ifaddrmsg* msg = NLMSG_DATA(hdr);
507f26f6
TB
1254 struct rtattr *rta = IFA_RTA(msg);
1255 size_t rtasize = IFA_PAYLOAD (hdr);
1256 host_t *host = NULL;
507f26f6 1257 iface_entry_t *iface;
507f26f6 1258 chunk_t local = chunk_empty, address = chunk_empty;
f834249c 1259 char *route_ifname = NULL;
507f26f6 1260 bool update = FALSE, found = FALSE, changed = FALSE;
7daf5226 1261
f834249c 1262 while (RTA_OK(rta, rtasize))
507f26f6
TB
1263 {
1264 switch (rta->rta_type)
1265 {
1266 case IFA_LOCAL:
1267 local.ptr = RTA_DATA(rta);
1268 local.len = RTA_PAYLOAD(rta);
1269 break;
1270 case IFA_ADDRESS:
1271 address.ptr = RTA_DATA(rta);
1272 address.len = RTA_PAYLOAD(rta);
1273 break;
1274 }
1275 rta = RTA_NEXT(rta, rtasize);
1276 }
7daf5226 1277
507f26f6
TB
1278 /* For PPP interfaces, we need the IFA_LOCAL address,
1279 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1280 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1281 if (local.ptr)
1282 {
1283 host = host_create_from_chunk(msg->ifa_family, local, 0);
1284 }
1285 else if (address.ptr)
1286 {
1287 host = host_create_from_chunk(msg->ifa_family, address, 0);
1288 }
7daf5226 1289
507f26f6
TB
1290 if (host == NULL)
1291 { /* bad family? */
1292 return;
1293 }
7daf5226 1294
a25d536e 1295 this->lock->write_lock(this->lock);
2e4d110d
TB
1296 if (this->ifaces->find_first(this->ifaces, iface_entry_by_index,
1297 (void**)&iface, msg->ifa_index))
507f26f6 1298 {
c6b40158
TB
1299 addr_map_entry_t *entry, lookup = {
1300 .ip = host,
1301 .iface = iface,
1302 };
1303 addr_entry_t *addr;
1304
1305 entry = this->vips->get(this->vips, &lookup);
1306 if (entry)
507f26f6 1307 {
c6b40158
TB
1308 if (hdr->nlmsg_type == RTM_NEWADDR)
1309 { /* mark as installed and signal waiting threads */
1310 entry->addr->installed = TRUE;
1311 }
1312 else
1313 { /* the address was already marked as uninstalled */
1314 addr = entry->addr;
1315 iface->addrs->remove(iface->addrs, addr, NULL);
1316 addr_map_entry_remove(this->vips, addr, iface);
1317 addr_entry_destroy(addr);
1318 }
1319 /* no roam events etc. for virtual IPs */
1320 this->condvar->broadcast(this->condvar);
a25d536e 1321 this->lock->unlock(this->lock);
c6b40158
TB
1322 host->destroy(host);
1323 return;
1324 }
1325 entry = this->addrs->get(this->addrs, &lookup);
1326 if (entry)
1327 {
1328 if (hdr->nlmsg_type == RTM_DELADDR)
507f26f6 1329 {
c6b40158
TB
1330 found = TRUE;
1331 addr = entry->addr;
1332 iface->addrs->remove(iface->addrs, addr, NULL);
1333 if (iface->usable)
507f26f6 1334 {
c6b40158
TB
1335 changed = TRUE;
1336 DBG1(DBG_KNL, "%H disappeared from %s", host,
1337 iface->ifname);
507f26f6 1338 }
c6b40158
TB
1339 addr_map_entry_remove(this->addrs, addr, iface);
1340 addr_entry_destroy(addr);
507f26f6 1341 }
c6b40158
TB
1342 }
1343 else
1344 {
507f26f6
TB
1345 if (hdr->nlmsg_type == RTM_NEWADDR)
1346 {
c6b40158
TB
1347 found = TRUE;
1348 changed = TRUE;
1349 route_ifname = strdup(iface->ifname);
1350 INIT(addr,
1351 .ip = host->clone(host),
3bf98189 1352 .flags = msg->ifa_flags,
c6b40158
TB
1353 .scope = msg->ifa_scope,
1354 );
1355 iface->addrs->insert_last(iface->addrs, addr);
1356 addr_map_entry_add(this->addrs, addr, iface);
1357 if (event && iface->usable)
507f26f6 1358 {
c6b40158 1359 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
507f26f6
TB
1360 }
1361 }
c6b40158
TB
1362 }
1363 if (found && (iface->flags & IFF_UP))
1364 {
1365 update = TRUE;
1366 }
1367 if (!iface->usable)
1368 { /* ignore events for interfaces excluded by config */
1369 update = changed = FALSE;
507f26f6
TB
1370 }
1371 }
a25d536e 1372 this->lock->unlock(this->lock);
f834249c
TB
1373
1374 if (update && event && route_ifname)
1375 {
c732e220 1376 queue_route_reinstall(this, route_ifname);
f834249c
TB
1377 }
1378 else
1379 {
1380 free(route_ifname);
1381 }
507f26f6 1382 host->destroy(host);
7daf5226 1383
507f26f6
TB
1384 /* send an update to all IKE_SAs */
1385 if (update && event && changed)
1386 {
ba26508d 1387 fire_roam_event(this, TRUE);
507f26f6
TB
1388 }
1389}
1390
1391/**
1392 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1393 */
1394static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
1395{
4c438cf0 1396 struct rtmsg* msg = NLMSG_DATA(hdr);
507f26f6
TB
1397 struct rtattr *rta = RTM_RTA(msg);
1398 size_t rtasize = RTM_PAYLOAD(hdr);
b12c53ce 1399 uint32_t rta_oif = 0;
507f26f6 1400 host_t *host = NULL;
7daf5226 1401
ec0c756d
TB
1402 /* ignore routes added by us or in the local routing table (local addrs) */
1403 if (msg->rtm_table && (msg->rtm_table == this->routing_table ||
1404 msg->rtm_table == RT_TABLE_LOCAL))
85be7e5b
MW
1405 {
1406 return;
1407 }
8ec51f83
TB
1408 else if (msg->rtm_flags & RTM_F_CLONED)
1409 { /* ignore cached routes, seem to be created a lot for IPv6 */
1410 return;
1411 }
7daf5226 1412
507f26f6
TB
1413 while (RTA_OK(rta, rtasize))
1414 {
1415 switch (rta->rta_type)
1416 {
1417 case RTA_PREFSRC:
862ef49f 1418 DESTROY_IF(host);
507f26f6
TB
1419 host = host_create_from_chunk(msg->rtm_family,
1420 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)), 0);
1421 break;
29607690
TB
1422 case RTA_OIF:
1423 if (RTA_PAYLOAD(rta) == sizeof(rta_oif))
1424 {
b12c53ce 1425 rta_oif = *(uint32_t*)RTA_DATA(rta);
29607690
TB
1426 }
1427 break;
507f26f6
TB
1428 }
1429 rta = RTA_NEXT(rta, rtasize);
1430 }
a25d536e 1431 this->lock->read_lock(this->lock);
940e1b0f
TB
1432 if (rta_oif && !is_interface_up_and_usable(this, rta_oif))
1433 { /* ignore route changes for interfaces that are ignored or down */
a25d536e 1434 this->lock->unlock(this->lock);
940e1b0f
TB
1435 DESTROY_IF(host);
1436 return;
1437 }
29607690
TB
1438 if (!host && rta_oif)
1439 {
3bf98189
TB
1440 host = get_interface_address(this, rta_oif, msg->rtm_family,
1441 NULL, NULL);
29607690 1442 }
a25d536e
TB
1443 if (!host || is_known_vip(this, host))
1444 { /* ignore routes added for virtual IPs */
1445 this->lock->unlock(this->lock);
1446 DESTROY_IF(host);
1447 return;
507f26f6 1448 }
a25d536e
TB
1449 this->lock->unlock(this->lock);
1450 fire_roam_event(this, FALSE);
1451 host->destroy(host);
507f26f6
TB
1452}
1453
1454/**
1455 * Receives events from kernel
1456 */
f4f77d74
MW
1457static bool receive_events(private_kernel_netlink_net_t *this, int fd,
1458 watcher_event_t event)
507f26f6 1459{
ec331a7d 1460 char response[1536];
507f26f6
TB
1461 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1462 struct sockaddr_nl addr;
1463 socklen_t addr_len = sizeof(addr);
4a5a5dd2 1464 int len;
7daf5226 1465
f4f77d74
MW
1466 len = recvfrom(this->socket_events, response, sizeof(response),
1467 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
507f26f6
TB
1468 if (len < 0)
1469 {
1470 switch (errno)
1471 {
1472 case EINTR:
1473 /* interrupted, try again */
f4f77d74 1474 return TRUE;
507f26f6
TB
1475 case EAGAIN:
1476 /* no data ready, select again */
f4f77d74 1477 return TRUE;
507f26f6 1478 default:
a0178fe2
TB
1479 DBG1(DBG_KNL, "unable to receive from RT event socket %s (%d)",
1480 strerror(errno), errno);
507f26f6 1481 sleep(1);
f4f77d74 1482 return TRUE;
507f26f6
TB
1483 }
1484 }
7daf5226 1485
507f26f6
TB
1486 if (addr.nl_pid != 0)
1487 { /* not from kernel. not interested, try another one */
f4f77d74 1488 return TRUE;
507f26f6 1489 }
7daf5226 1490
507f26f6
TB
1491 while (NLMSG_OK(hdr, len))
1492 {
1493 /* looks good so far, dispatch netlink message */
1494 switch (hdr->nlmsg_type)
1495 {
1496 case RTM_NEWADDR:
1497 case RTM_DELADDR:
1498 process_addr(this, hdr, TRUE);
507f26f6
TB
1499 break;
1500 case RTM_NEWLINK:
1501 case RTM_DELLINK:
1502 process_link(this, hdr, TRUE);
507f26f6
TB
1503 break;
1504 case RTM_NEWROUTE:
1505 case RTM_DELROUTE:
1506 if (this->process_route)
1507 {
1508 process_route(this, hdr);
1509 }
1510 break;
1511 default:
1512 break;
1513 }
1514 hdr = NLMSG_NEXT(hdr, len);
1515 }
f4f77d74 1516 return TRUE;
507f26f6
TB
1517}
1518
1519/** enumerator over addresses */
1520typedef struct {
1521 private_kernel_netlink_net_t* this;
4106aea8
TB
1522 /** which addresses to enumerate */
1523 kernel_address_type_t which;
507f26f6
TB
1524} address_enumerator_t;
1525
525cc46c
TB
1526CALLBACK(address_enumerator_destroy, void,
1527 address_enumerator_t *data)
507f26f6 1528{
a25d536e 1529 data->this->lock->unlock(data->this->lock);
507f26f6
TB
1530 free(data);
1531}
1532
525cc46c
TB
1533CALLBACK(filter_addresses, bool,
1534 address_enumerator_t *data, enumerator_t *orig, va_list args)
507f26f6 1535{
525cc46c
TB
1536 addr_entry_t *addr;
1537 host_t **out;
1538
1539 VA_ARGS_VGET(args, out);
1540
1541 while (orig->enumerate(orig, &addr))
1542 {
1543 if (!(data->which & ADDR_TYPE_VIRTUAL) && addr->refcount)
1544 { /* skip virtual interfaces added by us */
1545 continue;
1546 }
1547 if (!(data->which & ADDR_TYPE_REGULAR) && !addr->refcount)
1548 { /* address is regular, but not requested */
1549 continue;
1550 }
1551 if (addr->scope >= RT_SCOPE_LINK)
1552 { /* skip addresses with a unusable scope */
1553 continue;
1554 }
1555 *out = addr->ip;
1556 return TRUE;
507f26f6 1557 }
525cc46c 1558 return FALSE;
507f26f6
TB
1559}
1560
1561/**
1562 * enumerator constructor for interfaces
1563 */
887abfb1
MW
1564static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
1565 address_enumerator_t *data)
507f26f6 1566{
887abfb1 1567 return enumerator_create_filter(
525cc46c
TB
1568 iface->addrs->create_enumerator(iface->addrs),
1569 filter_addresses, data, NULL);
507f26f6
TB
1570}
1571
525cc46c
TB
1572CALLBACK(filter_interfaces, bool,
1573 address_enumerator_t *data, enumerator_t *orig, va_list args)
507f26f6 1574{
525cc46c
TB
1575 iface_entry_t *iface, **out;
1576
1577 VA_ARGS_VGET(args, out);
1578
1579 while (orig->enumerate(orig, &iface))
1580 {
1581 if (!(data->which & ADDR_TYPE_IGNORED) && !iface->usable)
1582 { /* skip interfaces excluded by config */
1583 continue;
1584 }
1585 if (!(data->which & ADDR_TYPE_LOOPBACK) && (iface->flags & IFF_LOOPBACK))
1586 { /* ignore loopback devices */
1587 continue;
1588 }
1589 if (!(data->which & ADDR_TYPE_DOWN) && !(iface->flags & IFF_UP))
1590 { /* skip interfaces not up */
1591 continue;
1592 }
1593 *out = iface;
1594 return TRUE;
507f26f6 1595 }
525cc46c 1596 return FALSE;
507f26f6
TB
1597}
1598
887abfb1 1599METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
4106aea8 1600 private_kernel_netlink_net_t *this, kernel_address_type_t which)
507f26f6 1601{
1a2a8bff
MW
1602 address_enumerator_t *data;
1603
1604 INIT(data,
1605 .this = this,
1606 .which = which,
1607 );
7daf5226 1608
a25d536e 1609 this->lock->read_lock(this->lock);
507f26f6 1610 return enumerator_create_nested(
887abfb1
MW
1611 enumerator_create_filter(
1612 this->ifaces->create_enumerator(this->ifaces),
525cc46c 1613 filter_interfaces, data, NULL),
887abfb1 1614 (void*)create_iface_enumerator, data,
525cc46c 1615 address_enumerator_destroy);
507f26f6
TB
1616}
1617
9ba36c0f
TB
1618METHOD(kernel_net_t, get_interface_name, bool,
1619 private_kernel_netlink_net_t *this, host_t* ip, char **name)
507f26f6 1620{
1f97e1aa
TB
1621 addr_map_entry_t *entry, lookup = {
1622 .ip = ip,
1623 };
7daf5226 1624
645d7a5e
TB
1625 if (ip->is_anyaddr(ip))
1626 {
1627 return FALSE;
1628 }
a25d536e 1629 this->lock->read_lock(this->lock);
1f97e1aa
TB
1630 /* first try to find it on an up and usable interface */
1631 entry = this->addrs->get_match(this->addrs, &lookup,
1632 (void*)addr_map_entry_match_up_and_usable);
1633 if (entry)
507f26f6 1634 {
1f97e1aa 1635 if (name)
507f26f6 1636 {
1f97e1aa
TB
1637 *name = strdup(entry->iface->ifname);
1638 DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
507f26f6 1639 }
a25d536e 1640 this->lock->unlock(this->lock);
1f97e1aa 1641 return TRUE;
507f26f6 1642 }
544c2e3d
MW
1643 /* in a second step, consider virtual IPs installed by us */
1644 entry = this->vips->get_match(this->vips, &lookup,
1645 (void*)addr_map_entry_match_up_and_usable);
1646 if (entry)
1647 {
1648 if (name)
1649 {
1650 *name = strdup(entry->iface->ifname);
5310f485 1651 DBG2(DBG_KNL, "virtual IP %H is on interface %s", ip, *name);
544c2e3d
MW
1652 }
1653 this->lock->unlock(this->lock);
1654 return TRUE;
1655 }
1f97e1aa
TB
1656 /* maybe it is installed on an ignored interface */
1657 entry = this->addrs->get_match(this->addrs, &lookup,
1658 (void*)addr_map_entry_match_up);
1659 if (!entry)
507f26f6 1660 {
1f97e1aa 1661 DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
507f26f6 1662 }
a25d536e 1663 this->lock->unlock(this->lock);
1f97e1aa 1664 return FALSE;
507f26f6
TB
1665}
1666
1667/**
1668 * get the index of an interface by name
1669 */
1670static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
1671{
507f26f6
TB
1672 iface_entry_t *iface;
1673 int ifindex = 0;
7daf5226 1674
507f26f6 1675 DBG2(DBG_KNL, "getting iface index for %s", name);
7daf5226 1676
a25d536e 1677 this->lock->read_lock(this->lock);
2e4d110d
TB
1678 if (this->ifaces->find_first(this->ifaces, iface_entry_by_name,
1679 (void**)&iface, name))
507f26f6 1680 {
c6b40158 1681 ifindex = iface->ifindex;
507f26f6 1682 }
a25d536e 1683 this->lock->unlock(this->lock);
507f26f6
TB
1684
1685 if (ifindex == 0)
1686 {
1687 DBG1(DBG_KNL, "unable to get interface index for %s", name);
1688 }
1689 return ifindex;
1690}
1691
66e9165b
TB
1692/**
1693 * get the name of an interface by index (allocated)
1694 */
1695static char *get_interface_name_by_index(private_kernel_netlink_net_t *this,
1696 int index)
1697{
1698 iface_entry_t *iface;
1699 char *name = NULL;
1700
1701 DBG2(DBG_KNL, "getting iface name for index %d", index);
1702
1703 this->lock->read_lock(this->lock);
2e4d110d
TB
1704 if (this->ifaces->find_first(this->ifaces, iface_entry_by_index,
1705 (void**)&iface, index))
66e9165b
TB
1706 {
1707 name = strdup(iface->ifname);
1708 }
1709 this->lock->unlock(this->lock);
1710
1711 if (!name)
1712 {
1713 DBG1(DBG_KNL, "unable to get interface name for %d", index);
1714 }
1715 return name;
1716}
1717
66253465
TB
1718/**
1719 * Store information about a route retrieved via RTNETLINK
1720 */
1721typedef struct {
1722 chunk_t gtw;
bfc595a3 1723 chunk_t pref_src;
66253465 1724 chunk_t dst;
bfc595a3 1725 chunk_t src;
66253465 1726 host_t *src_host;
b12c53ce 1727 uint8_t dst_len;
bfc595a3 1728 uint8_t src_len;
b12c53ce
AS
1729 uint32_t table;
1730 uint32_t oif;
1731 uint32_t priority;
66253465
TB
1732} rt_entry_t;
1733
1734/**
1735 * Free a route entry
1736 */
1737static void rt_entry_destroy(rt_entry_t *this)
1738{
1739 DESTROY_IF(this->src_host);
1740 free(this);
1741}
1742
6716c652
TB
1743/**
1744 * Check if the route received with RTM_NEWROUTE is usable based on its type.
1745 */
1746static bool route_usable(struct nlmsghdr *hdr)
1747{
1748 struct rtmsg *msg;
1749
1750 msg = NLMSG_DATA(hdr);
1751 switch (msg->rtm_type)
1752 {
1753 case RTN_BLACKHOLE:
1754 case RTN_UNREACHABLE:
1755 case RTN_PROHIBIT:
1756 case RTN_THROW:
1757 return FALSE;
1758 default:
1759 return TRUE;
1760 }
1761}
1762
66253465
TB
1763/**
1764 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1765 * reused if not NULL.
1766 *
1767 * Returned chunks point to internal data of the Netlink message.
1768 */
1769static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
1770{
1771 struct rtattr *rta;
1772 struct rtmsg *msg;
1773 size_t rtasize;
1774
4c438cf0 1775 msg = NLMSG_DATA(hdr);
66253465
TB
1776 rta = RTM_RTA(msg);
1777 rtasize = RTM_PAYLOAD(hdr);
1778
1779 if (route)
1780 {
1781 route->gtw = chunk_empty;
bfc595a3 1782 route->pref_src = chunk_empty;
66253465
TB
1783 route->dst = chunk_empty;
1784 route->dst_len = msg->rtm_dst_len;
bfc595a3
TB
1785 route->src = chunk_empty;
1786 route->src_len = msg->rtm_src_len;
66253465
TB
1787 route->table = msg->rtm_table;
1788 route->oif = 0;
6b577902 1789 route->priority = 0;
66253465
TB
1790 }
1791 else
1792 {
1793 INIT(route,
1794 .dst_len = msg->rtm_dst_len,
bfc595a3 1795 .src_len = msg->rtm_src_len,
66253465
TB
1796 .table = msg->rtm_table,
1797 );
1798 }
1799
1800 while (RTA_OK(rta, rtasize))
1801 {
1802 switch (rta->rta_type)
1803 {
1804 case RTA_PREFSRC:
bfc595a3 1805 route->pref_src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
66253465
TB
1806 break;
1807 case RTA_GATEWAY:
1808 route->gtw = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1809 break;
1810 case RTA_DST:
1811 route->dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1812 break;
bfc595a3
TB
1813 case RTA_SRC:
1814 route->src = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
1815 break;
66253465
TB
1816 case RTA_OIF:
1817 if (RTA_PAYLOAD(rta) == sizeof(route->oif))
1818 {
b12c53ce 1819 route->oif = *(uint32_t*)RTA_DATA(rta);
66253465
TB
1820 }
1821 break;
6b577902
MW
1822 case RTA_PRIORITY:
1823 if (RTA_PAYLOAD(rta) == sizeof(route->priority))
1824 {
b12c53ce 1825 route->priority = *(uint32_t*)RTA_DATA(rta);
6b577902
MW
1826 }
1827 break;
66253465
TB
1828#ifdef HAVE_RTA_TABLE
1829 case RTA_TABLE:
1830 if (RTA_PAYLOAD(rta) == sizeof(route->table))
1831 {
b12c53ce 1832 route->table = *(uint32_t*)RTA_DATA(rta);
66253465
TB
1833 }
1834 break;
1835#endif /* HAVE_RTA_TABLE*/
1836 }
1837 rta = RTA_NEXT(rta, rtasize);
1838 }
1839 return route;
1840}
1841
507f26f6
TB
1842/**
 * Look up a route to dest/prefix with an RTM_GETROUTE request.
 *
 * If nexthop is TRUE, the gateway of the selected route is returned. If no
 * gateway address is available and the full address length was requested, a
 * clone of dest is returned instead. If iface is not NULL, *iface is set to
 * the allocated name of the outgoing interface of the selected route, or to
 * NULL if there is none.
 * If nexthop is FALSE, a clone of the source address selected for the best
 * route is returned.
 *
 * A negative prefix selects the full address length of dest. candidate, if
 * not NULL, is the preferred source address. recursion is the current depth
 * of nested lookups (used when a source address has to be found via the
 * gateway of a route), NULL is returned once it exceeds MAX_ROUTE_RECURSION.
 *
 * The returned host has to be destroyed by the caller. NULL is returned if
 * the request failed or no suitable address was found.
 */
1845static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
de7cb6de 1846 int prefix, bool nexthop, host_t *candidate,
99a57aa5 1847 char **iface, u_int recursion)
507f26f6 1848{
21bf86f7 1849 netlink_buf_t request;
507f26f6
TB
1850 struct nlmsghdr *hdr, *out, *current;
1851 struct rtmsg *msg;
1852 chunk_t chunk;
1853 size_t len;
66253465
TB
1854 linked_list_t *routes;
1855 rt_entry_t *route = NULL, *best = NULL;
d266e895 1856 enumerator_t *enumerator;
66253465 1857 host_t *addr = NULL;
de7cb6de
TB
1858 bool match_net;
1859 int family;
7daf5226 1860
cbd52e7d
TB
1861 if (recursion > MAX_ROUTE_RECURSION)
1862 {
1863 return NULL;
1864 }
de7cb6de
TB
1865 chunk = dest->get_address(dest);
1866 len = chunk.len * 8;
/* a negative prefix selects the full address length, larger values are
 * capped to it; match_net is set if an actual subnet is looked up */
1867 prefix = prefix < 0 ? len : min(prefix, len);
1868 match_net = prefix != len;
cbd52e7d 1869
507f26f6
TB
1870 memset(&request, 0, sizeof(request));
1871
de7cb6de 1872 family = dest->get_family(dest);
0404a29b 1873 hdr = &request.hdr;
5be75c2c 1874 hdr->nlmsg_flags = NLM_F_REQUEST;
507f26f6
TB
1875 hdr->nlmsg_type = RTM_GETROUTE;
1876 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1877
4c438cf0 1878 msg = NLMSG_DATA(hdr);
de7cb6de 1879 msg->rtm_family = family;
6bd1216e
TB
1880 if (!match_net && this->rta_mark && this->routing_mark.value)
1881 {
1882 /* if our routing rule excludes packets with a certain mark we can
1883 * get the preferred route without having to dump all routes */
1884 chunk = chunk_from_thing(this->routing_mark.value);
1885 netlink_add_attribute(hdr, RTA_MARK, chunk, sizeof(request));
1886 }
1887 else if (family == AF_INET || this->rta_prefsrc_for_ipv6 ||
1888 this->routing_table || match_net)
1889 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1890 * as we want to ignore routes with virtual IPs we cannot use DUMP
1891 * if these routes are not installed in a separate table */
558691b3
MW
1892 if (this->install_routes)
1893 {
1894 hdr->nlmsg_flags |= NLM_F_DUMP;
1895 }
6bd1216e 1896 }
ce5b1708
MW
1897 if (candidate)
1898 {
1899 chunk = candidate->get_address(candidate);
395500b8
MW
1900 if (hdr->nlmsg_flags & NLM_F_DUMP)
1901 {
1902 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
1903 }
1904 else
1905 {
1906 netlink_add_attribute(hdr, RTA_SRC, chunk, sizeof(request));
1907 }
ce5b1708 1908 }
0ed9430d
TB
1909 /* we use this below to match against the routes */
1910 chunk = dest->get_address(dest);
de7cb6de
TB
1911 if (!match_net)
1912 {
de7cb6de
TB
1913 netlink_add_attribute(hdr, RTA_DST, chunk, sizeof(request));
1914 }
7daf5226 1915
507f26f6
TB
1916 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
1917 {
de7cb6de
TB
1918 DBG2(DBG_KNL, "getting %s to reach %H/%d failed",
1919 nexthop ? "nexthop" : "address", dest, prefix);
507f26f6
TB
1920 return NULL;
1921 }
66253465 1922 routes = linked_list_create();
/* this->lock stays read-locked until the result has been determined below */
a25d536e 1923 this->lock->read_lock(this->lock);
36b7ba5e
MW
1924
1925 for (current = out; NLMSG_OK(current, len);
1926 current = NLMSG_NEXT(current, len))
507f26f6
TB
1927 {
1928 switch (current->nlmsg_type)
1929 {
1930 case NLMSG_DONE:
/* only leaves the switch, the break following the switch ends the loop */
1931 break;
1932 case RTM_NEWROUTE:
1933 {
66253465 1934 rt_entry_t *other;
d266e895 1935 uintptr_t table;
7daf5226 1936
6716c652
TB
1937 if (!route_usable(current))
1938 {
1939 continue;
1940 }
66253465
TB
/* reuses the entry of a previously skipped message, if there is one */
1941 route = parse_route(current, route);
1942
1943 table = (uintptr_t)route->table;
1944 if (this->rt_exclude->find_first(this->rt_exclude, NULL,
2e4d110d 1945 (void**)&table))
66253465 1946 { /* route is from an excluded routing table */
d266e895
TE
1947 continue;
1948 }
fb6c8591 1949 if (this->routing_table != 0 &&
66253465 1950 route->table == this->routing_table)
fb6c8591 1951 { /* route is from our own ipsec routing table */
36b7ba5e 1952 continue;
fb6c8591 1953 }
940e1b0f 1954 if (route->oif && !is_interface_up_and_usable(this, route->oif))
d1769942 1955 { /* interface is down */
36b7ba5e 1956 continue;
fb6c8591 1957 }
de7cb6de 1958 if (!addr_in_subnet(chunk, prefix, route->dst, route->dst_len))
d1769942 1959 { /* route destination does not contain dest */
36b7ba5e 1960 continue;
fb6c8591 1961 }
bfc595a3 1962 if (route->pref_src.ptr)
66253465
TB
1963 { /* verify source address, if any */
1964 host_t *src = host_create_from_chunk(msg->rtm_family,
bfc595a3 1965 route->pref_src, 0);
c6b40158 1966 if (src && is_known_vip(this, src))
66253465
TB
1967 { /* ignore routes installed by us */
1968 src->destroy(src);
1969 continue;
507f26f6 1970 }
66253465 1971 route->src_host = src;
fb6c8591 1972 }
3f4cc30b 1973 /* insert route, sorted by network prefix and priority */
66253465
TB
1974 enumerator = routes->create_enumerator(routes);
1975 while (enumerator->enumerate(enumerator, &other))
1976 {
3f4cc30b 1977 if (route->dst_len > other->dst_len)
6b577902
MW
1978 {
1979 break;
1980 }
3f4cc30b
TB
1981 if (route->dst_len == other->dst_len &&
1982 route->priority < other->priority)
507f26f6 1983 {
66253465 1984 break;
507f26f6
TB
1985 }
1986 }
66253465
TB
1987 routes->insert_before(routes, enumerator, route);
1988 enumerator->destroy(enumerator);
/* the entry is stored in the list now, the next message needs a new one */
1989 route = NULL;
36b7ba5e 1990 continue;
507f26f6
TB
1991 }
1992 default:
507f26f6
TB
1993 continue;
1994 }
/* only reached for NLMSG_DONE, all other cases continue with the loop */
1995 break;
1996 }
66253465
TB
/* entry of a skipped message that was not reused or added to the list */
1997 if (route)
1998 {
1999 rt_entry_destroy(route);
2000 }
2001
2002 /* now we have a list of routes matching dest, sorted by net prefix.
2003 * we will look for source addresses for these routes and select the one
2004 * with the preferred source address, if possible */
2005 enumerator = routes->create_enumerator(routes);
2006 while (enumerator->enumerate(enumerator, &route))
2007 {
2008 if (route->src_host)
2009 { /* got a source address with the route, if no preferred source
2010 * is given or it matches we are done, as this is the best route */
2011 if (!candidate || candidate->ip_equals(candidate, route->src_host))
2012 {
2013 best = route;
2014 break;
2015 }
2016 else if (route->oif)
2017 { /* no match yet, maybe it is assigned to the same interface */
2018 host_t *src = get_interface_address(this, route->oif,
3bf98189 2019 msg->rtm_family, dest, candidate);
66253465
TB
2020 if (src && src->ip_equals(src, candidate))
2021 {
2022 route->src_host->destroy(route->src_host);
2023 route->src_host = src;
2024 best = route;
2025 break;
2026 }
2027 DESTROY_IF(src);
2028 }
2029 /* no luck yet with the source address. if this is the best (first)
2030 * route we store it as fallback in case we don't find a route with
2031 * the preferred source */
2032 best = best ?: route;
2033 continue;
2034 }
bfc595a3
TB
2035 if (route->src.ptr)
2036 { /* no src, but a source selector, try to find a matching address */
2037 route->src_host = get_subnet_address(this, msg->rtm_family,
2038 route->src, route->src_len, dest,
2039 candidate);
2040 if (route->src_host)
2041 { /* we handle this address the same as the one above */
2042 if (!candidate ||
2043 candidate->ip_equals(candidate, route->src_host))
2044 {
2045 best = route;
2046 break;
2047 }
2048 best = best ?: route;
2049 continue;
2050 }
2051 }
66253465
TB
2052 if (route->oif)
2053 { /* no src, but an interface - get address from it */
2054 route->src_host = get_interface_address(this, route->oif,
3bf98189 2055 msg->rtm_family, dest, candidate);
66253465 2056 if (route->src_host)
bfc595a3 2057 { /* more of the same */
66253465
TB
2058 if (!candidate ||
2059 candidate->ip_equals(candidate, route->src_host))
2060 {
2061 best = route;
2062 break;
2063 }
2064 best = best ?: route;
2065 continue;
2066 }
2067 }
2068 if (route->gtw.ptr)
2069 { /* no src, no iface, but a gateway - lookup src to reach gtw */
2070 host_t *gtw;
2071
2072 gtw = host_create_from_chunk(msg->rtm_family, route->gtw, 0);
5be88ca6
TB
2073 if (gtw && !gtw->ip_equals(gtw, dest))
2074 {
de7cb6de 2075 route->src_host = get_route(this, gtw, -1, FALSE, candidate,
99a57aa5 2076 iface, recursion + 1);
5be88ca6
TB
2077 }
2078 DESTROY_IF(gtw);
66253465
TB
2079 if (route->src_host)
2080 { /* more of the same */
2081 if (!candidate ||
2082 candidate->ip_equals(candidate, route->src_host))
2083 {
2084 best = route;
2085 break;
2086 }
2087 best = best ?: route;
2088 }
2089 }
2090 }
2091 enumerator->destroy(enumerator);
7daf5226 2092
507f26f6 2093 if (nexthop)
66e9165b 2094 { /* nexthop lookup, return gateway and oif if any */
99a57aa5
TB
2095 if (iface)
2096 {
2097 *iface = NULL;
2098 }
66253465
TB
/* if no route with a source address was selected above, fall back to the
 * first route in the sorted list */
2099 if (best || routes->get_first(routes, (void**)&best) == SUCCESS)
2100 {
2101 addr = host_create_from_chunk(msg->rtm_family, best->gtw, 0);
a63a7af1 2102 if (iface && best->oif)
66e9165b 2103 {
a63a7af1 2104 *iface = get_interface_name_by_index(this, best->oif);
66e9165b 2105 }
66253465 2106 }
de7cb6de
TB
2107 if (!addr && !match_net)
2108 { /* fallback to destination address */
2109 addr = dest->clone(dest);
2110 }
66253465
TB
2111 }
2112 else
507f26f6 2113 {
66253465 2114 if (best)
507f26f6 2115 {
66253465 2116 addr = best->src_host->clone(best->src_host);
507f26f6 2117 }
507f26f6 2118 }
a25d536e 2119 this->lock->unlock(this->lock);
66253465
TB
2120 routes->destroy_function(routes, (void*)rt_entry_destroy);
2121 free(out);
2122
2123 if (addr)
2124 {
66e9165b
TB
2125 if (nexthop && iface && *iface)
2126 {
2127 DBG2(DBG_KNL, "using %H as nexthop and %s as dev to reach %H/%d",
2128 addr, *iface, dest, prefix);
2129 }
2130 else
2131 {
2132 DBG2(DBG_KNL, "using %H as %s to reach %H/%d", addr,
2133 nexthop ? "nexthop" : "address", dest, prefix);
2134 }
66253465 2135 }
cbd52e7d 2136 else if (!recursion)
66253465 2137 {
de7cb6de
TB
2138 DBG2(DBG_KNL, "no %s found to reach %H/%d",
2139 nexthop ? "nexthop" : "address", dest, prefix);
66253465
TB
2140 }
2141 return addr;
507f26f6
TB
2142}
2143
887abfb1
MW
2144METHOD(kernel_net_t, get_source_addr, host_t*,
2145 private_kernel_netlink_net_t *this, host_t *dest, host_t *src)
507f26f6 2146{
99a57aa5 2147 return get_route(this, dest, -1, FALSE, src, NULL, 0);
507f26f6
TB
2148}
2149
887abfb1 2150METHOD(kernel_net_t, get_nexthop, host_t*,
99a57aa5
TB
2151 private_kernel_netlink_net_t *this, host_t *dest, int prefix, host_t *src,
2152 char **iface)
507f26f6 2153{
99a57aa5 2154 return get_route(this, dest, prefix, TRUE, src, iface, 0);
507f26f6
TB
2155}
2156
eac584a3
TB
2157/** enumerator over subnets */
2158typedef struct {
2159 enumerator_t public;
2160 private_kernel_netlink_net_t *private;
2161 /** message from the kernel */
2162 struct nlmsghdr *msg;
2163 /** current message from the kernel */
2164 struct nlmsghdr *current;
2165 /** remaining length */
2166 size_t len;
2167 /** last subnet enumerated */
2168 host_t *net;
24064741
TB
2169 /** interface of current net */
2170 char ifname[IFNAMSIZ];
eac584a3
TB
2171} subnet_enumerator_t;
2172
2173METHOD(enumerator_t, destroy_subnet_enumerator, void,
2174 subnet_enumerator_t *this)
2175{
2176 DESTROY_IF(this->net);
2177 free(this->msg);
2178 free(this);
2179}
2180
2181METHOD(enumerator_t, enumerate_subnets, bool,
95a63bf2 2182 subnet_enumerator_t *this, va_list args)
eac584a3 2183{
95a63bf2
TB
2184 host_t **net;
2185 uint8_t *mask;
2186 char **ifname;
2187
2188 VA_ARGS_VGET(args, net, mask, ifname);
2189
eac584a3
TB
2190 if (!this->current)
2191 {
2192 this->current = this->msg;
2193 }
2194 else
2195 {
2196 this->current = NLMSG_NEXT(this->current, this->len);
2197 DESTROY_IF(this->net);
2198 this->net = NULL;
2199 }
2200
2201 while (NLMSG_OK(this->current, this->len))
2202 {
2203 switch (this->current->nlmsg_type)
2204 {
2205 case NLMSG_DONE:
2206 break;
2207 case RTM_NEWROUTE:
2208 {
2209 struct rtmsg *msg;
2210 struct rtattr *rta;
2211 size_t rtasize;
2212 chunk_t dst = chunk_empty;
24064741 2213 uint32_t oif = 0;
eac584a3
TB
2214
2215 msg = NLMSG_DATA(this->current);
2216
2217 if (!route_usable(this->current))
2218 {
2219 break;
2220 }
2221 else if (msg->rtm_table && (
2222 msg->rtm_table == RT_TABLE_LOCAL ||
2223 msg->rtm_table == this->private->routing_table))
2224 { /* ignore our own and the local routing tables */
2225 break;
2226 }
2227
2228 rta = RTM_RTA(msg);
2229 rtasize = RTM_PAYLOAD(this->current);
2230 while (RTA_OK(rta, rtasize))
2231 {
24064741 2232 switch (rta->rta_type)
eac584a3 2233 {
24064741
TB
2234 case RTA_DST:
2235 dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
2236 break;
2237 case RTA_OIF:
2238 if (RTA_PAYLOAD(rta) == sizeof(oif))
2239 {
2240 oif = *(uint32_t*)RTA_DATA(rta);
2241 }
2242 break;
eac584a3
TB
2243 }
2244 rta = RTA_NEXT(rta, rtasize);
2245 }
2246
24064741 2247 if (dst.ptr && oif && if_indextoname(oif, this->ifname))
eac584a3
TB
2248 {
2249 this->net = host_create_from_chunk(msg->rtm_family, dst, 0);
2250 *net = this->net;
2251 *mask = msg->rtm_dst_len;
24064741 2252 *ifname = this->ifname;
eac584a3
TB
2253 return TRUE;
2254 }
2255 break;
2256 }
2257 default:
2258 break;
2259 }
2260 this->current = NLMSG_NEXT(this->current, this->len);
2261 }
2262 return FALSE;
2263}
2264
2265METHOD(kernel_net_t, create_local_subnet_enumerator, enumerator_t*,
2266 private_kernel_netlink_net_t *this)
2267{
2268 netlink_buf_t request;
2269 struct nlmsghdr *hdr, *out;
2270 struct rtmsg *msg;
2271 size_t len;
2272 subnet_enumerator_t *enumerator;
2273
2274 memset(&request, 0, sizeof(request));
2275
2276 hdr = &request.hdr;
2277 hdr->nlmsg_flags = NLM_F_REQUEST;
2278 hdr->nlmsg_type = RTM_GETROUTE;
2279 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2280 hdr->nlmsg_flags |= NLM_F_DUMP;
2281
2282 msg = NLMSG_DATA(hdr);
2283 msg->rtm_scope = RT_SCOPE_LINK;
2284
2285 if (this->socket->send(this->socket, hdr, &out, &len) != SUCCESS)
2286 {
2287 DBG2(DBG_KNL, "enumerating local subnets failed");
2288 return enumerator_create_empty();
2289 }
2290
2291 INIT(enumerator,
2292 .public = {
95a63bf2
TB
2293 .enumerate = enumerator_enumerate_default,
2294 .venumerate = _enumerate_subnets,
eac584a3
TB
2295 .destroy = _destroy_subnet_enumerator,
2296 },
2297 .private = this,
2298 .msg = out,
2299 .len = len,
2300 );
2301 return &enumerator->public;
2302}
2303
507f26f6
TB
2304/**
2305 * Manages the creation and deletion of ip addresses on an interface.
2306 * By setting the appropriate nlmsg_type, the ip will be set or unset.
2307 */
2308static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type,
50bd7558 2309 int flags, int if_index, host_t *ip, int prefix)
507f26f6 2310{
21bf86f7 2311 netlink_buf_t request;
507f26f6
TB
2312 struct nlmsghdr *hdr;
2313 struct ifaddrmsg *msg;
2314 chunk_t chunk;
7daf5226 2315
507f26f6 2316 memset(&request, 0, sizeof(request));
7daf5226 2317
507f26f6 2318 chunk = ip->get_address(ip);
7daf5226 2319
0404a29b 2320 hdr = &request.hdr;
507f26f6 2321 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
7daf5226 2322 hdr->nlmsg_type = nlmsg_type;
507f26f6 2323 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
7daf5226 2324
4c438cf0 2325 msg = NLMSG_DATA(hdr);
323f9f99
MW
2326 msg->ifa_family = ip->get_family(ip);
2327 msg->ifa_flags = 0;
50bd7558 2328 msg->ifa_prefixlen = prefix < 0 ? chunk.len * 8 : prefix;
323f9f99
MW
2329 msg->ifa_scope = RT_SCOPE_UNIVERSE;
2330 msg->ifa_index = if_index;
7daf5226 2331
507f26f6
TB
2332 netlink_add_attribute(hdr, IFA_LOCAL, chunk, sizeof(request));
2333
b062d3cc
TB
2334 if (ip->get_family(ip) == AF_INET6)
2335 {
2336 msg->ifa_flags |= IFA_F_NODAD;
2337 if (this->rta_prefsrc_for_ipv6)
2338 {
2339 /* if source routes are possible we let the virtual IP get
2340 * deprecated immediately (but mark it as valid forever) so it gets
2341 * only used if forced by our route, and not by the default IPv6
2342 * address selection */
2343 struct ifa_cacheinfo cache = {
2344 .ifa_valid = 0xFFFFFFFF,
2345 .ifa_prefered = 0,
2346 };
2347 netlink_add_attribute(hdr, IFA_CACHEINFO, chunk_from_thing(cache),
2348 sizeof(request));
2349 }
90854d28 2350 }
507f26f6
TB
2351 return this->socket->send_ack(this->socket, hdr);
2352}
2353
887abfb1 2354METHOD(kernel_net_t, add_ip, status_t,
50bd7558 2355 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
b185cdd1 2356 char *iface_name)
507f26f6 2357{
c6b40158
TB
2358 addr_map_entry_t *entry, lookup = {
2359 .ip = virtual_ip,
2360 };
e8e9048f 2361 iface_entry_t *iface = NULL;
7daf5226 2362
9474a0d9
MW
2363 if (!this->install_virtual_ip)
2364 { /* disabled by config */
2365 return SUCCESS;
2366 }
7daf5226 2367
a25d536e 2368 this->lock->write_lock(this->lock);
c6b40158
TB
2369 /* the virtual IP might actually be installed as regular IP, in which case
2370 * we don't track it as virtual IP */
2371 entry = this->addrs->get_match(this->addrs, &lookup,
2372 (void*)addr_map_entry_match);
2373 if (!entry)
2374 { /* otherwise it might already be installed as virtual IP */
2375 entry = this->vips->get_match(this->vips, &lookup,
2376 (void*)addr_map_entry_match);
2377 if (entry)
2378 { /* the vip we found can be in one of three states: 1) installed and
2379 * ready, 2) just added by another thread, but not yet confirmed to
2380 * be installed by the kernel, 3) just deleted, but not yet gone.
2381 * Then while we wait below, several things could happen (as we
a25d536e 2382 * release the lock). For instance, the interface could disappear,
e8e9048f 2383 * or the IP is finally deleted, and it reappears on a different
c6b40158
TB
2384 * interface. All these cases are handled by the call below. */
2385 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
507f26f6 2386 {
a25d536e 2387 this->condvar->wait(this->condvar, this->lock);
507f26f6 2388 }
c6b40158 2389 if (entry)
507f26f6 2390 {
c6b40158 2391 entry->addr->refcount++;
507f26f6
TB
2392 }
2393 }
c6b40158
TB
2394 }
2395 if (entry)
2396 {
2397 DBG2(DBG_KNL, "virtual IP %H is already installed on %s", virtual_ip,
2398 entry->iface->ifname);
a25d536e 2399 this->lock->unlock(this->lock);
c6b40158
TB
2400 return SUCCESS;
2401 }
e8e9048f
TB
2402 /* try to find the target interface, either by config or via src ip */
2403 if (!this->install_virtual_ip_on ||
2e4d110d
TB
2404 !this->ifaces->find_first(this->ifaces, iface_entry_by_name,
2405 (void**)&iface, this->install_virtual_ip_on))
e8e9048f 2406 {
2e4d110d
TB
2407 if (!this->ifaces->find_first(this->ifaces, iface_entry_by_name,
2408 (void**)&iface, iface_name))
e8e9048f
TB
2409 { /* if we don't find the requested interface we just use the first */
2410 this->ifaces->get_first(this->ifaces, (void**)&iface);
2411 }
c6b40158 2412 }
c6b40158
TB
2413 if (iface)
2414 {
2415 addr_entry_t *addr;
9b43dddf
MW
2416 char *ifname;
2417 int ifi;
7daf5226 2418
c6b40158
TB
2419 INIT(addr,
2420 .ip = virtual_ip->clone(virtual_ip),
2421 .refcount = 1,
2422 .scope = RT_SCOPE_UNIVERSE,
2423 );
2424 iface->addrs->insert_last(iface->addrs, addr);
2425 addr_map_entry_add(this->vips, addr, iface);
9b43dddf
MW
2426 ifi = iface->ifindex;
2427 this->lock->unlock(this->lock);
c6b40158 2428 if (manage_ipaddr(this, RTM_NEWADDR, NLM_F_CREATE | NLM_F_EXCL,
9b43dddf 2429 ifi, virtual_ip, prefix) == SUCCESS)
507f26f6 2430 {
9b43dddf 2431 this->lock->write_lock(this->lock);
c6b40158
TB
2432 while (!is_vip_installed_or_gone(this, virtual_ip, &entry))
2433 { /* wait until address appears */
a25d536e 2434 this->condvar->wait(this->condvar, this->lock);
c6b40158
TB
2435 }
2436 if (entry)
2437 { /* we fail if the interface got deleted in the meantime */
9b43dddf 2438 ifname = strdup(entry->iface->ifname);
a25d536e 2439 this->lock->unlock(this->lock);
9b43dddf
MW
2440 DBG2(DBG_KNL, "virtual IP %H installed on %s",
2441 virtual_ip, ifname);
3dc9d427
MW
2442 /* during IKEv1 reauthentication, children get moved from
2443 * old the new SA before the virtual IP is available. This
2444 * kills the route for our virtual IP, reinstall. */
9b43dddf 2445 queue_route_reinstall(this, ifname);
507f26f6
TB
2446 return SUCCESS;
2447 }
9b43dddf 2448 this->lock->unlock(this->lock);
507f26f6 2449 }
c6b40158
TB
2450 DBG1(DBG_KNL, "adding virtual IP %H failed", virtual_ip);
2451 return FAILED;
507f26f6 2452 }
a25d536e 2453 this->lock->unlock(this->lock);
c6b40158
TB
2454 DBG1(DBG_KNL, "no interface available, unable to install virtual IP %H",
2455 virtual_ip);
507f26f6
TB
2456 return FAILED;
2457}
2458
887abfb1 2459METHOD(kernel_net_t, del_ip, status_t,
d88597f0
MW
2460 private_kernel_netlink_net_t *this, host_t *virtual_ip, int prefix,
2461 bool wait)
507f26f6 2462{
c6b40158
TB
2463 addr_map_entry_t *entry, lookup = {
2464 .ip = virtual_ip,
2465 };
7daf5226 2466
9474a0d9
MW
2467 if (!this->install_virtual_ip)
2468 { /* disabled by config */
2469 return SUCCESS;
2470 }
7daf5226 2471
507f26f6 2472 DBG2(DBG_KNL, "deleting virtual IP %H", virtual_ip);
7daf5226 2473
a25d536e 2474 this->lock->write_lock(this->lock);
c6b40158
TB
2475 entry = this->vips->get_match(this->vips, &lookup,
2476 (void*)addr_map_entry_match);
2477 if (!entry)
2478 { /* we didn't install this IP as virtual IP */
2479 entry = this->addrs->get_match(this->addrs, &lookup,
2480 (void*)addr_map_entry_match);
2481 if (entry)
507f26f6 2482 {
c6b40158
TB
2483 DBG2(DBG_KNL, "not deleting existing IP %H on %s", virtual_ip,
2484 entry->iface->ifname);
a25d536e 2485 this->lock->unlock(this->lock);
c6b40158
TB
2486 return SUCCESS;
2487 }
2488 DBG2(DBG_KNL, "virtual IP %H not cached, unable to delete", virtual_ip);
a25d536e 2489 this->lock->unlock(this->lock);
c6b40158
TB
2490 return FAILED;
2491 }
2492 if (entry->addr->refcount == 1)
2493 {
2494 status_t status;
9b43dddf 2495 int ifi;
c6b40158
TB
2496
2497 /* we set this flag so that threads calling add_ip will block and wait
2498 * until the entry is gone, also so we can wait below */
2499 entry->addr->installed = FALSE;
9b43dddf
MW
2500 ifi = entry->iface->ifindex;
2501 this->lock->unlock(this->lock);
2502 status = manage_ipaddr(this, RTM_DELADDR, 0, ifi, virtual_ip, prefix);
d88597f0 2503 if (status == SUCCESS && wait)
c6b40158 2504 { /* wait until the address is really gone */
9b43dddf 2505 this->lock->write_lock(this->lock);
c6b40158 2506 while (is_known_vip(this, virtual_ip))
507f26f6 2507 {
a25d536e 2508 this->condvar->wait(this->condvar, this->lock);
507f26f6 2509 }
9b43dddf 2510 this->lock->unlock(this->lock);
507f26f6 2511 }
c6b40158 2512 return status;
507f26f6 2513 }
c6b40158
TB
2514 else
2515 {
2516 entry->addr->refcount--;
2517 }
2518 DBG2(DBG_KNL, "virtual IP %H used by other SAs, not deleting",
2519 virtual_ip);
a25d536e 2520 this->lock->unlock(this->lock);
c6b40158 2521 return SUCCESS;
507f26f6
TB
2522}
2523
2524/**
2525 * Manages source routes in the routing table.
2526 * By setting the appropriate nlmsg_type, the route gets added or removed.
2527 */
74ba22c9
TB
2528static status_t manage_srcroute(private_kernel_netlink_net_t *this,
2529 int nlmsg_type, int flags, chunk_t dst_net,
b12c53ce 2530 uint8_t prefixlen, host_t *gateway,
74ba22c9 2531 host_t *src_ip, char *if_name)
507f26f6 2532{
21bf86f7 2533 netlink_buf_t request;
507f26f6
TB
2534 struct nlmsghdr *hdr;
2535 struct rtmsg *msg;
c1adf7e0 2536 struct rtattr *rta;
507f26f6
TB
2537 int ifindex;
2538 chunk_t chunk;
2539
2540 /* if route is 0.0.0.0/0, we can't install it, as it would
2541 * overwrite the default route. Instead, we add two routes:
2542 * 0.0.0.0/1 and 128.0.0.0/1 */
2543 if (this->routing_table == 0 && prefixlen == 0)
2544 {
2545 chunk_t half_net;
b12c53ce 2546 uint8_t half_prefixlen;
507f26f6 2547 status_t status;
7daf5226 2548
507f26f6
TB
2549 half_net = chunk_alloca(dst_net.len);
2550 memset(half_net.ptr, 0, half_net.len);
2551 half_prefixlen = 1;
7daf5226 2552
507f26f6
TB
2553 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2554 gateway, src_ip, if_name);
2555 half_net.ptr[0] |= 0x80;
2556 status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
2557 gateway, src_ip, if_name);
2558 return status;
2559 }
7daf5226 2560
507f26f6
TB
2561 memset(&request, 0, sizeof(request));
2562
0404a29b 2563 hdr = &request.hdr;
507f26f6
TB
2564 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
2565 hdr->nlmsg_type = nlmsg_type;
2566 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2567
4c438cf0 2568 msg = NLMSG_DATA(hdr);
507f26f6
TB
2569 msg->rtm_family = src_ip->get_family(src_ip);
2570 msg->rtm_dst_len = prefixlen;
2571 msg->rtm_table = this->routing_table;
2572 msg->rtm_protocol = RTPROT_STATIC;
2573 msg->rtm_type = RTN_UNICAST;
2574 msg->rtm_scope = RT_SCOPE_UNIVERSE;
7daf5226 2575
507f26f6
TB
2576 netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
2577 chunk = src_ip->get_address(src_ip);
2578 netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
5be75c2c
MW
2579 if (gateway && gateway->get_family(gateway) == src_ip->get_family(src_ip))
2580 {
2581 chunk = gateway->get_address(gateway);
2582 netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
2583 }
507f26f6
TB
2584 ifindex = get_interface_index(this, if_name);
2585 chunk.ptr = (char*)&ifindex;
2586 chunk.len = sizeof(ifindex);
2587 netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
2588
47a0e289 2589 if (this->mtu || this->mss)
c1adf7e0 2590 {
47a0e289 2591 chunk = chunk_alloca(RTA_LENGTH((sizeof(struct rtattr) +
b12c53ce 2592 sizeof(uint32_t)) * 2));
47a0e289 2593 chunk.len = 0;
c1adf7e0 2594 rta = (struct rtattr*)chunk.ptr;
47a0e289
TB
2595 if (this->mtu)
2596 {
2597 rta->rta_type = RTAX_MTU;
b12c53ce
AS
2598 rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
2599 memcpy(RTA_DATA(rta), &this->mtu, sizeof(uint32_t));
47a0e289
TB
2600 chunk.len = rta->rta_len;
2601 }
2602 if (this->mss)
2603 {
2604 rta = (struct rtattr*)(chunk.ptr + RTA_ALIGN(chunk.len));
2605 rta->rta_type = RTAX_ADVMSS;
b12c53ce
AS
2606 rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
2607 memcpy(RTA_DATA(rta), &this->mss, sizeof(uint32_t));
47a0e289
TB
2608 chunk.len = RTA_ALIGN(chunk.len) + rta->rta_len;
2609 }
c1adf7e0
TB
2610 netlink_add_attribute(hdr, RTA_METRICS, chunk, sizeof(request));
2611 }
2612
507f26f6
TB
2613 return this->socket->send_ack(this->socket, hdr);
2614}
2615
887abfb1 2616METHOD(kernel_net_t, add_route, status_t,
b12c53ce 2617 private_kernel_netlink_net_t *this, chunk_t dst_net, uint8_t prefixlen,
887abfb1 2618 host_t *gateway, host_t *src_ip, char *if_name)
507f26f6 2619{
74ba22c9
TB
2620 status_t status;
2621 route_entry_t *found, route = {
2622 .dst_net = dst_net,
2623 .prefixlen = prefixlen,
2624 .gateway = gateway,
2625 .src_ip = src_ip,
2626 .if_name = if_name,
2627 };
2628
16d62305 2629 this->routes_lock->lock(this->routes_lock);
74ba22c9
TB
2630 found = this->routes->get(this->routes, &route);
2631 if (found)
2632 {
16d62305 2633 this->routes_lock->unlock(this->routes_lock);
74ba22c9
TB
2634 return ALREADY_DONE;
2635 }
74ba22c9
TB
2636 status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
2637 dst_net, prefixlen, gateway, src_ip, if_name);
f0f78b74
TB
2638 if (status == SUCCESS)
2639 {
2640 found = route_entry_clone(&route);
2641 this->routes->put(this->routes, found, found);
2642 }
16d62305 2643 this->routes_lock->unlock(this->routes_lock);
74ba22c9 2644 return status;
507f26f6 2645}
7daf5226 2646
887abfb1 2647METHOD(kernel_net_t, del_route, status_t,
b12c53ce 2648 private_kernel_netlink_net_t *this, chunk_t dst_net, uint8_t prefixlen,
887abfb1 2649 host_t *gateway, host_t *src_ip, char *if_name)
507f26f6 2650{
74ba22c9
TB
2651 status_t status;
2652 route_entry_t *found, route = {
2653 .dst_net = dst_net,
2654 .prefixlen = prefixlen,
2655 .gateway = gateway,
2656 .src_ip = src_ip,
2657 .if_name = if_name,
2658 };
2659
16d62305 2660 this->routes_lock->lock(this->routes_lock);
74ba22c9
TB
2661 found = this->routes->get(this->routes, &route);
2662 if (!found)
2663 {
16d62305 2664 this->routes_lock->unlock(this->routes_lock);
74ba22c9
TB
2665 return NOT_FOUND;
2666 }
2667 this->routes->remove(this->routes, found);
2668 route_entry_destroy(found);
2669 status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
2670 gateway, src_ip, if_name);
16d62305 2671 this->routes_lock->unlock(this->routes_lock);
74ba22c9 2672 return status;
507f26f6
TB
2673}
2674
2675/**
2676 * Initialize a list of local addresses.
2677 */
2678static status_t init_address_list(private_kernel_netlink_net_t *this)
2679{
21bf86f7 2680 netlink_buf_t request;
507f26f6
TB
2681 struct nlmsghdr *out, *current, *in;
2682 struct rtgenmsg *msg;
2683 size_t len;
e13389a7 2684 enumerator_t *ifaces, *addrs;
507f26f6
TB
2685 iface_entry_t *iface;
2686 addr_entry_t *addr;
7daf5226 2687
31a0e24b 2688 DBG2(DBG_KNL, "known interfaces and IP addresses:");
7daf5226 2689
507f26f6
TB
2690 memset(&request, 0, sizeof(request));
2691
0404a29b 2692 in = &request.hdr;
507f26f6
TB
2693 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
2694 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT;
4c438cf0 2695 msg = NLMSG_DATA(in);
507f26f6 2696 msg->rtgen_family = AF_UNSPEC;
7daf5226 2697
507f26f6
TB
2698 /* get all links */
2699 in->nlmsg_type = RTM_GETLINK;
2700 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2701 {
2702 return FAILED;
2703 }
2704 current = out;
2705 while (NLMSG_OK(current, len))
2706 {
2707 switch (current->nlmsg_type)
2708 {
2709 case NLMSG_DONE:
2710 break;
2711 case RTM_NEWLINK:
2712 process_link(this, current, FALSE);
2713 /* fall through */
2714 default:
2715 current = NLMSG_NEXT(current, len);
2716 continue;
2717 }
2718 break;
2719 }
2720 free(out);
7daf5226 2721
507f26f6
TB
2722 /* get all interface addresses */
2723 in->nlmsg_type = RTM_GETADDR;
2724 if (this->socket->send(this->socket, in, &out, &len) != SUCCESS)
2725 {
2726 return FAILED;
2727 }
2728 current = out;
2729 while (NLMSG_OK(current, len))
2730 {
2731 switch (current->nlmsg_type)
2732 {
2733 case NLMSG_DONE:
2734 break;
2735 case RTM_NEWADDR:
2736 process_addr(this, current, FALSE);
2737 /* fall through */
2738 default:
2739 current = NLMSG_NEXT(current, len);
2740 continue;
2741 }
2742 break;
2743 }
2744 free(out);
7daf5226 2745
a25d536e 2746 this->lock->read_lock(this->lock);
e13389a7
MW
2747 ifaces = this->ifaces->create_enumerator(this->ifaces);
2748 while (ifaces->enumerate(ifaces, &iface))
507f26f6 2749 {
940e1b0f 2750 if (iface_entry_up_and_usable(iface))
507f26f6 2751 {
31a0e24b 2752 DBG2(DBG_KNL, " %s", iface->ifname);
e13389a7
MW
2753 addrs = iface->addrs->create_enumerator(iface->addrs);
2754 while (addrs->enumerate(addrs, (void**)&addr))
507f26f6 2755 {
31a0e24b 2756 DBG2(DBG_KNL, " %H", addr->ip);
507f26f6
TB
2757 }
2758 addrs->destroy(addrs);
2759 }
2760 }
2761 ifaces->destroy(ifaces);
a25d536e 2762 this->lock->unlock(this->lock);
507f26f6
TB
2763 return SUCCESS;
2764}
2765
2766/**
2767 * create or delete a rule to use our routing table
2768 */
2769static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
b12c53ce 2770 int family, uint32_t table, uint32_t prio)
507f26f6 2771{
21bf86f7 2772 netlink_buf_t request;
507f26f6
TB
2773 struct nlmsghdr *hdr;
2774 struct rtmsg *msg;
2775 chunk_t chunk;
51fefe46 2776 char *fwmark;
507f26f6 2777
7daf5226 2778 memset(&request, 0, sizeof(request));
0404a29b 2779 hdr = &request.hdr;
507f26f6 2780 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
7daf5226 2781 hdr->nlmsg_type = nlmsg_type;
507f26f6
TB
2782 if (nlmsg_type == RTM_NEWRULE)
2783 {
2784 hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
2785 }
2786 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2787
4c438cf0 2788 msg = NLMSG_DATA(hdr);
507f26f6 2789 msg->rtm_table = table;
5be75c2c 2790 msg->rtm_family = family;
507f26f6
TB
2791 msg->rtm_protocol = RTPROT_BOOT;
2792 msg->rtm_scope = RT_SCOPE_UNIVERSE;
2793 msg->rtm_type = RTN_UNICAST;
2794
2795 chunk = chunk_from_thing(prio);
2796 netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
2797
51fefe46 2798 fwmark = lib->settings->get_str(lib->settings,
d347a130 2799 "%s.plugins.kernel-netlink.fwmark", NULL, lib->ns);
51fefe46
TB
2800 if (fwmark)
2801 {
8e8e97d1
TB
2802#ifdef HAVE_LINUX_FIB_RULES_H
2803 mark_t mark;
2804
51fefe46
TB
2805 if (fwmark[0] == '!')
2806 {
2807 msg->rtm_flags |= FIB_RULE_INVERT;
2808 fwmark++;
2809 }
2810 if (mark_from_string(fwmark, &mark))
2811 {
2812 chunk = chunk_from_thing(mark.value);
2813 netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
2814 chunk = chunk_from_thing(mark.mask);
2815 netlink_add_attribute(hdr, FRA_FWMASK, chunk, sizeof(request));
6bd1216e
TB
2816 if (msg->rtm_flags & FIB_RULE_INVERT)
2817 {
2818 this->routing_mark = mark;
2819 }
51fefe46 2820 }
8e8e97d1
TB
2821#else
2822 DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
2823#endif
51fefe46 2824 }
507f26f6
TB
2825 return this->socket->send_ack(this->socket, hdr);
2826}
2827
7beb31aa
TB
2828/**
2829 * check for kernel features (currently only via version number)
2830 */
2831static void check_kernel_features(private_kernel_netlink_net_t *this)
2832{
2833 struct utsname utsname;
2834 int a, b, c;
2835
2836 if (uname(&utsname) == 0)
2837 {
2838 switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
2839 {
2840 case 3:
2841 if (a == 2)
2842 {
6bd1216e
TB
2843 if (b == 6 && c >= 36)
2844 {
2845 this->rta_mark = TRUE;
2846 }
7beb31aa
TB
2847 DBG2(DBG_KNL, "detected Linux %d.%d.%d, no support for "
2848 "RTA_PREFSRC for IPv6 routes", a, b, c);
2849 break;
2850 }
2851 /* fall-through */
2852 case 2:
2853 /* only 3.x+ uses two part version numbers */
2854 this->rta_prefsrc_for_ipv6 = TRUE;
6bd1216e 2855 this->rta_mark = TRUE;
7beb31aa
TB
2856 break;
2857 default:
2858 break;
2859 }
2860 }
2861}
2862
c6b40158
TB
2863/**
2864 * Destroy an address to iface map
2865 */
2866static void addr_map_destroy(hashtable_t *map)
2867{
2868 enumerator_t *enumerator;
2869 addr_map_entry_t *addr;
2870
2871 enumerator = map->create_enumerator(map);
2872 while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
2873 {
2874 free(addr);
2875 }
2876 enumerator->destroy(enumerator);
2877 map->destroy(map);
2878}
2879
887abfb1
MW
2880METHOD(kernel_net_t, destroy, void,
2881 private_kernel_netlink_net_t *this)
507f26f6 2882{
74ba22c9
TB
2883 enumerator_t *enumerator;
2884 route_entry_t *route;
2885
507f26f6
TB
2886 if (this->routing_table)
2887 {
5be75c2c
MW
2888 manage_rule(this, RTM_DELRULE, AF_INET, this->routing_table,
2889 this->routing_table_prio);
2890 manage_rule(this, RTM_DELRULE, AF_INET6, this->routing_table,
507f26f6
TB
2891 this->routing_table_prio);
2892 }
d6a27ec6
MW
2893 if (this->socket_events > 0)
2894 {
f4f77d74 2895 lib->watcher->remove(lib->watcher, this->socket_events);
d6a27ec6
MW
2896 close(this->socket_events);
2897 }
74ba22c9
TB
2898 enumerator = this->routes->create_enumerator(this->routes);
2899 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
2900 {
2901 manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
2902 route->gateway, route->src_ip, route->if_name);
2903 route_entry_destroy(route);
2904 }
2905 enumerator->destroy(enumerator);
2906 this->routes->destroy(this->routes);
16d62305 2907 this->routes_lock->destroy(this->routes_lock);
9e19cb91 2908 DESTROY_IF(this->socket);
74ba22c9 2909
f834249c
TB
2910 net_changes_clear(this);
2911 this->net_changes->destroy(this->net_changes);
2912 this->net_changes_lock->destroy(this->net_changes_lock);
2913
c6b40158
TB
2914 addr_map_destroy(this->addrs);
2915 addr_map_destroy(this->vips);
1f97e1aa 2916
507f26f6 2917 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
d266e895 2918 this->rt_exclude->destroy(this->rt_exclude);
4134108c 2919 this->roam_lock->destroy(this->roam_lock);
3ac5a0db 2920 this->condvar->destroy(this->condvar);
a25d536e 2921 this->lock->destroy(this->lock);
507f26f6
TB
2922 free(this);
2923}
2924
2925/*
2926 * Described in header.
2927 */
2928kernel_netlink_net_t *kernel_netlink_net_create()
2929{
887abfb1 2930 private_kernel_netlink_net_t *this;
d266e895 2931 enumerator_t *enumerator;
05ca5655 2932 bool register_for_events = TRUE;
d266e895 2933 char *exclude;
7daf5226 2934
887abfb1
MW
2935 INIT(this,
2936 .public = {
2937 .interface = {
2938 .get_interface = _get_interface_name,
2939 .create_address_enumerator = _create_address_enumerator,
eac584a3 2940 .create_local_subnet_enumerator = _create_local_subnet_enumerator,
887abfb1
MW
2941 .get_source_addr = _get_source_addr,
2942 .get_nexthop = _get_nexthop,
2943 .add_ip = _add_ip,
2944 .del_ip = _del_ip,
2945 .add_route = _add_route,
2946 .del_route = _del_route,
2947 .destroy = _destroy,
2948 },
2949 },
6c58fabe
MW
2950 .socket = netlink_socket_create(NETLINK_ROUTE, rt_msg_names,
2951 lib->settings->get_bool(lib->settings,
2952 "%s.plugins.kernel-netlink.parallel_route", FALSE, lib->ns)),
887abfb1 2953 .rt_exclude = linked_list_create(),
74ba22c9
TB
2954 .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
2955 (hashtable_equals_t)route_entry_equals, 16),
f834249c
TB
2956 .net_changes = hashtable_create(
2957 (hashtable_hash_t)net_change_hash,
2958 (hashtable_equals_t)net_change_equals, 16),
1f97e1aa
TB
2959 .addrs = hashtable_create(
2960 (hashtable_hash_t)addr_map_entry_hash,
2961 (hashtable_equals_t)addr_map_entry_equals, 16),
c6b40158
TB
2962 .vips = hashtable_create((hashtable_hash_t)addr_map_entry_hash,
2963 (hashtable_equals_t)addr_map_entry_equals, 16),
16d62305 2964 .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
f834249c 2965 .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
887abfb1 2966 .ifaces = linked_list_create(),
a25d536e
TB
2967 .lock = rwlock_create(RWLOCK_TYPE_DEFAULT),
2968 .condvar = rwlock_condvar_create(),
4134108c 2969 .roam_lock = spinlock_create(),
887abfb1 2970 .routing_table = lib->settings->get_int(lib->settings,
d347a130 2971 "%s.routing_table", ROUTING_TABLE, lib->ns),
887abfb1 2972 .routing_table_prio = lib->settings->get_int(lib->settings,
d347a130 2973 "%s.routing_table_prio", ROUTING_TABLE_PRIO, lib->ns),
887abfb1 2974 .process_route = lib->settings->get_bool(lib->settings,
d347a130 2975 "%s.process_route", TRUE, lib->ns),
558691b3
MW
2976 .install_routes = lib->settings->get_bool(lib->settings,
2977 "%s.install_routes", TRUE, lib->ns),
887abfb1 2978 .install_virtual_ip = lib->settings->get_bool(lib->settings,
d347a130 2979 "%s.install_virtual_ip", TRUE, lib->ns),
e8e9048f 2980 .install_virtual_ip_on = lib->settings->get_str(lib->settings,
d347a130 2981 "%s.install_virtual_ip_on", NULL, lib->ns),
3bf98189
TB
2982 .prefer_temporary_addrs = lib->settings->get_bool(lib->settings,
2983 "%s.prefer_temporary_addrs", FALSE, lib->ns),
37873f99 2984 .roam_events = lib->settings->get_bool(lib->settings,
d347a130 2985 "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
c1adf7e0
TB
2986 .mtu = lib->settings->get_int(lib->settings,
2987 "%s.plugins.kernel-netlink.mtu", 0, lib->ns),
47a0e289
TB
2988 .mss = lib->settings->get_int(lib->settings,
2989 "%s.plugins.kernel-netlink.mss", 0, lib->ns),
887abfb1 2990 );
f834249c 2991 timerclear(&this->last_route_reinstall);
4134108c 2992 timerclear(&this->next_roam);
887abfb1 2993
7beb31aa
TB
2994 check_kernel_features(this);
2995
d347a130 2996 if (streq(lib->ns, "starter"))
05ca5655
TB
2997 { /* starter has no threads, so we do not register for kernel events */
2998 register_for_events = FALSE;
2999 }
3000
d266e895 3001 exclude = lib->settings->get_str(lib->settings,
d347a130 3002 "%s.ignore_routing_tables", NULL, lib->ns);
d266e895
TE
3003 if (exclude)
3004 {
3005 char *token;
3006 uintptr_t table;
3007
3008 enumerator = enumerator_create_token(exclude, " ", " ");
3009 while (enumerator->enumerate(enumerator, &token))
3010 {
3011 errno = 0;
3012 table = strtoul(token, NULL, 10);
3013
3014 if (errno == 0)
3015 {
3016 this->rt_exclude->insert_last(this->rt_exclude, (void*)table);
3017 }
3018 }
3019 enumerator->destroy(enumerator);
3020 }
3021
05ca5655 3022 if (register_for_events)
507f26f6 3023 {
05ca5655
TB
3024 struct sockaddr_nl addr;
3025
3026 memset(&addr, 0, sizeof(addr));
3027 addr.nl_family = AF_NETLINK;
7daf5226 3028
05ca5655
TB
3029 /* create and bind RT socket for events (address/interface/route changes) */
3030 this->socket_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
3031 if (this->socket_events < 0)
3032 {
a0178fe2
TB
3033 DBG1(DBG_KNL, "unable to create RT event socket: %s (%d)",
3034 strerror(errno), errno);
05ca5655
TB
3035 destroy(this);
3036 return NULL;
3037 }
3038 addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
3039 RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
3040 if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
3041 {
a0178fe2
TB
3042 DBG1(DBG_KNL, "unable to bind RT event socket: %s (%d)",
3043 strerror(errno), errno);
05ca5655
TB
3044 destroy(this);
3045 return NULL;
3046 }
3047
f4f77d74
MW
3048 lib->watcher->add(lib->watcher, this->socket_events, WATCHER_READ,
3049 (watcher_cb_t)receive_events, this);
05ca5655 3050 }
7daf5226 3051
507f26f6
TB
3052 if (init_address_list(this) != SUCCESS)
3053 {
d6a27ec6
MW
3054 DBG1(DBG_KNL, "unable to get interface list");
3055 destroy(this);
3056 return NULL;
507f26f6 3057 }
7daf5226 3058
507f26f6
TB
3059 if (this->routing_table)
3060 {
5be75c2c
MW
3061 if (manage_rule(this, RTM_NEWRULE, AF_INET, this->routing_table,
3062 this->routing_table_prio) != SUCCESS)
3063 {
3064 DBG1(DBG_KNL, "unable to create IPv4 routing table rule");
3065 }
3066 if (manage_rule(this, RTM_NEWRULE, AF_INET6, this->routing_table,
507f26f6
TB
3067 this->routing_table_prio) != SUCCESS)
3068 {
5be75c2c 3069 DBG1(DBG_KNL, "unable to create IPv6 routing table rule");
507f26f6
TB
3070 }
3071 }
7daf5226 3072
507f26f6
TB
3073 return &this->public;
3074}