2 * Copyright (C) 2008-2019 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
5 * Copyright (C) secunet Security Networks AG
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <linux/if_addrlabel.h>
48 #ifdef HAVE_LINUX_FIB_RULES_H
49 #include <linux/fib_rules.h>
52 #include "kernel_netlink_net.h"
53 #include "kernel_netlink_shared.h"
56 #include <utils/debug.h>
57 #include <threading/mutex.h>
58 #include <threading/rwlock.h>
59 #include <threading/rwlock_condvar.h>
60 #include <threading/spinlock.h>
61 #include <collections/hashtable.h>
62 #include <collections/linked_list.h>
63 #include <processing/jobs/callback_job.h>
65 /** delay before firing roam events (ms) */
66 #define ROAM_DELAY 100
68 /** delay before reinstalling routes (ms) */
69 #define ROUTE_DELAY 100
71 /** maximum recursion when searching for addresses in get_route() */
72 #define MAX_ROUTE_RECURSION 2
75 #define ROUTING_TABLE 0
78 #ifndef ROUTING_TABLE_PRIO
79 #define ROUTING_TABLE_PRIO 0
/**
 * multicast groups (for groups > 31 setsockopt has to be used)
 *
 * Expands to the bit mask for netlink multicast group number "group"
 * (1-based). The argument is parenthesized so that compound expressions
 * expand correctly, and the shift is done on an unsigned operand so that
 * group 32 (bit 31) does not shift into the sign bit of an int.
 */
#define nl_group(group) (1U << ((group) - 1))
85 ENUM(rt_msg_names
, RTM_NEWLINK
, RTM_GETRULE
,
107 typedef struct addr_entry_t addr_entry_t
;
110 * IP address in an iface_entry_t
112 struct addr_entry_t
{
114 /** the ip address */
120 /** scope of the address */
123 /** number of times this IP is used, if virtual (i.e. managed by us) */
126 /** TRUE once it is installed, if virtual */
131 * destroy a addr_entry_t object
133 static void addr_entry_destroy(addr_entry_t
*this)
135 this->ip
->destroy(this->ip
);
139 typedef struct iface_entry_t iface_entry_t
;
142 * A network interface on this system, containing addr_entry_t's
144 struct iface_entry_t
{
146 /** interface index */
149 /** name of the interface */
150 char ifname
[IFNAMSIZ
];
152 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
155 /** list of addresses as host_t */
156 linked_list_t
*addrs
;
158 /** TRUE if usable by config */
163 * destroy an interface entry
165 static void iface_entry_destroy(iface_entry_t
*this)
167 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
171 CALLBACK(iface_entry_by_index
, bool,
172 iface_entry_t
*this, va_list args
)
176 VA_ARGS_VGET(args
, ifindex
);
177 return this->ifindex
== ifindex
;
180 CALLBACK(iface_entry_by_name
, bool,
181 iface_entry_t
*this, va_list args
)
185 VA_ARGS_VGET(args
, ifname
);
186 return streq(this->ifname
, ifname
);
190 * check if an interface is up
192 static inline bool iface_entry_up(iface_entry_t
*iface
)
194 return (iface
->flags
& IFF_UP
) == IFF_UP
;
198 * check if an interface is up and usable
200 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
202 return iface
->usable
&& iface_entry_up(iface
);
205 typedef struct addr_map_entry_t addr_map_entry_t
;
208 * Entry that maps an IP address to an interface entry
210 struct addr_map_entry_t
{
211 /** The IP address */
214 /** The address entry for this IP address */
217 /** The interface this address is installed on */
218 iface_entry_t
*iface
;
222 * Hash a addr_map_entry_t object, all entries with the same IP address
223 * are stored in the same bucket
225 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
227 return chunk_hash(this->ip
->get_address(this->ip
));
231 * Compare two addr_map_entry_t objects, two entries are equal if they are
232 * installed on the same interface
234 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
236 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
237 a
->ip
->ip_equals(a
->ip
, b
->ip
);
241 * Used with get_match this finds an address entry if it is installed on
242 * an up and usable interface
244 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
247 return iface_entry_up_and_usable(b
->iface
) &&
248 a
->ip
->ip_equals(a
->ip
, b
->ip
);
252 * Used with get_match this finds an address entry if it is installed on
253 * any active local interface
255 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
257 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
261 * Used with get_match this finds an address entry if it is installed on
262 * any local interface
264 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
266 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
269 typedef struct net_change_t net_change_t
;
272 * Queued network changes
274 struct net_change_t
{
275 /** Name of the interface that got activated (or an IP appeared on) */
280 * Destroy a net_change_t object
282 static void net_change_destroy(net_change_t
*this)
289 * Hash a net_change_t object
291 static u_int
net_change_hash(net_change_t
*this)
293 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
297 * Compare two net_change_t objects
299 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
301 return streq(a
->if_name
, b
->if_name
);
304 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
307 * Private variables and functions of kernel_netlink_net class.
309 struct private_kernel_netlink_net_t
{
311 * Public part of the kernel_netlink_net_t object.
313 kernel_netlink_net_t
public;
316 * lock to access various lists and maps
321 * condition variable to signal virtual IP add/removal
323 rwlock_condvar_t
*condvar
;
326 * Cached list of interfaces and its addresses (iface_entry_t)
328 linked_list_t
*ifaces
;
331 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
336 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
341 * netlink rt socket (routing)
343 netlink_socket_t
*socket
;
346 * Netlink rt socket to receive address change events
351 * earliest time of the next roam event
356 * roam event due to address change
361 * lock to check and update roam event time
363 spinlock_t
*roam_lock
;
366 * routing table to install routes
368 uint32_t routing_table
;
371 * priority of used routing table
373 uint32_t routing_table_prio
;
383 mutex_t
*routes_lock
;
386 * interface changes which may trigger route reinstallation
388 hashtable_t
*net_changes
;
391 * mutex for route reinstallation triggers
393 mutex_t
*net_changes_lock
;
396 * time of last route reinstallation
398 timeval_t last_route_reinstall
;
401 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
406 * whether to react to RTM_NEWRULE or RTM_DELRULE events
411 * whether to trigger roam events
416 * whether to install IPsec policy routes
421 * whether to actually install virtual IPs
423 bool install_virtual_ip
;
426 * the name of the interface virtual IP addresses are installed on
428 char *install_virtual_ip_on
;
431 * whether preferred source addresses can be specified for IPv6 routes
433 bool rta_prefsrc_for_ipv6
;
436 * whether marks can be used in route lookups
441 * the mark excluded from the routing rule used for virtual IPs
446 * whether to prefer temporary IPv6 addresses over public ones
448 bool prefer_temporary_addrs
;
451 * list with routing tables to be excluded from route lookup
453 linked_list_t
*rt_exclude
;
456 * MTU to set on installed routes
461 * MSS to set on installed routes
467 * Forward declaration
469 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
470 int nlmsg_type
, int flags
, chunk_t dst_net
,
471 uint8_t prefixlen
, host_t
*gateway
,
472 host_t
*src_ip
, char *if_name
, bool pass
);
475 * Clear the queued network changes.
477 static void net_changes_clear(private_kernel_netlink_net_t
*this)
479 enumerator_t
*enumerator
;
480 net_change_t
*change
;
482 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
483 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
485 this->net_changes
->remove_at(this->net_changes
, enumerator
);
486 net_change_destroy(change
);
488 enumerator
->destroy(enumerator
);
492 * Act upon queued network changes.
494 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
496 enumerator_t
*enumerator
;
497 route_entry_t
*route
;
499 this->net_changes_lock
->lock(this->net_changes_lock
);
500 this->routes_lock
->lock(this->routes_lock
);
502 enumerator
= this->routes
->ht
.create_enumerator(&this->routes
->ht
);
503 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
505 net_change_t
*change
, lookup
= {
506 .if_name
= route
->if_name
,
508 if (route
->pass
|| !route
->if_name
)
509 { /* no need to reinstall these, they don't reference interfaces */
512 /* check if a change for the outgoing interface is queued */
513 change
= this->net_changes
->get(this->net_changes
, &lookup
);
515 { /* in case src_ip is not on the outgoing interface */
516 if (this->public.interface
.get_interface(&this->public.interface
,
517 route
->src_ip
, &lookup
.if_name
))
519 if (!streq(lookup
.if_name
, route
->if_name
))
521 change
= this->net_changes
->get(this->net_changes
, &lookup
);
523 free(lookup
.if_name
);
528 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
529 route
->dst_net
, route
->prefixlen
, route
->gateway
,
530 route
->src_ip
, route
->if_name
, route
->pass
);
533 enumerator
->destroy(enumerator
);
534 this->routes_lock
->unlock(this->routes_lock
);
536 net_changes_clear(this);
537 this->net_changes_lock
->unlock(this->net_changes_lock
);
538 return JOB_REQUEUE_NONE
;
542 * Queue route reinstallation caused by network changes for a given interface.
544 * The route reinstallation is delayed for a while and only done once for
545 * several calls during this delay, in order to avoid doing it too often.
546 * The interface name is freed.
548 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
551 net_change_t
*update
, *found
;
559 this->net_changes_lock
->lock(this->net_changes_lock
);
560 found
= this->net_changes
->put(this->net_changes
, update
, update
);
563 net_change_destroy(found
);
565 time_monotonic(&now
);
566 if (timercmp(&now
, &this->last_route_reinstall
, >))
568 timeval_add_ms(&now
, ROUTE_DELAY
);
569 this->last_route_reinstall
= now
;
571 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
573 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
575 this->net_changes_lock
->unlock(this->net_changes_lock
);
579 * check if the given IP is known as virtual IP and currently installed
581 * this function will also return TRUE if the virtual IP entry disappeared.
582 * in that case the returned entry will be NULL.
584 * this->lock must be held when calling this function
586 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
587 host_t
*ip
, addr_map_entry_t
**entry
)
589 addr_map_entry_t lookup
= {
593 *entry
= this->vips
->get_match(this->vips
, &lookup
,
594 (void*)addr_map_entry_match
);
596 { /* the virtual IP disappeared */
599 return (*entry
)->addr
->installed
;
603 * check if the given IP is known as virtual IP
605 * this->lock must be held when calling this function
607 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
609 addr_map_entry_t lookup
= {
613 return this->vips
->get_match(this->vips
, &lookup
,
614 (void*)addr_map_entry_match
) != NULL
;
618 * Add an address map entry
620 static void addr_map_entry_add(hashlist_t
*map
, addr_entry_t
*addr
,
621 iface_entry_t
*iface
)
623 addr_map_entry_t
*entry
;
630 entry
= map
->ht
.put(&map
->ht
, entry
, entry
);
635 * Remove an address map entry
637 static void addr_map_entry_remove(hashlist_t
*map
, addr_entry_t
*addr
,
638 iface_entry_t
*iface
)
640 addr_map_entry_t
*entry
, lookup
= {
646 entry
= map
->ht
.remove(&map
->ht
, &lookup
);
651 * Check if an address or net (addr with prefix net bits) is in
652 * subnet (net with net_len net bits)
654 static bool addr_in_subnet(chunk_t addr
, int prefix
, chunk_t net
, int net_len
)
656 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
660 { /* any address matches a /0 network */
663 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
|| prefix
< net_len
)
667 /* scan through all bytes in network order */
672 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
676 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
688 * Check if the given address is in subnet (net with net_len net bits)
690 static bool host_in_subnet(host_t
*host
, chunk_t net
, int net_len
)
694 addr
= host
->get_address(host
);
695 return addr_in_subnet(addr
, addr
.len
* 8, net
, net_len
);
699 * Determine the type or scope of the given unicast IP address. This is not
700 * the same thing returned in rtm_scope/ifa_scope.
702 * We use return values as defined in RFC 6724 (referring to RFC 4291).
704 static u_char
get_scope(host_t
*ip
)
708 addr
= ip
->get_address(ip
);
712 /* we use the mapping defined in RFC 6724, 3.2 */
713 if (addr
.ptr
[0] == 127)
714 { /* link-local, same as the IPv6 loopback address */
717 if (addr
.ptr
[0] == 169 && addr
.ptr
[1] == 254)
723 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr
*)addr
.ptr
))
724 { /* link-local, according to RFC 4291, 2.5.3 */
727 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr
*)addr
.ptr
))
731 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr
*)addr
.ptr
))
732 { /* deprecated, according to RFC 4291, 2.5.7 */
744 * Determine the label of the given unicast IP address.
746 * We currently only support the default table given in RFC 6724:
748 * Prefix Precedence Label
759 static u_char
get_label(host_t
*ip
)
766 /* priority table ordered by prefix */
768 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
769 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), 128, 0 },
771 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), 96, 4 },
774 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
775 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 96, 3 },
777 { chunk_from_chars(0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
778 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 32, 5 },
780 { chunk_from_chars(0x20, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
781 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 2 },
783 { chunk_from_chars(0x3f, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
784 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 12 },
786 { chunk_from_chars(0xfe, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
787 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 10, 11 },
789 { chunk_from_chars(0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
790 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 7, 13 },
794 for (i
= 0; i
< countof(priorities
); i
++)
796 if (host_in_subnet(ip
, priorities
[i
].net
, priorities
[i
].prefix
))
798 return priorities
[i
].label
;
806 * Returns the length of the common prefix in bits up to the length of a's
807 * prefix, defined by RFC 6724 as the portion of the address not including the
808 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
810 static u_char
common_prefix(host_t
*a
, host_t
*b
)
813 u_char byte
, bits
= 0, match
;
815 aa
= a
->get_address(a
);
816 ba
= b
->get_address(b
);
817 for (byte
= 0; byte
< 8; byte
++)
819 if (aa
.ptr
[byte
] != ba
.ptr
[byte
])
821 match
= aa
.ptr
[byte
] ^ ba
.ptr
[byte
];
822 for (bits
= 8; match
; match
>>= 1)
829 return byte
* 8 + bits
;
833 * Compare two IP addresses and return TRUE if the second address is the better
834 * choice of the two to reach the destination.
835 * For IPv6 we approximately follow RFC 6724.
837 static bool is_address_better(private_kernel_netlink_net_t
*this,
838 addr_entry_t
*a
, addr_entry_t
*b
, host_t
*d
)
840 u_char sa
, sb
, sd
, la
, lb
, ld
, pa
, pb
;
842 /* rule 2: prefer appropriate scope */
845 sa
= get_scope(a
->ip
);
846 sb
= get_scope(b
->ip
);
857 if (a
->ip
->get_family(a
->ip
) == AF_INET
)
858 { /* stop here for IPv4, default to addresses found earlier */
861 /* rule 3: avoid deprecated addresses (RFC 4862) */
862 if ((a
->flags
& IFA_F_DEPRECATED
) != (b
->flags
& IFA_F_DEPRECATED
))
864 return a
->flags
& IFA_F_DEPRECATED
;
866 /* rule 4 is not applicable as we don't know if an address is a home or
868 * rule 5 does not apply as we only compare addresses from one interface
870 /* rule 6: prefer matching label */
873 la
= get_label(a
->ip
);
874 lb
= get_label(b
->ip
);
876 if (la
== ld
&& lb
!= ld
)
880 else if (lb
== ld
&& la
!= ld
)
885 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
886 if ((a
->flags
& IFA_F_TEMPORARY
) != (b
->flags
& IFA_F_TEMPORARY
))
888 if (this->prefer_temporary_addrs
)
890 return b
->flags
& IFA_F_TEMPORARY
;
892 return a
->flags
& IFA_F_TEMPORARY
;
894 /* rule 8: use longest matching prefix */
897 pa
= common_prefix(a
->ip
, d
);
898 pb
= common_prefix(b
->ip
, d
);
904 /* default to addresses found earlier */
909 * Get a non-virtual IP address on the given interfaces and optionally in a
912 * If a candidate address is given, we first search for that address and if not
913 * found return the address as above.
914 * Returned host is a clone, has to be freed by caller.
916 * this->lock must be held when calling this function.
918 static host_t
*get_matching_address(private_kernel_netlink_net_t
*this,
919 int *ifindex
, int family
, chunk_t net
,
920 uint8_t mask
, host_t
*dest
,
923 enumerator_t
*ifaces
, *addrs
;
924 iface_entry_t
*iface
;
925 addr_entry_t
*addr
, *best
= NULL
;
926 bool candidate_matched
= FALSE
;
928 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
929 while (ifaces
->enumerate(ifaces
, &iface
))
931 if (iface
->usable
&& (!ifindex
|| iface
->ifindex
== *ifindex
))
932 { /* only use matching interfaces not excluded by config */
933 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
934 while (addrs
->enumerate(addrs
, &addr
))
936 if (addr
->refcount
||
937 addr
->ip
->get_family(addr
->ip
) != family
)
938 { /* ignore virtual IP addresses and ensure family matches */
941 if (net
.ptr
&& !host_in_subnet(addr
->ip
, net
, mask
))
942 { /* optionally match a subnet */
945 if (candidate
&& candidate
->ip_equals(candidate
, addr
->ip
) &&
946 !(addr
->flags
& IFA_F_DEPRECATED
))
947 { /* stop if we find the candidate and it's not deprecated */
949 candidate_matched
= TRUE
;
952 else if (!best
|| is_address_better(this, best
, addr
, dest
))
957 addrs
->destroy(addrs
);
958 if (ifindex
|| candidate_matched
)
964 ifaces
->destroy(ifaces
);
965 return best
? best
->ip
->clone(best
->ip
) : NULL
;
969 * Get a non-virtual IP address on the given interface.
971 * If a candidate address is given, we first search for that address and if not
972 * found return the address as above.
973 * Returned host is a clone, has to be freed by caller.
975 * this->lock must be held when calling this function.
977 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
978 int ifindex
, int family
, host_t
*dest
,
981 return get_matching_address(this, &ifindex
, family
, chunk_empty
, 0, dest
,
986 * Get a non-virtual IP address in the given subnet.
988 * If a candidate address is given, we first search for that address and if not
989 * found return the address as above.
990 * Returned host is a clone, has to be freed by caller.
992 * this->lock must be held when calling this function.
994 static host_t
*get_subnet_address(private_kernel_netlink_net_t
*this,
995 int family
, chunk_t net
, uint8_t mask
,
996 host_t
*dest
, host_t
*candidate
)
998 return get_matching_address(this, NULL
, family
, net
, mask
, dest
, candidate
);
1002 * callback function that raises the delayed roam event
1004 static job_requeue_t
roam_event(private_kernel_netlink_net_t
*this)
1008 this->roam_lock
->lock(this->roam_lock
);
1009 address
= this->roam_address
;
1010 this->roam_address
= FALSE
;
1011 this->roam_lock
->unlock(this->roam_lock
);
1012 charon
->kernel
->roam(charon
->kernel
, address
);
1013 return JOB_REQUEUE_NONE
;
1017 * fire a roaming event. we delay it for a bit and fire only one event
1018 * for multiple calls. otherwise we would create too many events.
1020 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
1025 if (!this->roam_events
)
1030 time_monotonic(&now
);
1031 this->roam_lock
->lock(this->roam_lock
);
1032 this->roam_address
|= address
;
1033 if (!timercmp(&now
, &this->next_roam
, >))
1035 this->roam_lock
->unlock(this->roam_lock
);
1038 timeval_add_ms(&now
, ROAM_DELAY
);
1039 this->next_roam
= now
;
1040 this->roam_lock
->unlock(this->roam_lock
);
1042 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
1044 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
1048 * check if an interface with a given index is up and usable
1050 * this->lock must be locked when calling this function
1052 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
1055 iface_entry_t
*iface
;
1057 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1058 (void**)&iface
, index
))
1060 return iface_entry_up_and_usable(iface
);
1066 * unregister the current addr_entry_t from the hashtable it is stored in
1068 * this->lock must be locked when calling this function
1070 CALLBACK(addr_entry_unregister
, void,
1071 addr_entry_t
*addr
, va_list args
)
1073 private_kernel_netlink_net_t
*this;
1074 iface_entry_t
*iface
;
1076 VA_ARGS_VGET(args
, iface
, this);
1079 addr_map_entry_remove(this->vips
, addr
, iface
);
1080 this->condvar
->broadcast(this->condvar
);
1083 addr_map_entry_remove(this->addrs
, addr
, iface
);
1087 * process RTM_NEWLINK/RTM_DELLINK from kernel
1089 static void process_link(private_kernel_netlink_net_t
*this,
1090 struct nlmsghdr
*hdr
, bool event
)
1092 struct ifinfomsg
* msg
= NLMSG_DATA(hdr
);
1093 struct rtattr
*rta
= IFLA_RTA(msg
);
1094 size_t rtasize
= IFLA_PAYLOAD (hdr
);
1095 enumerator_t
*enumerator
;
1096 iface_entry_t
*current
, *entry
= NULL
;
1098 bool update
= FALSE
, update_routes
= FALSE
;
1100 while (RTA_OK(rta
, rtasize
))
1102 switch (rta
->rta_type
)
1105 name
= RTA_DATA(rta
);
1108 rta
= RTA_NEXT(rta
, rtasize
);
1115 this->lock
->write_lock(this->lock
);
1116 switch (hdr
->nlmsg_type
)
1120 if (!this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1121 (void**)&entry
, msg
->ifi_index
))
1124 .ifindex
= msg
->ifi_index
,
1125 .addrs
= linked_list_create(),
1127 this->ifaces
->insert_last(this->ifaces
, entry
);
1129 strncpy(entry
->ifname
, name
, IFNAMSIZ
);
1130 entry
->ifname
[IFNAMSIZ
-1] = '\0';
1131 entry
->usable
= charon
->kernel
->is_interface_usable(charon
->kernel
,
1133 if (event
&& entry
->usable
)
1135 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
1137 update
= update_routes
= TRUE
;
1138 DBG1(DBG_KNL
, "interface %s activated", name
);
1140 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
1143 DBG1(DBG_KNL
, "interface %s deactivated", name
);
1146 entry
->flags
= msg
->ifi_flags
;
1151 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
1152 while (enumerator
->enumerate(enumerator
, ¤t
))
1154 if (current
->ifindex
== msg
->ifi_index
)
1156 if (event
&& current
->usable
)
1159 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
1161 /* TODO: move virtual IPs installed on this interface to
1162 * another interface? */
1163 this->ifaces
->remove_at(this->ifaces
, enumerator
);
1164 current
->addrs
->invoke_function(current
->addrs
,
1165 addr_entry_unregister
, current
, this);
1166 iface_entry_destroy(current
);
1170 enumerator
->destroy(enumerator
);
1174 this->lock
->unlock(this->lock
);
1176 if (update_routes
&& event
)
1178 queue_route_reinstall(this, strdup(name
));
1181 if (update
&& event
)
1183 fire_roam_event(this, TRUE
);
1188 * process RTM_NEWADDR/RTM_DELADDR from kernel
1190 static void process_addr(private_kernel_netlink_net_t
*this,
1191 struct nlmsghdr
*hdr
, bool event
)
1193 struct ifaddrmsg
* msg
= NLMSG_DATA(hdr
);
1194 struct rtattr
*rta
= IFA_RTA(msg
);
1195 size_t rtasize
= IFA_PAYLOAD (hdr
);
1196 host_t
*host
= NULL
;
1197 iface_entry_t
*iface
;
1198 chunk_t local
= chunk_empty
, address
= chunk_empty
;
1199 char *route_ifname
= NULL
;
1200 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
1202 while (RTA_OK(rta
, rtasize
))
1204 switch (rta
->rta_type
)
1207 local
.ptr
= RTA_DATA(rta
);
1208 local
.len
= RTA_PAYLOAD(rta
);
1211 address
.ptr
= RTA_DATA(rta
);
1212 address
.len
= RTA_PAYLOAD(rta
);
1215 rta
= RTA_NEXT(rta
, rtasize
);
1218 /* For PPP interfaces, we need the IFA_LOCAL address,
1219 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1220 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1223 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
1225 else if (address
.ptr
)
1227 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
1235 this->lock
->write_lock(this->lock
);
1236 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1237 (void**)&iface
, msg
->ifa_index
))
1239 addr_map_entry_t
*entry
, lookup
= {
1245 entry
= this->vips
->ht
.get(&this->vips
->ht
, &lookup
);
1248 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1249 { /* mark as installed and signal waiting threads */
1250 entry
->addr
->installed
= TRUE
;
1253 { /* the address was already marked as uninstalled */
1255 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1256 addr_map_entry_remove(this->vips
, addr
, iface
);
1257 addr_entry_destroy(addr
);
1259 /* no roam events etc. for virtual IPs */
1260 this->condvar
->broadcast(this->condvar
);
1261 this->lock
->unlock(this->lock
);
1262 host
->destroy(host
);
1265 entry
= this->addrs
->ht
.get(&this->addrs
->ht
, &lookup
);
1268 if (hdr
->nlmsg_type
== RTM_DELADDR
)
1272 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1276 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
1279 addr_map_entry_remove(this->addrs
, addr
, iface
);
1280 addr_entry_destroy(addr
);
1282 else if (entry
->addr
->flags
!= msg
->ifa_flags
)
1285 entry
->addr
->flags
= msg
->ifa_flags
;
1286 if (event
&& iface
->usable
)
1289 DBG1(DBG_KNL
, "flags changed for %H on %s", host
,
1296 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1300 route_ifname
= strdup(iface
->ifname
);
1302 .ip
= host
->clone(host
),
1303 .flags
= msg
->ifa_flags
,
1304 .scope
= msg
->ifa_scope
,
1306 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1307 addr_map_entry_add(this->addrs
, addr
, iface
);
1308 if (event
&& iface
->usable
)
1310 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
1314 if (found
&& (iface
->flags
& IFF_UP
))
1319 { /* ignore events for interfaces excluded by config */
1320 update
= changed
= FALSE
;
1323 this->lock
->unlock(this->lock
);
1325 if (update
&& event
&& route_ifname
)
1327 queue_route_reinstall(this, route_ifname
);
1333 host
->destroy(host
);
1335 /* send an update to all IKE_SAs */
1336 if (update
&& event
&& changed
)
1338 fire_roam_event(this, TRUE
);
1343 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1345 static void process_route(private_kernel_netlink_net_t
*this,
1346 struct nlmsghdr
*hdr
)
1348 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1349 struct rtattr
*rta
= RTM_RTA(msg
);
1350 size_t rtasize
= RTM_PAYLOAD(hdr
);
1351 uint32_t rta_oif
= 0;
1352 host_t
*host
= NULL
;
1354 /* ignore routes added by us or in the local routing table (local addrs) */
1355 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1356 msg
->rtm_table
== RT_TABLE_LOCAL
))
1360 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1361 { /* ignore cached routes, seem to be created a lot for IPv6 */
1365 while (RTA_OK(rta
, rtasize
))
1367 switch (rta
->rta_type
)
1369 #ifdef HAVE_RTA_TABLE
1371 /* also check against extended table ID */
1372 if (RTA_PAYLOAD(rta
) == sizeof(uint32_t) &&
1373 this->routing_table
== *(uint32_t*)RTA_DATA(rta
))
1379 #endif /* HAVE_RTA_TABLE */
1382 host
= host_create_from_chunk(msg
->rtm_family
,
1383 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1386 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1388 rta_oif
= *(uint32_t*)RTA_DATA(rta
);
1392 rta
= RTA_NEXT(rta
, rtasize
);
1394 this->lock
->read_lock(this->lock
);
1395 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1396 { /* ignore route changes for interfaces that are ignored or down */
1397 this->lock
->unlock(this->lock
);
1401 if (!host
&& rta_oif
)
1403 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
,
1406 if (!host
|| is_known_vip(this, host
))
1407 { /* ignore routes added for virtual IPs */
1408 this->lock
->unlock(this->lock
);
1412 this->lock
->unlock(this->lock
);
1413 fire_roam_event(this, FALSE
);
1414 host
->destroy(host
);
1418 * process RTM_NEW|DELRULE from kernel
1420 static void process_rule(private_kernel_netlink_net_t
*this,
1421 struct nlmsghdr
*hdr
)
1423 #ifdef HAVE_LINUX_FIB_RULES_H
1424 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1425 struct rtattr
*rta
= RTM_RTA(msg
);
1426 size_t rtasize
= RTM_PAYLOAD(hdr
);
1428 /* ignore rules added by us or in the local routing table (local addrs) */
1429 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1430 msg
->rtm_table
== RT_TABLE_LOCAL
))
1435 while (RTA_OK(rta
, rtasize
))
1437 switch (rta
->rta_type
)
1440 /* also check against extended table ID */
1441 if (RTA_PAYLOAD(rta
) == sizeof(uint32_t) &&
1442 this->routing_table
== *(uint32_t*)RTA_DATA(rta
))
1448 rta
= RTA_NEXT(rta
, rtasize
);
1450 fire_roam_event(this, FALSE
);
1455 * Receives events from kernel
/**
 * receive_events(): read one datagram from the RT event socket and
 * dispatch every netlink message it contains.
 *
 * this:  kernel_netlink_net instance; socket_events, process_route and
 *        process_rules are read here.
 * fd:    descriptor passed by the caller; the read itself uses
 *        this->socket_events.
 * event: not referenced in the visible lines.  The watcher_event_t type
 *        suggests this is registered as a watcher callback - confirm.
 *
 * Returns bool.  NOTE(review): the return statements, the errno switch
 * around the two short comments below and the case labels of the
 * dispatch switch are not part of this listing.
 */
1457 static bool receive_events(private_kernel_netlink_net_t
*this, int fd
,
1458 watcher_event_t event
)
1460 char response
[netlink_get_buflen()];
1461 struct nlmsghdr
*hdr
= (struct nlmsghdr
*)response
;
1462 struct sockaddr_nl addr
;
1463 socklen_t addr_len
= sizeof(addr
);
/* non-blocking read (MSG_DONTWAIT); the sender address is captured so
 * that datagrams with a non-zero nl_pid can be recognised below */
1466 len
= recvfrom(this->socket_events
, response
, sizeof(response
),
1467 MSG_DONTWAIT
, (struct sockaddr
*)&addr
, &addr_len
);
1473 /* interrupted, try again */
1476 /* no data ready, select again */
1479 DBG1(DBG_KNL
, "unable to receive from RT event socket %s (%d)",
1480 strerror(errno
), errno
);
1486 if (addr
.nl_pid
!= 0)
1487 { /* not from kernel. not interested, try another one */
/* one datagram may carry several netlink messages; walk all of them */
1491 while (NLMSG_OK(hdr
, len
))
1493 /* looks good so far, dispatch netlink message */
1494 switch (hdr
->nlmsg_type
)
1498 process_addr(this, hdr
, TRUE
);
1502 process_link(this, hdr
, TRUE
);
/* route and rule messages are handled only if enabled on this instance */
1506 if (this->process_route
)
1508 process_route(this, hdr
);
1513 if (this->process_rules
)
1515 process_rule(this, hdr
);
1521 hdr
= NLMSG_NEXT(hdr
, len
);
/*
 * address_enumerator_t: context handed to the address enumeration
 * callbacks in this file (filter_interfaces, create_iface_enumerator,
 * filter_addresses and address_enumerator_destroy).
 * NOTE(review): the opening line of the type definition is not part of
 * this listing.
 */
1526 /** enumerator over addresses */
/* owning kernel_netlink_net instance; its lock is released through this
 * pointer in address_enumerator_destroy() */
1528 private_kernel_netlink_net_t
* this;
1529 /** which addresses to enumerate */
1530 kernel_address_type_t which
;
1531 } address_enumerator_t
;
/**
 * address_enumerator_destroy(): cleanup callback passed as the last
 * argument of enumerator_create_nested() in create_address_enumerator().
 *
 * data: enumeration context; the lock of data->this, which is read-locked
 *       when the enumerator is created, is released here.
 *
 * NOTE(review): only the unlock is visible in this listing; whether data
 * itself is released here cannot be seen - confirm.
 */
1533 CALLBACK(address_enumerator_destroy
, void,
1534 address_enumerator_t
*data
)
1536 data
->this->lock
->unlock(data
->this->lock
);
/**
 * filter_addresses(): filter callback for the address list of a single
 * interface (installed by create_iface_enumerator()).
 *
 * data: enumeration context; data->which selects the wanted address types
 *       and data->this->prefer_temporary_addrs steers IPv6 handling.
 * orig: underlying enumerator yielding the address entries.
 * args: output arguments of the caller; one pointer (out) is fetched.
 *
 * Checks applied to every entry, in this order:
 *  - entry has a refcount but ADDR_TYPE_VIRTUAL was not requested,
 *  - entry has no refcount but ADDR_TYPE_REGULAR was not requested,
 *  - entry is flagged IFA_F_DEPRECATED or its scope is >= RT_SCOPE_LINK,
 *  - entry is an IPv6 address without refcount whose IFA_F_TEMPORARY
 *    state differs from prefer_temporary_addrs.
 *
 * NOTE(review): the bodies of these checks, the assignment through out
 * and the return statements are not part of this listing; per the
 * original comments the first three checks skip the entry.
 */
1540 CALLBACK(filter_addresses
, bool,
1541 address_enumerator_t
*data
, enumerator_t
*orig
, va_list args
)
1546 VA_ARGS_VGET(args
, out
);
1548 while (orig
->enumerate(orig
, &addr
))
/* a non-zero refcount marks addresses installed by this plugin */
1550 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && addr
->refcount
)
1551 { /* skip virtual interfaces added by us */
1554 if (!(data
->which
& ADDR_TYPE_REGULAR
) && !addr
->refcount
)
1555 { /* address is regular, but not requested */
1558 if (addr
->flags
& IFA_F_DEPRECATED
||
1559 addr
->scope
>= RT_SCOPE_LINK
)
1560 { /* skip deprecated addresses or those with an unusable scope */
1563 if (!addr
->refcount
&& addr
->ip
->get_family(addr
->ip
) == AF_INET6
)
1564 { /* handle non-VIP temporary IPv6 addresses according to config */
1565 bool temporary
= (addr
->flags
& IFA_F_TEMPORARY
) == IFA_F_TEMPORARY
;
1566 if (data
->this->prefer_temporary_addrs
!= temporary
)
1578 * enumerator constructor for interfaces
/**
 * create_iface_enumerator(): build the per-interface address enumerator;
 * passed as inner constructor to enumerator_create_nested() in
 * create_address_enumerator().
 *
 * iface: interface whose addrs list is enumerated.
 * data:  enumeration context forwarded to filter_addresses().
 *
 * Returns a filter enumerator wrapping an enumerator over iface->addrs.
 * The trailing NULL is presumably the destructor for data, i.e. data is
 * not released by the returned enumerator - confirm against the
 * declaration of enumerator_create_filter().
 */
1580 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1581 address_enumerator_t
*data
)
1583 return enumerator_create_filter(
1584 iface
->addrs
->create_enumerator(iface
->addrs
),
1585 filter_addresses
, data
, NULL
);
/**
 * filter_interfaces(): filter callback for the interface list (installed
 * by create_address_enumerator()).
 *
 * data: enumeration context; data->which selects which interfaces are
 *       acceptable.
 * orig: underlying enumerator yielding iface_entry_t pointers.
 * args: output arguments of the caller; one pointer (out) is fetched.
 *
 * Checks applied to every interface, in this order:
 *  - not usable and ADDR_TYPE_IGNORED not requested,
 *  - IFF_LOOPBACK set and ADDR_TYPE_LOOPBACK not requested,
 *  - IFF_UP not set and ADDR_TYPE_DOWN not requested.
 *
 * NOTE(review): the bodies of the checks, the assignment through out and
 * the return statements are not part of this listing; per the original
 * comments a failed check skips the interface.
 */
1588 CALLBACK(filter_interfaces
, bool,
1589 address_enumerator_t
*data
, enumerator_t
*orig
, va_list args
)
1591 iface_entry_t
*iface
, **out
;
1593 VA_ARGS_VGET(args
, out
);
1595 while (orig
->enumerate(orig
, &iface
))
1597 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !iface
->usable
)
1598 { /* skip interfaces excluded by config */
1601 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && (iface
->flags
& IFF_LOOPBACK
))
1602 { /* ignore loopback devices */
1605 if (!(data
->which
& ADDR_TYPE_DOWN
) && !(iface
->flags
& IFF_UP
))
1606 { /* skip interfaces not up */
/**
 * create_address_enumerator(): kernel_net_t method returning an
 * enumerator over local addresses.
 *
 * this:  kernel_netlink_net instance.
 * which: address types to include; presumably stored in data for the
 *        filter callbacks (the initialisation of data is not part of
 *        this listing - confirm).
 *
 * The result is a nested enumerator: the outer level walks this->ifaces
 * through filter_interfaces(), the inner level is created per interface
 * by create_iface_enumerator().
 */
1615 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1616 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1618 address_enumerator_t
*data
;
/* no unlock is visible before the return; the lock is released by
 * address_enumerator_destroy(), passed as the final argument below */
1625 this->lock
->read_lock(this->lock
);
1626 return enumerator_create_nested(
1627 enumerator_create_filter(
1628 this->ifaces
->create_enumerator(this->ifaces
),
1629 filter_interfaces
, data
, NULL
),
1630 (void*)create_iface_enumerator
, data
,
1631 address_enumerator_destroy
);
/**
 * get_interface_name(): kernel_net_t method that looks up the interface
 * an IP address is installed on.
 *
 * this: kernel_netlink_net instance.
 * ip:   address to look up; a wildcard address (is_anyaddr) is handled
 *       separately before any lookup.
 * name: receives a copy of the interface name made with strdup().
 *
 * All lookups run under the read lock, in this order:
 *  1. this->addrs, restricted to interfaces that are up and usable,
 *  2. this->vips (virtual IPs installed by us), same restriction,
 *  3. this->addrs again, requiring only that the interface is up.
 *
 * Returns bool.  NOTE(review): the return statements and the conditions
 * around the three results are not part of this listing.
 */
1634 METHOD(kernel_net_t
, get_interface_name
, bool,
1635 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1637 addr_map_entry_t
*entry
, lookup
= {
1641 if (ip
->is_anyaddr(ip
))
1645 this->lock
->read_lock(this->lock
);
1646 /* first try to find it on an up and usable interface */
1647 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1648 (void*)addr_map_entry_match_up_and_usable
);
1653 *name
= strdup(entry
->iface
->ifname
);
1654 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1656 this->lock
->unlock(this->lock
);
1659 /* in a second step, consider virtual IPs installed by us */
1660 entry
= this->vips
->get_match(this->vips
, &lookup
,
1661 (void*)addr_map_entry_match_up_and_usable
);
1666 *name
= strdup(entry
->iface
->ifname
);
1667 DBG2(DBG_KNL
, "virtual IP %H is on interface %s", ip
, *name
);
1669 this->lock
->unlock(this->lock
);
1672 /* maybe it is installed on an ignored interface */
1673 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1674 (void*)addr_map_entry_match_up
);
1677 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1679 this->lock
->unlock(this->lock
);
1684 * get the index of an interface by name
/**
 * get_interface_index(): translate an interface name into its index.
 *
 * this: kernel_netlink_net instance; this->ifaces is searched under the
 *       read lock with iface_entry_by_name.
 * name: interface name to look up.
 *
 * Returns the ifindex stored in the matching iface entry.  A DBG1 message
 * is logged on the failure path.  NOTE(review): the declaration and
 * initial value of ifindex and the return statement are not part of this
 * listing, so the value returned for an unknown name cannot be stated.
 */
1686 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1688 iface_entry_t
*iface
;
1691 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1693 this->lock
->read_lock(this->lock
);
1694 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
1695 (void**)&iface
, name
))
1697 ifindex
= iface
->ifindex
;
1699 this->lock
->unlock(this->lock
);
1703 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
1709 * get the name of an interface by index (allocated)
/**
 * get_interface_name_by_index(): translate an interface index into a
 * newly allocated interface name.
 *
 * this: kernel_netlink_net instance; this->ifaces is searched under the
 *       read lock with iface_entry_by_index.
 * (the declaration of the second parameter, used below as index, is not
 * part of this listing)
 *
 * Returns a copy of the name made with strdup(); the caller presumably
 * has to free it.  A DBG1 message is logged on the failure path.
 * NOTE(review): the declaration of name and the return statement are not
 * visible here.
 */
1711 static char *get_interface_name_by_index(private_kernel_netlink_net_t
*this,
1714 iface_entry_t
*iface
;
1717 DBG2(DBG_KNL
, "getting iface name for index %d", index
);
1719 this->lock
->read_lock(this->lock
);
1720 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1721 (void**)&iface
, index
))
1723 name
= strdup(iface
->ifname
);
1725 this->lock
->unlock(this->lock
);
1729 DBG1(DBG_KNL
, "unable to get interface name for %d", index
);
1735 * Store information about a route retrieved via RTNETLINK
1751 * Free a route entry
/**
 * rt_entry_destroy(): release a route entry.
 *
 * this: entry to release; its src_host is destroyed if set (DESTROY_IF).
 *
 * Also used as the element destructor when get_route() destroys its
 * route list.  NOTE(review): only the src_host cleanup is visible in this
 * listing; whether the entry itself is freed here cannot be seen.
 */
1753 static void rt_entry_destroy(rt_entry_t
*this)
1755 DESTROY_IF(this->src_host
);
1760 * Check if the route received with RTM_NEWROUTE is usable based on its type.
/**
 * route_usable(): decide from the route type (rtm_type of the rtmsg in
 * the message payload) whether a received route can be used.
 *
 * hdr:         netlink message carrying the route.
 * allow_local: not referenced in the visible lines; presumably decides
 *              how local routes are treated - confirm.
 *
 * Returns bool.  NOTE(review): only the RTN_UNREACHABLE case label is
 * part of this listing; the other labels and all return statements are
 * missing, so the result per type cannot be stated from here.
 */
1762 static bool route_usable(struct nlmsghdr
*hdr
, bool allow_local
)
1766 msg
= NLMSG_DATA(hdr
);
1767 switch (msg
->rtm_type
)
1770 case RTN_UNREACHABLE
:
1782 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1783 * reused if not NULL.
1785 * Returned chunks point to internal data of the Netlink message.
/**
 * parse_route(): extract the interesting fields of a route message into
 * an rt_entry_t.
 *
 * hdr:   netlink message carrying the route.
 * route: entry to fill.  Two initialisers with the same three fields are
 *        visible: one assigns through *route, the other presumably
 *        belongs to the allocation of a fresh entry (not shown).
 *
 * dst_len, src_len and table come from the rtmsg header.  The attribute
 * loop stores pref_src, gtw, dst and src as chunks that reference the
 * attribute data inside the message (no copy is made), and reads oif,
 * priority and, with HAVE_RTA_TABLE, table as uint32_t values when the
 * payload size matches the size of the field.
 *
 * NOTE(review): the case labels of the switch, the declarations and the
 * return statement are not part of this listing.
 */
1787 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1793 msg
= NLMSG_DATA(hdr
);
1795 rtasize
= RTM_PAYLOAD(hdr
);
1799 *route
= (rt_entry_t
){
1800 .dst_len
= msg
->rtm_dst_len
,
1801 .src_len
= msg
->rtm_src_len
,
1802 .table
= msg
->rtm_table
,
1808 .dst_len
= msg
->rtm_dst_len
,
1809 .src_len
= msg
->rtm_src_len
,
1810 .table
= msg
->rtm_table
,
/* walk all route attributes; the chunks created below alias the
 * attribute payload and stay valid only as long as the message does */
1814 while (RTA_OK(rta
, rtasize
))
1816 switch (rta
->rta_type
)
1819 route
->pref_src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1822 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1825 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1828 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
/* integer attributes are accepted only if their size is as expected */
1831 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1833 route
->oif
= *(uint32_t*)RTA_DATA(rta
);
1837 if (RTA_PAYLOAD(rta
) == sizeof(route
->priority
))
1839 route
->priority
= *(uint32_t*)RTA_DATA(rta
);
1842 #ifdef HAVE_RTA_TABLE
1844 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1846 route
->table
= *(uint32_t*)RTA_DATA(rta
);
1849 #endif /* HAVE_RTA_TABLE*/
1851 rta
= RTA_NEXT(rta
, rtasize
);
1857 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
/**
 * get_route(): query the kernel for routes towards dest and return either
 * the next hop or a source address to use.
 *
 * this:      kernel_netlink_net instance.
 * dest:      destination address.
 * prefix:    prefix length of the destination; a negative value means the
 *            full address length, larger values are capped to it.
 * nexthop:   selects the result: gateway of the chosen route (see the
 *            nexthop branch near the end) or its source address.
 * candidate: preferred source address, may be NULL (it is checked with
 *            !candidate in the selection loop).
 * iface:     if set and the chosen route has an outgoing interface, it
 *            receives the name from get_interface_name_by_index().
 * recursion: current depth; the function calls itself with recursion + 1
 *            to find a source address for reaching a gateway.
 *
 * Visible flow:
 *  1. Build an RTM_GETROUTE request.  Depending on match_net, rta_mark,
 *     routing_mark, the address family, rta_prefsrc_for_ipv6,
 *     routing_table and install_routes either RTA_MARK is added or
 *     NLM_F_DUMP is set.
 *  2. Under the read lock, parse each usable route of the response and
 *     drop those from excluded tables, from our own routing table, via
 *     interfaces that are not up and usable, not containing dest, or
 *     with a preferred source that is a known virtual IP.  The remaining
 *     routes go into a list ordered by prefix length and priority.
 *  3. Walk the list and try to determine a source address per route
 *     (from the route, its source selector, its interface or, through
 *     recursion, its gateway), preferring one that equals candidate; the
 *     first route with a usable address is kept as fallback in best.
 *  4. Produce the result from best, release the lock, destroy the list
 *     and log the outcome.
 *
 * NOTE(review): this listing omits many original lines (braces, case
 * labels, else keywords, continue, break and return statements and some
 * declarations), so the exact branch structure is not visible here.
 */
1859 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1860 int prefix
, bool nexthop
, host_t
*candidate
,
1861 char **iface
, u_int recursion
)
1863 netlink_buf_t request
;
1864 struct nlmsghdr
*hdr
, *out
, *current
;
1868 linked_list_t
*routes
;
1869 rt_entry_t
*route
= NULL
, *best
= NULL
;
1870 enumerator_t
*enumerator
;
1871 host_t
*addr
= NULL
;
/* bound the gateway recursion started further below */
1875 if (recursion
> MAX_ROUTE_RECURSION
)
1879 chunk
= dest
->get_address(dest
);
/* len is the address length in bits; match_net is set when a network
 * rather than a single host is looked up */
1880 len
= chunk
.len
* 8;
1881 prefix
= prefix
< 0 ? len
: min(prefix
, len
);
1882 match_net
= prefix
!= len
;
1884 memset(&request
, 0, sizeof(request
));
1886 family
= dest
->get_family(dest
);
1888 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1889 hdr
->nlmsg_type
= RTM_GETROUTE
;
1890 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1892 msg
= NLMSG_DATA(hdr
);
1893 msg
->rtm_family
= family
;
1894 if (!match_net
&& this->rta_mark
&& this->routing_mark
.value
)
1896 /* if our routing rule excludes packets with a certain mark we can
1897 * get the preferred route without having to dump all routes */
1898 chunk
= chunk_from_thing(this->routing_mark
.value
);
1899 netlink_add_attribute(hdr
, RTA_MARK
, chunk
, sizeof(request
));
1901 else if (family
== AF_INET
|| this->rta_prefsrc_for_ipv6
||
1902 this->routing_table
|| match_net
)
1903 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1904 * as we want to ignore routes with virtual IPs we cannot use DUMP
1905 * if these routes are not installed in a separate table */
1906 if (this->install_routes
)
1908 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
/* the candidate source is sent as RTA_PREFSRC in dump requests and as
 * RTA_SRC otherwise */
1913 chunk
= candidate
->get_address(candidate
);
1914 if (hdr
->nlmsg_flags
& NLM_F_DUMP
)
1916 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1920 netlink_add_attribute(hdr
, RTA_SRC
, chunk
, sizeof(request
));
1923 /* we use this below to match against the routes */
1924 chunk
= dest
->get_address(dest
);
1927 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1930 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1932 DBG2(DBG_KNL
, "getting %s to reach %H/%d failed",
1933 nexthop
? "nexthop" : "address", dest
, prefix
);
/* first pass: collect the acceptable routes of the response */
1936 routes
= linked_list_create();
1937 this->lock
->read_lock(this->lock
);
1939 for (current
= out
; NLMSG_OK(current
, len
);
1940 current
= NLMSG_NEXT(current
, len
))
1942 switch (current
->nlmsg_type
)
1951 if (!route_usable(current
, TRUE
))
1955 route
= parse_route(current
, route
);
1957 table
= (uintptr_t)route
->table
;
1958 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
1960 { /* route is from an excluded routing table */
1963 if (this->routing_table
!= 0 &&
1964 route
->table
== this->routing_table
)
1965 { /* route is from our own ipsec routing table */
1968 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
1969 { /* interface is down */
1972 if (!addr_in_subnet(chunk
, prefix
, route
->dst
, route
->dst_len
))
1973 { /* route destination does not contain dest */
1976 if (route
->pref_src
.ptr
)
1977 { /* verify source address, if any */
1978 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
1979 route
->pref_src
, 0);
1980 if (src
&& is_known_vip(this, src
))
1981 { /* ignore routes installed by us */
1985 route
->src_host
= src
;
1987 /* insert route, sorted by network prefix and priority */
1988 enumerator
= routes
->create_enumerator(routes
);
1989 while (enumerator
->enumerate(enumerator
, &other
))
/* the insert position is in front of the first entry that has a shorter
 * prefix, or the same prefix and a larger priority value */
1991 if (route
->dst_len
> other
->dst_len
)
1995 if (route
->dst_len
== other
->dst_len
&&
1996 route
->priority
< other
->priority
)
2001 routes
->insert_before(routes
, enumerator
, route
);
2002 enumerator
->destroy(enumerator
);
2013 rt_entry_destroy(route
);
2016 /* now we have a list of routes matching dest, sorted by net prefix.
2017 * we will look for source addresses for these routes and select the one
2018 * with the preferred source address, if possible */
2019 enumerator
= routes
->create_enumerator(routes
);
2020 while (enumerator
->enumerate(enumerator
, &route
))
2022 if (route
->src_host
)
2023 { /* got a source address with the route, if no preferred source
2024 * is given or it matches we are done, as this is the best route */
2025 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
2030 else if (route
->oif
)
2031 { /* no match yet, maybe it is assigned to the same interface */
2032 host_t
*src
= get_interface_address(this, route
->oif
,
2033 msg
->rtm_family
, dest
, candidate
);
2034 if (src
&& src
->ip_equals(src
, candidate
))
2036 route
->src_host
->destroy(route
->src_host
);
2037 route
->src_host
= src
;
2043 /* no luck yet with the source address. if this is the best (first)
2044 * route we store it as fallback in case we don't find a route with
2045 * the preferred source */
2046 best
= best
?: route
;
2050 { /* no src, but a source selector, try to find a matching address */
2051 route
->src_host
= get_subnet_address(this, msg
->rtm_family
,
2052 route
->src
, route
->src_len
, dest
,
2054 if (route
->src_host
)
2055 { /* we handle this address the same as the one above */
2057 candidate
->ip_equals(candidate
, route
->src_host
))
2062 best
= best
?: route
;
2067 { /* no src, but an interface - get address from it */
2068 route
->src_host
= get_interface_address(this, route
->oif
,
2069 msg
->rtm_family
, dest
, candidate
);
2070 if (route
->src_host
)
2071 { /* more of the same */
2073 candidate
->ip_equals(candidate
, route
->src_host
))
2078 best
= best
?: route
;
2083 { /* no src, no iface, but a gateway - lookup src to reach gtw */
2086 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
2087 if (gtw
&& !gtw
->ip_equals(gtw
, dest
))
2089 route
->src_host
= get_route(this, gtw
, -1, FALSE
, candidate
,
2090 iface
, recursion
+ 1);
2093 if (route
->src_host
)
2094 { /* more of the same */
2096 candidate
->ip_equals(candidate
, route
->src_host
))
2101 best
= best
?: route
;
2105 enumerator
->destroy(enumerator
);
2108 { /* nexthop lookup, return gateway and oif if any */
/* if no route was selected above, the first route in the list is used */
2113 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
2115 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
2116 if (iface
&& best
->oif
)
2118 *iface
= get_interface_name_by_index(this, best
->oif
);
2121 if (!addr
&& !match_net
)
2122 { /* fallback to destination address */
2123 addr
= dest
->clone(dest
);
2130 addr
= best
->src_host
->clone(best
->src_host
);
2133 this->lock
->unlock(this->lock
);
2134 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
2139 if (nexthop
&& iface
&& *iface
)
2141 DBG2(DBG_KNL
, "using %H as nexthop and %s as dev to reach %H/%d",
2142 addr
, *iface
, dest
, prefix
);
2146 DBG2(DBG_KNL
, "using %H as %s to reach %H/%d", addr
,
2147 nexthop
? "nexthop" : "address", dest
, prefix
);
/* failures are logged for the outermost call only */
2150 else if (!recursion
)
2152 DBG2(DBG_KNL
, "no %s found to reach %H/%d",
2153 nexthop
? "nexthop" : "address", dest
, prefix
);
/**
 * get_source_addr(): kernel_net_t method returning a source address for
 * reaching dest.
 *
 * this: kernel_netlink_net instance.
 * dest: destination address.
 * src:  preferred source address, forwarded as candidate to get_route().
 *
 * Delegates to get_route() with prefix -1 (full address length), nexthop
 * FALSE, no interface output and recursion depth 0.
 */
2158 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
2159 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
2161 return get_route(this, dest
, -1, FALSE
, src
, NULL
, 0);
/**
 * get_nexthop(): kernel_net_t method returning the next hop for reaching
 * dest/prefix.
 *
 * this:   kernel_netlink_net instance.
 * dest:   destination address.
 * prefix: prefix length of the destination, forwarded unchanged.
 * src:    preferred source address, forwarded as candidate.
 * (the declaration of the last parameter, used below as iface, is not
 * part of this listing)
 *
 * Delegates to get_route() with nexthop TRUE and recursion depth 0.
 */
2164 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
2165 private_kernel_netlink_net_t
*this, host_t
*dest
, int prefix
, host_t
*src
,
2168 return get_route(this, dest
, prefix
, TRUE
, src
, iface
, 0);
/*
 * subnet_enumerator_t: state of the enumerator returned by
 * create_local_subnet_enumerator() and advanced by enumerate_subnets().
 * NOTE(review): the opening line of the type definition and the
 * declarations of the two members used elsewhere as this->len and
 * this->net are not part of this listing.
 */
2171 /** enumerator over subnets */
/* public enumerator interface; its address is what
 * create_local_subnet_enumerator() returns */
2173 enumerator_t
public;
/* owning kernel_netlink_net instance; enumerate_subnets() reads its
 * routing_table */
2174 private_kernel_netlink_net_t
*private;
2175 /** message from the kernel */
2176 struct nlmsghdr
*msg
;
2177 /** current message from the kernel */
2178 struct nlmsghdr
*current
;
2179 /** remaining length */
2181 /** last subnet enumerated */
2183 /** interface of current net */
2184 char ifname
[IFNAMSIZ
];
2185 } subnet_enumerator_t
;
/**
 * destroy_subnet_enumerator(): destroy method of the subnet enumerator.
 *
 * this: enumerator state; the host object of the last enumerated subnet
 *       (this->net) is destroyed if set.
 *
 * NOTE(review): only the cleanup of this->net is visible in this listing;
 * the release of the kernel response and of the enumerator itself
 * cannot be seen here.
 */
2187 METHOD(enumerator_t
, destroy_subnet_enumerator
, void,
2188 subnet_enumerator_t
*this)
2190 DESTROY_IF(this->net
);
/**
 * enumerate_subnets(): venumerate method of the subnet enumerator; finds
 * the next directly attached subnet in the kernel response.
 *
 * this: enumerator state (response, position, remaining length, last
 *       subnet, interface name buffer).
 * args: three output pointers fetched as net, mask and ifname.
 *
 * The position either starts at this->msg or advances with NLMSG_NEXT;
 * before scanning, the subnet of the previous call is destroyed.  A route
 * is reported only if it passes route_usable(.., FALSE), is neither in
 * the local nor in our own routing table, has no gateway, has a
 * destination and an outgoing interface, and if_indextoname() resolves
 * that interface.  *mask receives the prefix length and *ifname points
 * at the buffer inside the enumerator.
 *
 * NOTE(review): the case labels, the assignment through net, the
 * condition choosing between first call and later calls, and the return
 * statements are not part of this listing.
 */
2195 METHOD(enumerator_t
, enumerate_subnets
, bool,
2196 subnet_enumerator_t
*this, va_list args
)
2202 VA_ARGS_VGET(args
, net
, mask
, ifname
);
2206 this->current
= this->msg
;
2210 this->current
= NLMSG_NEXT(this->current
, this->len
);
2211 DESTROY_IF(this->net
);
2215 while (NLMSG_OK(this->current
, this->len
))
2217 switch (this->current
->nlmsg_type
)
2225 if (!route_usable(this->current
, FALSE
))
/* parse into a local rt_entry_t, nothing is allocated for the route */
2229 parse_route(this->current
, &route
);
2231 if (route
.table
&& (
2232 route
.table
== RT_TABLE_LOCAL
||
2233 route
.table
== this->private->routing_table
))
2234 { /* ignore our own and the local routing tables */
2237 else if (route
.gtw
.ptr
)
2238 { /* ignore routes via gateway/next hop */
2242 if (route
.dst
.ptr
&& route
.oif
&&
2243 if_indextoname(route
.oif
, this->ifname
))
/* AF_UNSPEC is passed as family, so host_create_from_chunk() presumably
 * derives it from the chunk length - confirm */
2245 this->net
= host_create_from_chunk(AF_UNSPEC
, route
.dst
, 0);
2247 *mask
= route
.dst_len
;
2248 *ifname
= this->ifname
;
2256 this->current
= NLMSG_NEXT(this->current
, this->len
);
/**
 * create_local_subnet_enumerator(): kernel_net_t method returning an
 * enumerator over local subnets.
 *
 * this: kernel_netlink_net instance; its socket is used for the query.
 *
 * Sends an RTM_GETROUTE dump request (NLM_F_REQUEST | NLM_F_DUMP) with
 * rtm_scope set to RT_SCOPE_LINK.  If the request fails an empty
 * enumerator is returned after a DBG2 message.  Otherwise a
 * subnet_enumerator_t is set up with enumerate_subnets() and
 * destroy_subnet_enumerator() as methods and its public part is returned.
 *
 * NOTE(review): the initialisation of the remaining enumerator members
 * (response, length, owner) is not part of this listing.
 */
2261 METHOD(kernel_net_t
, create_local_subnet_enumerator
, enumerator_t
*,
2262 private_kernel_netlink_net_t
*this)
2264 netlink_buf_t request
;
2265 struct nlmsghdr
*hdr
, *out
;
2268 subnet_enumerator_t
*enumerator
;
2270 memset(&request
, 0, sizeof(request
));
2273 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
2274 hdr
->nlmsg_type
= RTM_GETROUTE
;
2275 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2276 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
2278 msg
= NLMSG_DATA(hdr
);
2279 msg
->rtm_scope
= RT_SCOPE_LINK
;
2281 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
2283 DBG2(DBG_KNL
, "enumerating local subnets failed");
2284 return enumerator_create_empty();
2289 .enumerate
= enumerator_enumerate_default
,
2290 .venumerate
= _enumerate_subnets
,
2291 .destroy
= _destroy_subnet_enumerator
,
2297 return &enumerator
->public;
2301 * Manages the creation and deletion of IPv6 address labels for virtual IPs.
2302 * By setting the appropriate nlmsg_type the label is either added or removed.
/**
 * manage_addrlabel(): add or remove an address label for ip.
 *
 * this:       kernel_netlink_net instance; its socket sends the request.
 * nlmsg_type: netlink message type; RTM_NEWADDRLABEL additionally sets
 *             NLM_F_CREATE | NLM_F_EXCL.
 * ip:         address the label applies to; the prefix length is the full
 *             address length in bits.
 *
 * The request carries the address as IFAL_ADDRESS and a label value as
 * IFAL_LABEL and is sent with send_ack(), whose status is returned.
 *
 * NOTE(review): the declaration and value of label and the end of the
 * comment in front of the IFAL_LABEL attribute are not part of this
 * listing.
 */
2304 static status_t
manage_addrlabel(private_kernel_netlink_net_t
*this,
2305 int nlmsg_type
, host_t
*ip
)
2307 netlink_buf_t request
;
2308 struct nlmsghdr
*hdr
;
2309 struct ifaddrlblmsg
*msg
;
2313 memset(&request
, 0, sizeof(request
));
2315 chunk
= ip
->get_address(ip
);
2318 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2319 if (nlmsg_type
== RTM_NEWADDRLABEL
)
2321 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2323 hdr
->nlmsg_type
= nlmsg_type
;
2324 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrlblmsg
));
2326 msg
= NLMSG_DATA(hdr
);
2327 msg
->ifal_family
= ip
->get_family(ip
);
2328 msg
->ifal_prefixlen
= chunk
.len
* 8;
2330 netlink_add_attribute(hdr
, IFAL_ADDRESS
, chunk
, sizeof(request
));
2331 /* doesn't really matter as default labels are < 20 but this makes it kinda
2334 netlink_add_attribute(hdr
, IFAL_LABEL
, chunk_from_thing(label
),
2337 return this->socket
->send_ack(this->socket
, hdr
);
2341 * Manages the creation and deletion of ip addresses on an interface.
2342 * By setting the appropriate nlmsg_type, the ip will be set or unset.
/**
 * manage_ipaddr(): add or remove an IP address on an interface.
 *
 * this:       kernel_netlink_net instance; its socket sends the request.
 * nlmsg_type: netlink message type (the callers in this file pass
 *             RTM_NEWADDR and RTM_DELADDR).
 * flags:      additional netlink flags, combined with
 *             NLM_F_REQUEST | NLM_F_ACK.
 * if_index:   index of the target interface.
 * ip:         address to add or remove, sent as IFA_LOCAL.
 * prefix:     prefix length; a negative value selects the full address
 *             length in bits.
 *
 * For IPv6 addresses IFA_F_NODAD is set.  If rta_prefsrc_for_ipv6 is set,
 * an address label is managed through manage_addrlabel(); if that does
 * not succeed, an IFA_CACHEINFO attribute with ifa_valid 0xFFFFFFFF is
 * added instead.  The request is sent with send_ack(), whose status is
 * returned.
 *
 * NOTE(review): some original lines, among them the end of the last
 * comment and the remaining ifa_cacheinfo members, are not part of this
 * listing.
 */
2344 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2345 int flags
, int if_index
, host_t
*ip
, int prefix
)
2347 netlink_buf_t request
;
2348 struct nlmsghdr
*hdr
;
2349 struct ifaddrmsg
*msg
;
2352 memset(&request
, 0, sizeof(request
));
2354 chunk
= ip
->get_address(ip
);
2357 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2358 hdr
->nlmsg_type
= nlmsg_type
;
2359 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
2361 msg
= NLMSG_DATA(hdr
);
2362 msg
->ifa_family
= ip
->get_family(ip
);
2364 msg
->ifa_prefixlen
= prefix
< 0 ? chunk
.len
* 8 : prefix
;
2365 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
2366 msg
->ifa_index
= if_index
;
2368 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
2370 if (ip
->get_family(ip
) == AF_INET6
)
2373 msg
->ifa_flags
|= IFA_F_NODAD
;
2375 if (this->rta_prefsrc_for_ipv6
)
2377 /* if source routes are possible we set a label for this virtual IP
2378 * so it gets only used if forced by our route, and not by the
2379 * default IPv6 address selection */
2380 int labelop
= nlmsg_type
== RTM_NEWADDR
? RTM_NEWADDRLABEL
2382 if (manage_addrlabel(this, labelop
, ip
) != SUCCESS
)
2384 /* if we can't use address labels we let the virtual IP get
2385 * deprecated immediately (but mark it as valid forever), which
2386 * should also avoid that it gets used by the default address
2388 struct ifa_cacheinfo cache
= {
2389 .ifa_valid
= 0xFFFFFFFF,
2392 netlink_add_attribute(hdr
, IFA_CACHEINFO
,
2393 chunk_from_thing(cache
), sizeof(request
));
2397 return this->socket
->send_ack(this->socket
, hdr
);
/**
 * add_ip(): kernel_net_t method that installs a virtual IP.
 *
 * this:       kernel_netlink_net instance.
 * virtual_ip: address to install.
 * prefix:     prefix length handed on to manage_ipaddr().
 * (a further parameter, used below as iface_name, is declared on a line
 * that is not part of this listing)
 *
 * Visible flow:
 *  - nothing is installed if this->install_virtual_ip is not set;
 *  - under the write lock the address is looked up in this->addrs and
 *    then in this->vips; for a known virtual IP the code waits on
 *    this->condvar until is_vip_installed_or_gone() and increments the
 *    refcount of the entry;
 *  - otherwise a target interface is chosen: install_virtual_ip_on if
 *    configured and found, else iface_name, else the first known
 *    interface;
 *  - a new address entry is appended to iface->addrs and registered in
 *    this->vips, the lock is released and RTM_NEWADDR is sent with
 *    NLM_F_CREATE | NLM_F_EXCL;
 *  - on success the code waits until the address is reported and calls
 *    queue_route_reinstall() with a strdup() copy of the interface name.
 *
 * Returns status_t.  NOTE(review): the return statements, several
 * conditions and the allocation of the new address entry are not part of
 * this listing.
 */
2400 METHOD(kernel_net_t
, add_ip
, status_t
,
2401 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2404 addr_map_entry_t
*entry
, lookup
= {
2407 iface_entry_t
*iface
= NULL
;
2409 if (!this->install_virtual_ip
)
2410 { /* disabled by config */
2414 this->lock
->write_lock(this->lock
);
2415 /* the virtual IP might actually be installed as regular IP, in which case
2416 * we don't track it as virtual IP */
2417 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2418 (void*)addr_map_entry_match
);
2420 { /* otherwise it might already be installed as virtual IP */
2421 entry
= this->vips
->get_match(this->vips
, &lookup
,
2422 (void*)addr_map_entry_match
);
2424 { /* the vip we found can be in one of three states: 1) installed and
2425 * ready, 2) just added by another thread, but not yet confirmed to
2426 * be installed by the kernel, 3) just deleted, but not yet gone.
2427 * Then while we wait below, several things could happen (as we
2428 * release the lock). For instance, the interface could disappear,
2429 * or the IP is finally deleted, and it reappears on a different
2430 * interface. All these cases are handled by the call below. */
2431 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2433 this->condvar
->wait(this->condvar
, this->lock
);
2437 entry
->addr
->refcount
++;
2443 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
2444 entry
->iface
->ifname
);
2445 this->lock
->unlock(this->lock
);
2448 /* try to find the target interface, either by config or via src ip */
2449 if (!this->install_virtual_ip_on
||
2450 !this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
2451 (void**)&iface
, this->install_virtual_ip_on
))
2453 if (!this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
2454 (void**)&iface
, iface_name
))
2455 { /* if we don't find the requested interface we just use the first */
2456 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
2466 .ip
= virtual_ip
->clone(virtual_ip
),
2468 .scope
= RT_SCOPE_UNIVERSE
,
/* the entry is registered before the kernel request is sent; the
 * interface index is copied because the lock is released for the send */
2470 iface
->addrs
->insert_last(iface
->addrs
, addr
);
2471 addr_map_entry_add(this->vips
, addr
, iface
);
2472 ifi
= iface
->ifindex
;
2473 this->lock
->unlock(this->lock
);
2474 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
2475 ifi
, virtual_ip
, prefix
) == SUCCESS
)
2477 this->lock
->write_lock(this->lock
);
2478 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2479 { /* wait until address appears */
2480 this->condvar
->wait(this->condvar
, this->lock
);
2483 { /* we fail if the interface got deleted in the meantime */
2484 ifname
= strdup(entry
->iface
->ifname
);
2485 this->lock
->unlock(this->lock
);
2486 DBG2(DBG_KNL
, "virtual IP %H installed on %s",
2487 virtual_ip
, ifname
);
2488 /* during IKEv1 reauthentication, children get moved from
2489 * old the new SA before the virtual IP is available. This
2490 * kills the route for our virtual IP, reinstall. */
2491 queue_route_reinstall(this, ifname
);
2494 this->lock
->unlock(this->lock
);
2496 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
2499 this->lock
->unlock(this->lock
);
2500 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
/**
 * del_ip(): kernel_net_t method that removes a virtual IP.
 *
 * this:       kernel_netlink_net instance.
 * virtual_ip: address to remove.
 * prefix:     prefix length handed on to manage_ipaddr().
 * (a further parameter, used below as wait, is declared on a line that
 * is not part of this listing)
 *
 * Visible flow:
 *  - nothing is done if this->install_virtual_ip is not set;
 *  - under the write lock the address is looked up in this->vips; if it
 *    is not tracked there, this->addrs is consulted only to log whether
 *    the address exists as a regular address or is unknown;
 *  - if the refcount of the entry is 1, the entry is marked as not
 *    installed, the lock is released and RTM_DELADDR is sent; if that
 *    succeeds and wait is set, the code waits in steps of 1000 (unit per
 *    timed_wait) until the address is no longer a known virtual IP or
 *    the watcher has stopped;
 *  - otherwise the refcount is decremented and the address stays.
 *
 * Returns status_t.  NOTE(review): the return statements and several
 * conditions are not part of this listing.
 */
2505 METHOD(kernel_net_t
, del_ip
, status_t
,
2506 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2509 addr_map_entry_t
*entry
, lookup
= {
2513 if (!this->install_virtual_ip
)
2514 { /* disabled by config */
2518 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
2520 this->lock
->write_lock(this->lock
);
2521 entry
= this->vips
->get_match(this->vips
, &lookup
,
2522 (void*)addr_map_entry_match
);
2524 { /* we didn't install this IP as virtual IP */
2525 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2526 (void*)addr_map_entry_match
);
2529 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
2530 entry
->iface
->ifname
);
2531 this->lock
->unlock(this->lock
);
2534 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
2535 this->lock
->unlock(this->lock
);
/* last reference: the address is actually removed from the kernel */
2538 if (entry
->addr
->refcount
== 1)
2543 /* we set this flag so that threads calling add_ip will block and wait
2544 * until the entry is gone, also so we can wait below */
2545 entry
->addr
->installed
= FALSE
;
2546 ifi
= entry
->iface
->ifindex
;
2547 this->lock
->unlock(this->lock
);
2548 status
= manage_ipaddr(this, RTM_DELADDR
, 0, ifi
, virtual_ip
, prefix
);
2549 if (status
== SUCCESS
&& wait
)
2550 { /* wait until the address is really gone */
2551 this->lock
->write_lock(this->lock
);
2552 while (is_known_vip(this, virtual_ip
) &&
2553 lib
->watcher
->get_state(lib
->watcher
) != WATCHER_STOPPED
)
2554 { /* don't wait during deinit when we can't get notified,
2555 * re-evaluate watcher state if we have to wait longer */
2556 this->condvar
->timed_wait(this->condvar
, this->lock
, 1000);
2558 this->lock
->unlock(this->lock
);
2564 entry
->addr
->refcount
--;
2566 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
2568 this->lock
->unlock(this->lock
);
2573 * Manages source routes in the routing table.
2574 * By setting the appropriate nlmsg_type, the route gets added or removed.
/**
 * manage_srcroute(): add or remove a source route.
 *
 * this:       kernel_netlink_net instance (routing_table, mtu, mss and
 *             the socket are read here).
 * nlmsg_type: netlink message type (the callers in this file pass
 *             RTM_NEWROUTE and RTM_DELROUTE).
 * flags:      additional netlink flags, combined with
 *             NLM_F_REQUEST | NLM_F_ACK.
 * dst_net:    destination network; a length of 4 selects AF_INET, any
 *             other length AF_INET6.
 * prefixlen:  prefix length of the destination network.
 * gateway:    optional gateway; used only if its family equals that of
 *             src_ip.
 * src_ip:     preferred source address (RTA_PREFSRC).
 * if_name:    outgoing interface, resolved with get_interface_index().
 * pass:       TRUE installs an RTN_THROW route, FALSE an RTN_UNICAST one.
 *
 * A /0 destination in the main table (routing_table == 0) is split into
 * two routes handled by recursive calls: one on an all-zero copy of the
 * network and one on that copy with the top bit of the first byte set;
 * their status values are combined with a bitwise OR.
 *
 * The table goes into rtm_table if it is below 256, otherwise into an
 * RTA_TABLE attribute when HAVE_RTA_TABLE is defined.  RTA_PREFSRC,
 * RTA_GATEWAY, RTA_OIF and RTA_METRICS are added only for RTM_NEWROUTE
 * with pass unset.  The request is sent with send_ack(), whose status is
 * returned.
 *
 * NOTE(review): the assignment of half_prefixlen, the conditions around
 * the two metric entries and several braces and returns are not part of
 * this listing.
 */
2576 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
2577 int nlmsg_type
, int flags
, chunk_t dst_net
,
2578 uint8_t prefixlen
, host_t
*gateway
,
2579 host_t
*src_ip
, char *if_name
, bool pass
)
2581 netlink_buf_t request
;
2582 struct nlmsghdr
*hdr
;
2588 /* if route is 0.0.0.0/0, we can't install it, as it would
2589 * overwrite the default route. Instead, we add two routes:
2590 * 0.0.0.0/1 and 128.0.0.0/1 */
2591 if (this->routing_table
== 0 && prefixlen
== 0)
2594 uint8_t half_prefixlen
;
/* stack copy with the length of dst_net, zeroed to form the lower half */
2597 half_net
= chunk_alloca(dst_net
.len
);
2598 memset(half_net
.ptr
, 0, half_net
.len
);
2600 /* no throw routes in the main table */
2601 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
,
2602 half_prefixlen
, gateway
, src_ip
, if_name
, FALSE
);
2603 half_net
.ptr
[0] |= 0x80;
2604 status
|= manage_srcroute(this, nlmsg_type
, flags
, half_net
,
2605 half_prefixlen
, gateway
, src_ip
, if_name
, FALSE
);
2609 memset(&request
, 0, sizeof(request
));
2612 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2613 hdr
->nlmsg_type
= nlmsg_type
;
2614 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2616 msg
= NLMSG_DATA(hdr
);
2617 msg
->rtm_family
= (dst_net
.len
== 4) ? AF_INET
: AF_INET6
;
2618 msg
->rtm_dst_len
= prefixlen
;
2619 msg
->rtm_protocol
= RTPROT_STATIC
;
2620 msg
->rtm_type
= pass
? RTN_THROW
: RTN_UNICAST
;
2621 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
/* table IDs below 256 fit into the header field, larger ones need the
 * RTA_TABLE attribute */
2623 if (this->routing_table
< 256)
2625 msg
->rtm_table
= this->routing_table
;
2629 #ifdef HAVE_RTA_TABLE
2630 chunk
= chunk_from_thing(this->routing_table
);
2631 netlink_add_attribute(hdr
, RTA_TABLE
, chunk
, sizeof(request
));
2633 DBG1(DBG_KNL
, "routing table IDs > 255 are not supported");
2635 #endif /* HAVE_RTA_TABLE */
2637 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
2639 /* only when installing regular routes do we need all the parameters,
2640 * deletes are done by destination net (except if metrics are used, which
2641 * we don't support), for throw routes we don't need any of them either */
2642 if (nlmsg_type
== RTM_NEWROUTE
&& !pass
)
2644 chunk
= src_ip
->get_address(src_ip
);
2645 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
2646 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
2648 chunk
= gateway
->get_address(gateway
);
2649 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
2651 ifindex
= get_interface_index(this, if_name
);
2652 chunk
.ptr
= (char*)&ifindex
;
2653 chunk
.len
= sizeof(ifindex
);
2654 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
/* RTA_METRICS carries nested attributes; the buffer is sized for two
 * entries and chunk.len is set to the bytes actually filled */
2656 if (this->mtu
|| this->mss
)
2658 chunk
= chunk_alloca(RTA_LENGTH((sizeof(struct rtattr
) +
2659 sizeof(uint32_t)) * 2));
2661 rta
= (struct rtattr
*)chunk
.ptr
;
2664 rta
->rta_type
= RTAX_MTU
;
2665 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2666 memcpy(RTA_DATA(rta
), &this->mtu
, sizeof(uint32_t));
2667 chunk
.len
= rta
->rta_len
;
2671 rta
= (struct rtattr
*)(chunk
.ptr
+ RTA_ALIGN(chunk
.len
));
2672 rta
->rta_type
= RTAX_ADVMSS
;
2673 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2674 memcpy(RTA_DATA(rta
), &this->mss
, sizeof(uint32_t));
2675 chunk
.len
= RTA_ALIGN(chunk
.len
) + rta
->rta_len
;
2677 netlink_add_attribute(hdr
, RTA_METRICS
, chunk
, sizeof(request
));
2680 return this->socket
->send_ack(this->socket
, hdr
);
2684 * Helper struct used to check routes
/*
 * route_entry_lookup_t: lookup key used by add_route() and del_route();
 * it pairs the route searched for with the kernel interface so that the
 * match callbacks route_with_vip() and route_with_dst() can reach both.
 * NOTE(review): the opening line of the type definition is not part of
 * this listing.
 */
2687 /** the entry we look for */
2688 route_entry_t route
;
2689 /** kernel interface */
2690 private_kernel_netlink_net_t
*this;
2691 } route_entry_lookup_t
;
2694 * Check if a matching route entry has a VIP associated
/**
 * route_with_vip(): match callback for this->routes->get_match().
 *
 * a: lookup key; a->route holds the destination searched for and a->this
 *    the kernel interface.
 * b: candidate entry from the route table.
 *
 * The condition holds if both destination networks and both prefix
 * lengths are equal and the source address of b is a known virtual IP.
 * is_known_vip() is evaluated last, so it only runs for entries with a
 * matching destination.  NOTE(review): the return statements are not
 * part of this listing.
 */
2696 static bool route_with_vip(route_entry_lookup_t
*a
, route_entry_t
*b
)
2698 if (chunk_equals(a
->route
.dst_net
, b
->dst_net
) &&
2699 a
->route
.prefixlen
== b
->prefixlen
&&
2700 is_known_vip(a
->this, b
->src_ip
))
2708 * Check if there is any route entry with a matching destination
/**
 * route_with_dst(): match callback for this->routes->get_match().
 *
 * a: lookup key; only a->route.dst_net and a->route.prefixlen are read.
 * b: candidate entry from the route table.
 *
 * The condition holds if both destination networks and both prefix
 * lengths are equal; source address, gateway and interface are not
 * compared.  NOTE(review): the return statements are not part of this
 * listing.
 */
2710 static bool route_with_dst(route_entry_lookup_t
*a
, route_entry_t
*b
)
2712 if (chunk_equals(a
->route
.dst_net
, b
->dst_net
) &&
2713 a
->route
.prefixlen
== b
->prefixlen
)
/**
 * add_route(): kernel_net_t method that installs a source route and
 * records it in this->routes.
 *
 * this:      kernel_netlink_net instance.
 * dst_net:   destination network.
 * prefixlen: prefix length of the destination network.
 * gateway:   optional gateway.
 * src_ip:    source address for the route.
 * if_name:   outgoing interface name.
 * pass:      request for a throw route; forced to FALSE when no separate
 *            routing table is configured.
 *
 * Visible flow, all under this->routes_lock:
 *  - if an identical entry is already stored, ALREADY_DONE is returned;
 *  - unless src_ip is itself a known virtual IP, the table is searched
 *    (under the read lock of this->lock) for a route to the same
 *    destination that uses a virtual IP;
 *  - the route is installed with RTM_NEWROUTE and
 *    NLM_F_CREATE | NLM_F_REPLACE;
 *  - if the status is SUCCESS, a clone of the lookup entry is stored in
 *    the table.
 *
 * Returns status_t.  NOTE(review): the remaining initialisers of lookup,
 * the condition guarding the install and the final return are not part
 * of this listing.
 */
2720 METHOD(kernel_net_t
, add_route
, status_t
,
2721 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2722 host_t
*gateway
, host_t
*src_ip
, char *if_name
, bool pass
)
2725 route_entry_t
*found
;
2726 route_entry_lookup_t lookup
= {
2729 .prefixlen
= prefixlen
,
2738 if (!this->routing_table
)
2739 { /* treat these as regular routes if installing in the main table */
2740 pass
= lookup
.route
.pass
= FALSE
;
2743 this->routes_lock
->lock(this->routes_lock
);
2744 found
= this->routes
->ht
.get(&this->routes
->ht
, &lookup
.route
);
2747 this->routes_lock
->unlock(this->routes_lock
);
2748 return ALREADY_DONE
;
2751 /* don't replace the route if we already have one with a VIP installed,
2752 * but keep track of it in case that other route is uninstalled */
2753 this->lock
->read_lock(this->lock
);
2754 if (!is_known_vip(this, src_ip
))
2756 found
= this->routes
->get_match(this->routes
, &lookup
,
2757 (void*)route_with_vip
);
2759 this->lock
->unlock(this->lock
);
2766 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
|NLM_F_REPLACE
,
2767 dst_net
, prefixlen
, gateway
, src_ip
, if_name
,
2770 if (status
== SUCCESS
)
/* the clone serves as both key and value of the table entry */
2772 found
= route_entry_clone(&lookup
.route
);
2773 this->routes
->ht
.put(&this->routes
->ht
, found
, found
);
2775 this->routes_lock
->unlock(this->routes_lock
);
/**
 * del_route(): kernel_net_t method that removes a source route recorded
 * by add_route().
 *
 * this:      kernel_netlink_net instance.
 * dst_net:   destination network.
 * prefixlen: prefix length of the destination network.
 * gateway:   optional gateway.
 * src_ip:    source address of the route.
 * if_name:   outgoing interface name.
 * pass:      throw route flag; forced to FALSE when no separate routing
 *            table is configured.
 *
 * Visible flow, all under this->routes_lock:
 *  - the entry is removed from this->routes and destroyed;
 *  - the table is searched for another route to the same destination,
 *    first one using a virtual IP (route_with_vip, under the read lock
 *    of this->lock), then any (route_with_dst);
 *  - such a route is installed in place of the removed one with
 *    RTM_NEWROUTE and NLM_F_CREATE | NLM_F_REPLACE; the other visible
 *    call sends RTM_DELROUTE for the requested route.
 *
 * Returns status_t.  NOTE(review): the remaining initialisers of lookup,
 * the handling of an unknown route, the conditions between the two
 * searches and the final return are not part of this listing.
 */
2779 METHOD(kernel_net_t
, del_route
, status_t
,
2780 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2781 host_t
*gateway
, host_t
*src_ip
, char *if_name
, bool pass
)
2784 route_entry_t
*found
;
2785 route_entry_lookup_t lookup
= {
2788 .prefixlen
= prefixlen
,
2797 if (!this->routing_table
)
2798 { /* treat these as regular routes if installing in the main table */
2799 pass
= lookup
.route
.pass
= FALSE
;
2802 this->routes_lock
->lock(this->routes_lock
);
2803 found
= this->routes
->ht
.remove(&this->routes
->ht
, &lookup
.route
);
2806 this->routes_lock
->unlock(this->routes_lock
);
2809 route_entry_destroy(found
);
2811 /* check if there are any other routes for the same destination and if
2812 * so update the route, otherwise uninstall it */
2813 this->lock
->read_lock(this->lock
);
2814 found
= this->routes
->get_match(this->routes
, &lookup
,
2815 (void*)route_with_vip
);
2816 this->lock
->unlock(this->lock
);
2819 found
= this->routes
->get_match(this->routes
, &lookup
,
2820 (void*)route_with_dst
);
/* re-install with the parameters of the remaining entry, not with the
 * arguments of this call */
2824 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
|NLM_F_REPLACE
,
2825 found
->dst_net
, found
->prefixlen
, found
->gateway
,
2826 found
->src_ip
, found
->if_name
, found
->pass
);
2830 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
2831 gateway
, src_ip
, if_name
, pass
);
2833 this->routes_lock
->unlock(this->routes_lock
);
2838 * Initialize a list of local addresses.
/**
 * init_address_list(): populate the interface and address caches from the
 * kernel and log the result.
 *
 * this: kernel_netlink_net instance; its socket is used for the queries.
 *
 * One request buffer is used twice: first with RTM_GETLINK, whose
 * response messages go to process_link(.., FALSE), then with RTM_GETADDR,
 * whose response messages go to process_addr(.., FALSE).  Both requests
 * use NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT and family AF_UNSPEC.
 * Finally, under the read lock, every interface accepted by
 * iface_entry_up_and_usable() is logged together with its addresses.
 *
 * Returns status_t.  NOTE(review): the case labels, the handling of
 * failed requests, the release of the responses and the return
 * statements are not part of this listing.
 */
2840 static status_t
init_address_list(private_kernel_netlink_net_t
*this)
2842 netlink_buf_t request
;
2843 struct nlmsghdr
*out
, *current
, *in
;
2844 struct rtgenmsg
*msg
;
2846 enumerator_t
*ifaces
, *addrs
;
2847 iface_entry_t
*iface
;
2850 DBG2(DBG_KNL
, "known interfaces and IP addresses:");
2852 memset(&request
, 0, sizeof(request
));
2855 in
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtgenmsg
));
2856 in
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_MATCH
| NLM_F_ROOT
;
2857 msg
= NLMSG_DATA(in
);
2858 msg
->rtgen_family
= AF_UNSPEC
;
/* first query: all links */
2861 in
->nlmsg_type
= RTM_GETLINK
;
2862 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2867 while (NLMSG_OK(current
, len
))
2869 switch (current
->nlmsg_type
)
2874 process_link(this, current
, FALSE
);
2877 current
= NLMSG_NEXT(current
, len
);
2884 /* get all interface addresses */
2885 in
->nlmsg_type
= RTM_GETADDR
;
2886 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2891 while (NLMSG_OK(current
, len
))
2893 switch (current
->nlmsg_type
)
2898 process_addr(this, current
, FALSE
);
2901 current
= NLMSG_NEXT(current
, len
);
/* log what was collected; only interfaces that are up and usable */
2908 this->lock
->read_lock(this->lock
);
2909 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
2910 while (ifaces
->enumerate(ifaces
, &iface
))
2912 if (iface_entry_up_and_usable(iface
))
2914 DBG2(DBG_KNL
, " %s", iface
->ifname
);
2915 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
2916 while (addrs
->enumerate(addrs
, (void**)&addr
))
2918 DBG2(DBG_KNL
, " %H", addr
->ip
);
2920 addrs
->destroy(addrs
);
2923 ifaces
->destroy(ifaces
);
2924 this->lock
->unlock(this->lock
);
2929 * create or delete a rule to use our routing table
2931 static status_t
manage_rule(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2932 int family
, uint32_t table
, uint32_t prio
)
2934 netlink_buf_t request
;
2935 struct nlmsghdr
*hdr
;
2940 memset(&request
, 0, sizeof(request
));
2942 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2943 hdr
->nlmsg_type
= nlmsg_type
;
2944 if (nlmsg_type
== RTM_NEWRULE
)
2946 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2948 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2950 msg
= NLMSG_DATA(hdr
);
2951 msg
->rtm_family
= family
;
2952 msg
->rtm_protocol
= RTPROT_BOOT
;
2953 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2954 msg
->rtm_type
= RTN_UNICAST
;
2956 if (this->routing_table
< 256)
2958 msg
->rtm_table
= table
;
2962 #ifdef HAVE_LINUX_FIB_RULES_H
2963 chunk
= chunk_from_thing(table
);
2964 netlink_add_attribute(hdr
, FRA_TABLE
, chunk
, sizeof(request
));
2966 DBG1(DBG_KNL
, "routing table IDs > 255 are not supported");
2968 #endif /* HAVE_LINUX_FIB_RULES_H */
2970 chunk
= chunk_from_thing(prio
);
2971 netlink_add_attribute(hdr
, RTA_PRIORITY
, chunk
, sizeof(request
));
2973 fwmark
= lib
->settings
->get_str(lib
->settings
,
2974 "%s.plugins.kernel-netlink.fwmark", NULL
, lib
->ns
);
2977 #ifdef HAVE_LINUX_FIB_RULES_H
2980 if (fwmark
[0] == '!')
2982 msg
->rtm_flags
|= FIB_RULE_INVERT
;
2985 if (mark_from_string(fwmark
, MARK_OP_NONE
, &mark
))
2987 chunk
= chunk_from_thing(mark
.value
);
2988 netlink_add_attribute(hdr
, FRA_FWMARK
, chunk
, sizeof(request
));
2989 chunk
= chunk_from_thing(mark
.mask
);
2990 netlink_add_attribute(hdr
, FRA_FWMASK
, chunk
, sizeof(request
));
2991 if (msg
->rtm_flags
& FIB_RULE_INVERT
)
2993 this->routing_mark
= mark
;
2997 DBG1(DBG_KNL
, "setting firewall mark on routing rule is not supported");
2998 #endif /* HAVE_LINUX_FIB_RULES_H */
3000 return this->socket
->send_ack(this->socket
, hdr
);
3004 * check for kernel features (currently only via version number)
3006 static void check_kernel_features(private_kernel_netlink_net_t
*this)
3008 struct utsname utsname
;
3011 if (uname(&utsname
) == 0)
3013 switch(sscanf(utsname
.release
, "%d.%d.%d", &a
, &b
, &c
))
3018 if (b
== 6 && c
>= 36)
3020 this->rta_mark
= TRUE
;
3022 DBG2(DBG_KNL
, "detected Linux %d.%d.%d, no support for "
3023 "RTA_PREFSRC for IPv6 routes", a
, b
, c
);
3028 /* only 3.x+ uses two part version numbers */
3029 this->rta_prefsrc_for_ipv6
= TRUE
;
3030 this->rta_mark
= TRUE
;
3039 * Destroy an address to iface map
3041 static void addr_map_destroy(hashlist_t
*map
)
3043 map
->ht
.destroy_function(&map
->ht
, (void*)free
);
3046 METHOD(kernel_net_t
, destroy
, void,
3047 private_kernel_netlink_net_t
*this)
3049 enumerator_t
*enumerator
;
3050 route_entry_t
*route
;
3052 if (this->routing_table
&& this->socket
)
3054 manage_rule(this, RTM_DELRULE
, AF_INET
, this->routing_table
,
3055 this->routing_table_prio
);
3056 manage_rule(this, RTM_DELRULE
, AF_INET6
, this->routing_table
,
3057 this->routing_table_prio
);
3059 if (this->socket_events
> 0)
3061 lib
->watcher
->remove(lib
->watcher
, this->socket_events
);
3062 close(this->socket_events
);
3064 enumerator
= this->routes
->ht
.create_enumerator(&this->routes
->ht
);
3065 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
3067 manage_srcroute(this, RTM_DELROUTE
, 0, route
->dst_net
, route
->prefixlen
,
3068 route
->gateway
, route
->src_ip
, route
->if_name
,
3070 route_entry_destroy(route
);
3072 enumerator
->destroy(enumerator
);
3073 this->routes
->destroy(this->routes
);
3074 this->routes_lock
->destroy(this->routes_lock
);
3075 DESTROY_IF(this->socket
);
3077 net_changes_clear(this);
3078 this->net_changes
->destroy(this->net_changes
);
3079 this->net_changes_lock
->destroy(this->net_changes_lock
);
3081 addr_map_destroy(this->addrs
);
3082 addr_map_destroy(this->vips
);
3084 this->ifaces
->destroy_function(this->ifaces
, (void*)iface_entry_destroy
);
3085 this->rt_exclude
->destroy(this->rt_exclude
);
3086 this->roam_lock
->destroy(this->roam_lock
);
3087 this->condvar
->destroy(this->condvar
);
3088 this->lock
->destroy(this->lock
);
3093 * Described in header.
3095 kernel_netlink_net_t
*kernel_netlink_net_create()
3097 private_kernel_netlink_net_t
*this;
3098 enumerator_t
*enumerator
;
3099 bool register_for_events
= TRUE
;
3105 .get_interface
= _get_interface_name
,
3106 .create_address_enumerator
= _create_address_enumerator
,
3107 .create_local_subnet_enumerator
= _create_local_subnet_enumerator
,
3108 .get_source_addr
= _get_source_addr
,
3109 .get_nexthop
= _get_nexthop
,
3112 .add_route
= _add_route
,
3113 .del_route
= _del_route
,
3114 .destroy
= _destroy
,
3117 .socket
= netlink_socket_create(NETLINK_ROUTE
, rt_msg_names
,
3118 lib
->settings
->get_bool(lib
->settings
,
3119 "%s.plugins.kernel-netlink.parallel_route", FALSE
, lib
->ns
)),
3120 .rt_exclude
= linked_list_create(),
3121 .routes
= hashlist_create((hashtable_hash_t
)route_entry_hash
,
3122 (hashtable_equals_t
)route_entry_equals
, 16),
3123 .net_changes
= hashtable_create(
3124 (hashtable_hash_t
)net_change_hash
,
3125 (hashtable_equals_t
)net_change_equals
, 16),
3126 .addrs
= hashlist_create(
3127 (hashtable_hash_t
)addr_map_entry_hash
,
3128 (hashtable_equals_t
)addr_map_entry_equals
, 16),
3129 .vips
= hashlist_create((hashtable_hash_t
)addr_map_entry_hash
,
3130 (hashtable_equals_t
)addr_map_entry_equals
, 16),
3131 .routes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
3132 .net_changes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
3133 .ifaces
= linked_list_create(),
3134 .lock
= rwlock_create(RWLOCK_TYPE_DEFAULT
),
3135 .condvar
= rwlock_condvar_create(),
3136 .roam_lock
= spinlock_create(),
3137 .routing_table
= lib
->settings
->get_int(lib
->settings
,
3138 "%s.routing_table", ROUTING_TABLE
, lib
->ns
),
3139 .routing_table_prio
= lib
->settings
->get_int(lib
->settings
,
3140 "%s.routing_table_prio", ROUTING_TABLE_PRIO
, lib
->ns
),
3141 .process_route
= lib
->settings
->get_bool(lib
->settings
,
3142 "%s.process_route", TRUE
, lib
->ns
),
3143 .install_routes
= lib
->settings
->get_bool(lib
->settings
,
3144 "%s.install_routes", TRUE
, lib
->ns
),
3145 .install_virtual_ip
= lib
->settings
->get_bool(lib
->settings
,
3146 "%s.install_virtual_ip", TRUE
, lib
->ns
),
3147 .install_virtual_ip_on
= lib
->settings
->get_str(lib
->settings
,
3148 "%s.install_virtual_ip_on", NULL
, lib
->ns
),
3149 .prefer_temporary_addrs
= lib
->settings
->get_bool(lib
->settings
,
3150 "%s.prefer_temporary_addrs", FALSE
, lib
->ns
),
3151 .roam_events
= lib
->settings
->get_bool(lib
->settings
,
3152 "%s.plugins.kernel-netlink.roam_events", TRUE
, lib
->ns
),
3153 .process_rules
= lib
->settings
->get_bool(lib
->settings
,
3154 "%s.plugins.kernel-netlink.process_rules", FALSE
, lib
->ns
),
3155 .mtu
= lib
->settings
->get_int(lib
->settings
,
3156 "%s.plugins.kernel-netlink.mtu", 0, lib
->ns
),
3157 .mss
= lib
->settings
->get_int(lib
->settings
,
3158 "%s.plugins.kernel-netlink.mss", 0, lib
->ns
),
3160 timerclear(&this->last_route_reinstall
);
3161 timerclear(&this->next_roam
);
3163 check_kernel_features(this);
3171 if (streq(lib
->ns
, "starter"))
3172 { /* starter has no threads, so we do not register for kernel events */
3173 register_for_events
= FALSE
;
3176 exclude
= lib
->settings
->get_str(lib
->settings
,
3177 "%s.ignore_routing_tables", NULL
, lib
->ns
);
3183 enumerator
= enumerator_create_token(exclude
, " ", " ");
3184 while (enumerator
->enumerate(enumerator
, &token
))
3187 table
= strtoul(token
, NULL
, 10);
3191 this->rt_exclude
->insert_last(this->rt_exclude
, (void*)table
);
3194 enumerator
->destroy(enumerator
);
3197 if (register_for_events
)
3199 struct sockaddr_nl addr
;
3201 memset(&addr
, 0, sizeof(addr
));
3202 addr
.nl_family
= AF_NETLINK
;
3204 /* create and bind RT socket for events (address/interface/route changes) */
3205 this->socket_events
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
3206 if (this->socket_events
< 0)
3208 DBG1(DBG_KNL
, "unable to create RT event socket: %s (%d)",
3209 strerror(errno
), errno
);
3213 addr
.nl_groups
= nl_group(RTNLGRP_IPV4_IFADDR
) |
3214 nl_group(RTNLGRP_IPV6_IFADDR
) |
3215 nl_group(RTNLGRP_LINK
);
3216 if (this->process_route
)
3218 addr
.nl_groups
|= nl_group(RTNLGRP_IPV4_ROUTE
) |
3219 nl_group(RTNLGRP_IPV6_ROUTE
);
3221 if (this->process_rules
)
3223 addr
.nl_groups
|= nl_group(RTNLGRP_IPV4_RULE
) |
3224 nl_group(RTNLGRP_IPV6_RULE
);
3226 if (bind(this->socket_events
, (struct sockaddr
*)&addr
, sizeof(addr
)))
3228 DBG1(DBG_KNL
, "unable to bind RT event socket: %s (%d)",
3229 strerror(errno
), errno
);
3234 lib
->watcher
->add(lib
->watcher
, this->socket_events
, WATCHER_READ
,
3235 (watcher_cb_t
)receive_events
, this);
3238 if (init_address_list(this) != SUCCESS
)
3240 DBG1(DBG_KNL
, "unable to get interface list");
3245 if (this->routing_table
)
3247 if (manage_rule(this, RTM_NEWRULE
, AF_INET
, this->routing_table
,
3248 this->routing_table_prio
) != SUCCESS
)
3250 DBG1(DBG_KNL
, "unable to create IPv4 routing table rule");
3252 if (manage_rule(this, RTM_NEWRULE
, AF_INET6
, this->routing_table
,
3253 this->routing_table_prio
) != SUCCESS
)
3255 DBG1(DBG_KNL
, "unable to create IPv6 routing table rule");
3259 return &this->public;