2 * Copyright (C) 2008-2019 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
5 * Copyright (C) secunet Security Networks AG
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
44 #include <linux/if_addrlabel.h>
48 #ifdef HAVE_LINUX_FIB_RULES_H
49 #include <linux/fib_rules.h>
52 #include "kernel_netlink_net.h"
53 #include "kernel_netlink_shared.h"
56 #include <utils/debug.h>
57 #include <threading/mutex.h>
58 #include <threading/rwlock.h>
59 #include <threading/rwlock_condvar.h>
60 #include <threading/spinlock.h>
61 #include <collections/hashtable.h>
62 #include <collections/linked_list.h>
63 #include <processing/jobs/callback_job.h>
65 /** delay before firing roam events (ms) */
66 #define ROAM_DELAY 100
68 /** delay before reinstalling routes (ms) */
69 #define ROUTE_DELAY 100
71 /** maximum recursion when searching for addresses in get_route() */
72 #define MAX_ROUTE_RECURSION 2
75 #define ROUTING_TABLE 0
78 #ifndef ROUTING_TABLE_PRIO
79 #define ROUTING_TABLE_PRIO 0
82 ENUM(rt_msg_names
, RTM_NEWLINK
, RTM_GETRULE
,
104 typedef struct addr_entry_t addr_entry_t
;
107 * IP address in an iface_entry_t
109 struct addr_entry_t
{
111 /** the ip address */
117 /** scope of the address */
120 /** number of times this IP is used, if virtual (i.e. managed by us) */
123 /** TRUE once it is installed, if virtual */
128 * destroy a addr_entry_t object
130 static void addr_entry_destroy(addr_entry_t
*this)
132 this->ip
->destroy(this->ip
);
136 typedef struct iface_entry_t iface_entry_t
;
139 * A network interface on this system, containing addr_entry_t's
141 struct iface_entry_t
{
143 /** interface index */
146 /** name of the interface */
147 char ifname
[IFNAMSIZ
];
149 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
152 /** list of addresses as host_t */
153 linked_list_t
*addrs
;
155 /** TRUE if usable by config */
160 * destroy an interface entry
162 static void iface_entry_destroy(iface_entry_t
*this)
164 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
168 CALLBACK(iface_entry_by_index
, bool,
169 iface_entry_t
*this, va_list args
)
173 VA_ARGS_VGET(args
, ifindex
);
174 return this->ifindex
== ifindex
;
177 CALLBACK(iface_entry_by_name
, bool,
178 iface_entry_t
*this, va_list args
)
182 VA_ARGS_VGET(args
, ifname
);
183 return streq(this->ifname
, ifname
);
187 * check if an interface is up
189 static inline bool iface_entry_up(iface_entry_t
*iface
)
191 return (iface
->flags
& IFF_UP
) == IFF_UP
;
195 * check if an interface is up and usable
197 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
199 return iface
->usable
&& iface_entry_up(iface
);
202 typedef struct addr_map_entry_t addr_map_entry_t
;
205 * Entry that maps an IP address to an interface entry
207 struct addr_map_entry_t
{
208 /** The IP address */
211 /** The address entry for this IP address */
214 /** The interface this address is installed on */
215 iface_entry_t
*iface
;
219 * Hash a addr_map_entry_t object, all entries with the same IP address
220 * are stored in the same bucket
222 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
224 return chunk_hash(this->ip
->get_address(this->ip
));
228 * Compare two addr_map_entry_t objects, two entries are equal if they are
229 * installed on the same interface
231 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
233 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
234 a
->ip
->ip_equals(a
->ip
, b
->ip
);
238 * Used with get_match this finds an address entry if it is installed on
239 * an up and usable interface
241 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
244 return iface_entry_up_and_usable(b
->iface
) &&
245 a
->ip
->ip_equals(a
->ip
, b
->ip
);
249 * Used with get_match this finds an address entry if it is installed on
250 * any active local interface
252 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
254 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
258 * Used with get_match this finds an address entry if it is installed on
259 * any local interface
261 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
263 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
266 typedef struct net_change_t net_change_t
;
269 * Queued network changes
271 struct net_change_t
{
272 /** Name of the interface that got activated (or an IP appeared on) */
277 * Destroy a net_change_t object
279 static void net_change_destroy(net_change_t
*this)
286 * Hash a net_change_t object
288 static u_int
net_change_hash(net_change_t
*this)
290 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
294 * Compare two net_change_t objects
296 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
298 return streq(a
->if_name
, b
->if_name
);
301 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
304 * Private variables and functions of kernel_netlink_net class.
306 struct private_kernel_netlink_net_t
{
308 * Public part of the kernel_netlink_net_t object.
310 kernel_netlink_net_t
public;
313 * lock to access various lists and maps
318 * condition variable to signal virtual IP add/removal
320 rwlock_condvar_t
*condvar
;
323 * Cached list of interfaces and its addresses (iface_entry_t)
325 linked_list_t
*ifaces
;
328 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
333 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
338 * netlink rt socket (routing)
340 netlink_socket_t
*socket
;
343 * Netlink rt event socket
345 netlink_event_socket_t
*socket_events
;
348 * earliest time of the next roam event
353 * roam event due to address change
358 * lock to check and update roam event time
360 spinlock_t
*roam_lock
;
363 * routing table to install routes
365 uint32_t routing_table
;
368 * priority of used routing table
370 uint32_t routing_table_prio
;
380 mutex_t
*routes_lock
;
383 * interface changes which may trigger route reinstallation
385 hashtable_t
*net_changes
;
388 * mutex for route reinstallation triggers
390 mutex_t
*net_changes_lock
;
393 * time of last route reinstallation
395 timeval_t last_route_reinstall
;
398 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
403 * whether to react to RTM_NEWRULE or RTM_DELRULE events
408 * whether to trigger roam events
413 * whether to install IPsec policy routes
418 * whether to actually install virtual IPs
420 bool install_virtual_ip
;
423 * the name of the interface virtual IP addresses are installed on
425 char *install_virtual_ip_on
;
428 * whether preferred source addresses can be specified for IPv6 routes
430 bool rta_prefsrc_for_ipv6
;
433 * whether marks can be used in route lookups
438 * the mark excluded from the routing rule used for virtual IPs
443 * whether to prefer temporary IPv6 addresses over public ones
445 bool prefer_temporary_addrs
;
448 * list with routing tables to be excluded from route lookup
450 linked_list_t
*rt_exclude
;
453 * MTU to set on installed routes
458 * MSS to set on installed routes
464 * Forward declaration
466 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
467 int nlmsg_type
, int flags
, chunk_t dst_net
,
468 uint8_t prefixlen
, host_t
*gateway
,
469 host_t
*src_ip
, char *if_name
, bool pass
);
472 * Clear the queued network changes.
474 static void net_changes_clear(private_kernel_netlink_net_t
*this)
476 enumerator_t
*enumerator
;
477 net_change_t
*change
;
479 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
480 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
482 this->net_changes
->remove_at(this->net_changes
, enumerator
);
483 net_change_destroy(change
);
485 enumerator
->destroy(enumerator
);
489 * Act upon queued network changes.
491 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
493 enumerator_t
*enumerator
;
494 route_entry_t
*route
;
496 this->net_changes_lock
->lock(this->net_changes_lock
);
497 this->routes_lock
->lock(this->routes_lock
);
499 enumerator
= this->routes
->ht
.create_enumerator(&this->routes
->ht
);
500 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
502 net_change_t
*change
, lookup
= {
503 .if_name
= route
->if_name
,
505 if (route
->pass
|| !route
->if_name
)
506 { /* no need to reinstall these, they don't reference interfaces */
509 /* check if a change for the outgoing interface is queued */
510 change
= this->net_changes
->get(this->net_changes
, &lookup
);
512 { /* in case src_ip is not on the outgoing interface */
513 if (this->public.interface
.get_interface(&this->public.interface
,
514 route
->src_ip
, &lookup
.if_name
))
516 if (!streq(lookup
.if_name
, route
->if_name
))
518 change
= this->net_changes
->get(this->net_changes
, &lookup
);
520 free(lookup
.if_name
);
525 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
526 route
->dst_net
, route
->prefixlen
, route
->gateway
,
527 route
->src_ip
, route
->if_name
, route
->pass
);
530 enumerator
->destroy(enumerator
);
531 this->routes_lock
->unlock(this->routes_lock
);
533 net_changes_clear(this);
534 this->net_changes_lock
->unlock(this->net_changes_lock
);
535 return JOB_REQUEUE_NONE
;
539 * Queue route reinstallation caused by network changes for a given interface.
541 * The route reinstallation is delayed for a while and only done once for
542 * several calls during this delay, in order to avoid doing it too often.
543 * The interface name is freed.
545 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
548 net_change_t
*update
, *found
;
556 this->net_changes_lock
->lock(this->net_changes_lock
);
557 found
= this->net_changes
->put(this->net_changes
, update
, update
);
560 net_change_destroy(found
);
562 time_monotonic(&now
);
563 if (timercmp(&now
, &this->last_route_reinstall
, >))
565 timeval_add_ms(&now
, ROUTE_DELAY
);
566 this->last_route_reinstall
= now
;
568 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
570 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
572 this->net_changes_lock
->unlock(this->net_changes_lock
);
576 * check if the given IP is known as virtual IP and currently installed
578 * this function will also return TRUE if the virtual IP entry disappeared.
579 * in that case the returned entry will be NULL.
581 * this->lock must be held when calling this function
583 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
584 host_t
*ip
, addr_map_entry_t
**entry
)
586 addr_map_entry_t lookup
= {
590 *entry
= this->vips
->get_match(this->vips
, &lookup
,
591 (void*)addr_map_entry_match
);
593 { /* the virtual IP disappeared */
596 return (*entry
)->addr
->installed
;
600 * check if the given IP is known as virtual IP
602 * this->lock must be held when calling this function
604 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
606 addr_map_entry_t lookup
= {
610 return this->vips
->get_match(this->vips
, &lookup
,
611 (void*)addr_map_entry_match
) != NULL
;
615 * Add an address map entry
617 static void addr_map_entry_add(hashlist_t
*map
, addr_entry_t
*addr
,
618 iface_entry_t
*iface
)
620 addr_map_entry_t
*entry
;
627 entry
= map
->ht
.put(&map
->ht
, entry
, entry
);
632 * Remove an address map entry
634 static void addr_map_entry_remove(hashlist_t
*map
, addr_entry_t
*addr
,
635 iface_entry_t
*iface
)
637 addr_map_entry_t
*entry
, lookup
= {
643 entry
= map
->ht
.remove(&map
->ht
, &lookup
);
648 * Check if an address or net (addr with prefix net bits) is in
649 * subnet (net with net_len net bits)
651 static bool addr_in_subnet(chunk_t addr
, int prefix
, chunk_t net
, int net_len
)
653 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
657 { /* any address matches a /0 network */
660 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
|| prefix
< net_len
)
664 /* scan through all bytes in network order */
669 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
673 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
685 * Check if the given address is in subnet (net with net_len net bits)
687 static bool host_in_subnet(host_t
*host
, chunk_t net
, int net_len
)
691 addr
= host
->get_address(host
);
692 return addr_in_subnet(addr
, addr
.len
* 8, net
, net_len
);
696 * Determine the type or scope of the given unicast IP address. This is not
697 * the same thing returned in rtm_scope/ifa_scope.
699 * We use return values as defined in RFC 6724 (referring to RFC 4291).
701 static u_char
get_scope(host_t
*ip
)
705 addr
= ip
->get_address(ip
);
709 /* we use the mapping defined in RFC 6724, 3.2 */
710 if (addr
.ptr
[0] == 127)
711 { /* link-local, same as the IPv6 loopback address */
714 if (addr
.ptr
[0] == 169 && addr
.ptr
[1] == 254)
720 if (IN6_IS_ADDR_LOOPBACK((struct in6_addr
*)addr
.ptr
))
721 { /* link-local, according to RFC 4291, 2.5.3 */
724 if (IN6_IS_ADDR_LINKLOCAL((struct in6_addr
*)addr
.ptr
))
728 if (IN6_IS_ADDR_SITELOCAL((struct in6_addr
*)addr
.ptr
))
729 { /* deprecated, according to RFC 4291, 2.5.7 */
741 * Determine the label of the given unicast IP address.
743 * We currently only support the default table given in RFC 6724:
745 * Prefix Precedence Label
756 static u_char
get_label(host_t
*ip
)
763 /* priority table ordered by prefix */
765 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
766 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), 128, 0 },
768 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
769 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00), 96, 4 },
771 { chunk_from_chars(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 96, 3 },
774 { chunk_from_chars(0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
775 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 32, 5 },
777 { chunk_from_chars(0x20, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
778 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 2 },
780 { chunk_from_chars(0x3f, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
781 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 16, 12 },
783 { chunk_from_chars(0xfe, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
784 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 10, 11 },
786 { chunk_from_chars(0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
787 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00), 7, 13 },
791 for (i
= 0; i
< countof(priorities
); i
++)
793 if (host_in_subnet(ip
, priorities
[i
].net
, priorities
[i
].prefix
))
795 return priorities
[i
].label
;
803 * Returns the length of the common prefix in bits up to the length of a's
804 * prefix, defined by RFC 6724 as the portion of the address not including the
805 * interface ID, which is 64-bit for most unicast addresses (see RFC 4291).
807 static u_char
common_prefix(host_t
*a
, host_t
*b
)
810 u_char byte
, bits
= 0, match
;
812 aa
= a
->get_address(a
);
813 ba
= b
->get_address(b
);
814 for (byte
= 0; byte
< 8; byte
++)
816 if (aa
.ptr
[byte
] != ba
.ptr
[byte
])
818 match
= aa
.ptr
[byte
] ^ ba
.ptr
[byte
];
819 for (bits
= 8; match
; match
>>= 1)
826 return byte
* 8 + bits
;
830 * Compare two IP addresses and return TRUE if the second address is the better
831 * choice of the two to reach the destination.
832 * For IPv6 we approximately follow RFC 6724.
834 static bool is_address_better(private_kernel_netlink_net_t
*this,
835 addr_entry_t
*a
, addr_entry_t
*b
, host_t
*d
)
837 u_char sa
, sb
, sd
, la
, lb
, ld
, pa
, pb
;
839 /* rule 2: prefer appropriate scope */
842 sa
= get_scope(a
->ip
);
843 sb
= get_scope(b
->ip
);
854 if (a
->ip
->get_family(a
->ip
) == AF_INET
)
855 { /* stop here for IPv4, default to addresses found earlier */
858 /* rule 3: avoid deprecated addresses (RFC 4862) */
859 if ((a
->flags
& IFA_F_DEPRECATED
) != (b
->flags
& IFA_F_DEPRECATED
))
861 return a
->flags
& IFA_F_DEPRECATED
;
863 /* rule 4 is not applicable as we don't know if an address is a home or
865 * rule 5 does not apply as we only compare addresses from one interface
867 /* rule 6: prefer matching label */
870 la
= get_label(a
->ip
);
871 lb
= get_label(b
->ip
);
873 if (la
== ld
&& lb
!= ld
)
877 else if (lb
== ld
&& la
!= ld
)
882 /* rule 7: prefer temporary addresses (WE REVERSE THIS BY DEFAULT!) */
883 if ((a
->flags
& IFA_F_TEMPORARY
) != (b
->flags
& IFA_F_TEMPORARY
))
885 if (this->prefer_temporary_addrs
)
887 return b
->flags
& IFA_F_TEMPORARY
;
889 return a
->flags
& IFA_F_TEMPORARY
;
891 /* rule 8: use longest matching prefix */
894 pa
= common_prefix(a
->ip
, d
);
895 pb
= common_prefix(b
->ip
, d
);
901 /* default to addresses found earlier */
906 * Get a non-virtual IP address on the given interfaces and optionally in a
909 * If a candidate address is given, we first search for that address and if not
910 * found return the address as above.
911 * Returned host is a clone, has to be freed by caller.
913 * this->lock must be held when calling this function.
915 static host_t
*get_matching_address(private_kernel_netlink_net_t
*this,
916 int *ifindex
, int family
, chunk_t net
,
917 uint8_t mask
, host_t
*dest
,
920 enumerator_t
*ifaces
, *addrs
;
921 iface_entry_t
*iface
;
922 addr_entry_t
*addr
, *best
= NULL
;
923 bool candidate_matched
= FALSE
;
925 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
926 while (ifaces
->enumerate(ifaces
, &iface
))
928 if (iface
->usable
&& (!ifindex
|| iface
->ifindex
== *ifindex
))
929 { /* only use matching interfaces not excluded by config */
930 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
931 while (addrs
->enumerate(addrs
, &addr
))
933 if (addr
->refcount
||
934 addr
->ip
->get_family(addr
->ip
) != family
)
935 { /* ignore virtual IP addresses and ensure family matches */
938 if (net
.ptr
&& !host_in_subnet(addr
->ip
, net
, mask
))
939 { /* optionally match a subnet */
942 if (candidate
&& candidate
->ip_equals(candidate
, addr
->ip
) &&
943 !(addr
->flags
& IFA_F_DEPRECATED
))
944 { /* stop if we find the candidate and it's not deprecated */
946 candidate_matched
= TRUE
;
949 else if (!best
|| is_address_better(this, best
, addr
, dest
))
954 addrs
->destroy(addrs
);
955 if (ifindex
|| candidate_matched
)
961 ifaces
->destroy(ifaces
);
962 return best
? best
->ip
->clone(best
->ip
) : NULL
;
966 * Get a non-virtual IP address on the given interface.
968 * If a candidate address is given, we first search for that address and if not
969 * found return the address as above.
970 * Returned host is a clone, has to be freed by caller.
972 * this->lock must be held when calling this function.
974 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
975 int ifindex
, int family
, host_t
*dest
,
978 return get_matching_address(this, &ifindex
, family
, chunk_empty
, 0, dest
,
983 * Get a non-virtual IP address in the given subnet.
985 * If a candidate address is given, we first search for that address and if not
986 * found return the address as above.
987 * Returned host is a clone, has to be freed by caller.
989 * this->lock must be held when calling this function.
991 static host_t
*get_subnet_address(private_kernel_netlink_net_t
*this,
992 int family
, chunk_t net
, uint8_t mask
,
993 host_t
*dest
, host_t
*candidate
)
995 return get_matching_address(this, NULL
, family
, net
, mask
, dest
, candidate
);
999 * callback function that raises the delayed roam event
1001 static job_requeue_t
roam_event(private_kernel_netlink_net_t
*this)
1005 this->roam_lock
->lock(this->roam_lock
);
1006 address
= this->roam_address
;
1007 this->roam_address
= FALSE
;
1008 this->roam_lock
->unlock(this->roam_lock
);
1009 charon
->kernel
->roam(charon
->kernel
, address
);
1010 return JOB_REQUEUE_NONE
;
1014 * fire a roaming event. we delay it for a bit and fire only one event
1015 * for multiple calls. otherwise we would create too many events.
1017 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
1022 if (!this->roam_events
)
1027 time_monotonic(&now
);
1028 this->roam_lock
->lock(this->roam_lock
);
1029 this->roam_address
|= address
;
1030 if (!timercmp(&now
, &this->next_roam
, >))
1032 this->roam_lock
->unlock(this->roam_lock
);
1035 timeval_add_ms(&now
, ROAM_DELAY
);
1036 this->next_roam
= now
;
1037 this->roam_lock
->unlock(this->roam_lock
);
1039 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
1041 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
1045 * check if an interface with a given index is up and usable
1047 * this->lock must be locked when calling this function
1049 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
1052 iface_entry_t
*iface
;
1054 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1055 (void**)&iface
, index
))
1057 return iface_entry_up_and_usable(iface
);
1063 * unregister the current addr_entry_t from the hashtable it is stored in
1065 * this->lock must be locked when calling this function
1067 CALLBACK(addr_entry_unregister
, void,
1068 addr_entry_t
*addr
, va_list args
)
1070 private_kernel_netlink_net_t
*this;
1071 iface_entry_t
*iface
;
1073 VA_ARGS_VGET(args
, iface
, this);
1076 addr_map_entry_remove(this->vips
, addr
, iface
);
1077 this->condvar
->broadcast(this->condvar
);
1080 addr_map_entry_remove(this->addrs
, addr
, iface
);
1084 * process RTM_NEWLINK/RTM_DELLINK from kernel
1086 static void process_link(private_kernel_netlink_net_t
*this,
1087 struct nlmsghdr
*hdr
, bool event
)
1089 struct ifinfomsg
* msg
= NLMSG_DATA(hdr
);
1090 struct rtattr
*rta
= IFLA_RTA(msg
);
1091 size_t rtasize
= IFLA_PAYLOAD (hdr
);
1092 enumerator_t
*enumerator
;
1093 iface_entry_t
*current
, *entry
= NULL
;
1095 bool update
= FALSE
, update_routes
= FALSE
;
1097 while (RTA_OK(rta
, rtasize
))
1099 switch (rta
->rta_type
)
1102 name
= RTA_DATA(rta
);
1105 rta
= RTA_NEXT(rta
, rtasize
);
1112 this->lock
->write_lock(this->lock
);
1113 switch (hdr
->nlmsg_type
)
1117 if (!this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1118 (void**)&entry
, msg
->ifi_index
))
1121 .ifindex
= msg
->ifi_index
,
1122 .addrs
= linked_list_create(),
1124 this->ifaces
->insert_last(this->ifaces
, entry
);
1126 strncpy(entry
->ifname
, name
, IFNAMSIZ
-1);
1127 entry
->ifname
[IFNAMSIZ
-1] = '\0';
1128 entry
->usable
= charon
->kernel
->is_interface_usable(charon
->kernel
,
1130 if (event
&& entry
->usable
)
1132 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
1134 update
= update_routes
= TRUE
;
1135 DBG1(DBG_KNL
, "interface %s activated", name
);
1137 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
1140 DBG1(DBG_KNL
, "interface %s deactivated", name
);
1143 entry
->flags
= msg
->ifi_flags
;
1148 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
1149 while (enumerator
->enumerate(enumerator
, ¤t
))
1151 if (current
->ifindex
== msg
->ifi_index
)
1153 if (event
&& current
->usable
)
1156 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
1158 /* TODO: move virtual IPs installed on this interface to
1159 * another interface? */
1160 this->ifaces
->remove_at(this->ifaces
, enumerator
);
1161 current
->addrs
->invoke_function(current
->addrs
,
1162 addr_entry_unregister
, current
, this);
1163 iface_entry_destroy(current
);
1167 enumerator
->destroy(enumerator
);
1171 this->lock
->unlock(this->lock
);
1173 if (update_routes
&& event
)
1175 queue_route_reinstall(this, strdup(name
));
1178 if (update
&& event
)
1180 fire_roam_event(this, TRUE
);
1185 * process RTM_NEWADDR/RTM_DELADDR from kernel
1187 static void process_addr(private_kernel_netlink_net_t
*this,
1188 struct nlmsghdr
*hdr
, bool event
)
1190 struct ifaddrmsg
* msg
= NLMSG_DATA(hdr
);
1191 struct rtattr
*rta
= IFA_RTA(msg
);
1192 size_t rtasize
= IFA_PAYLOAD (hdr
);
1193 host_t
*host
= NULL
;
1194 iface_entry_t
*iface
;
1195 chunk_t local
= chunk_empty
, address
= chunk_empty
;
1196 char *route_ifname
= NULL
;
1197 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
1199 while (RTA_OK(rta
, rtasize
))
1201 switch (rta
->rta_type
)
1204 local
.ptr
= RTA_DATA(rta
);
1205 local
.len
= RTA_PAYLOAD(rta
);
1208 address
.ptr
= RTA_DATA(rta
);
1209 address
.len
= RTA_PAYLOAD(rta
);
1212 rta
= RTA_NEXT(rta
, rtasize
);
1215 /* For PPP interfaces, we need the IFA_LOCAL address,
1216 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
1217 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
1220 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
1222 else if (address
.ptr
)
1224 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
1232 this->lock
->write_lock(this->lock
);
1233 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1234 (void**)&iface
, msg
->ifa_index
))
1236 addr_map_entry_t
*entry
, lookup
= {
1242 entry
= this->vips
->ht
.get(&this->vips
->ht
, &lookup
);
1245 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1246 { /* mark as installed and signal waiting threads */
1247 entry
->addr
->installed
= TRUE
;
1250 { /* the address was already marked as uninstalled */
1252 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1253 addr_map_entry_remove(this->vips
, addr
, iface
);
1254 addr_entry_destroy(addr
);
1256 /* no roam events etc. for virtual IPs */
1257 this->condvar
->broadcast(this->condvar
);
1258 this->lock
->unlock(this->lock
);
1259 host
->destroy(host
);
1262 entry
= this->addrs
->ht
.get(&this->addrs
->ht
, &lookup
);
1265 if (hdr
->nlmsg_type
== RTM_DELADDR
)
1269 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
1273 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
1276 addr_map_entry_remove(this->addrs
, addr
, iface
);
1277 addr_entry_destroy(addr
);
1279 else if (entry
->addr
->flags
!= msg
->ifa_flags
)
1282 entry
->addr
->flags
= msg
->ifa_flags
;
1283 if (event
&& iface
->usable
)
1286 DBG1(DBG_KNL
, "flags changed for %H on %s", host
,
1293 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
1297 route_ifname
= strdup(iface
->ifname
);
1299 .ip
= host
->clone(host
),
1300 .flags
= msg
->ifa_flags
,
1301 .scope
= msg
->ifa_scope
,
1303 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1304 addr_map_entry_add(this->addrs
, addr
, iface
);
1305 if (event
&& iface
->usable
)
1307 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
1311 if (found
&& (iface
->flags
& IFF_UP
))
1316 { /* ignore events for interfaces excluded by config */
1317 update
= changed
= FALSE
;
1320 this->lock
->unlock(this->lock
);
1322 if (update
&& event
&& route_ifname
)
1324 queue_route_reinstall(this, route_ifname
);
1330 host
->destroy(host
);
1332 /* send an update to all IKE_SAs */
1333 if (update
&& event
&& changed
)
1335 fire_roam_event(this, TRUE
);
1340 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1342 static void process_route(private_kernel_netlink_net_t
*this,
1343 struct nlmsghdr
*hdr
)
1345 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1346 struct rtattr
*rta
= RTM_RTA(msg
);
1347 size_t rtasize
= RTM_PAYLOAD(hdr
);
1348 uint32_t rta_oif
= 0;
1349 host_t
*host
= NULL
;
1351 /* ignore routes added by us or in the local routing table (local addrs) */
1352 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1353 msg
->rtm_table
== RT_TABLE_LOCAL
))
1357 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1358 { /* ignore cached routes, seem to be created a lot for IPv6 */
1362 while (RTA_OK(rta
, rtasize
))
1364 switch (rta
->rta_type
)
1366 #ifdef HAVE_RTA_TABLE
1368 /* also check against extended table ID */
1369 if (RTA_PAYLOAD(rta
) == sizeof(uint32_t) &&
1370 this->routing_table
== *(uint32_t*)RTA_DATA(rta
))
1376 #endif /* HAVE_RTA_TABLE */
1379 host
= host_create_from_chunk(msg
->rtm_family
,
1380 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1383 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1385 rta_oif
= *(uint32_t*)RTA_DATA(rta
);
1389 rta
= RTA_NEXT(rta
, rtasize
);
1391 this->lock
->read_lock(this->lock
);
1392 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1393 { /* ignore route changes for interfaces that are ignored or down */
1394 this->lock
->unlock(this->lock
);
1398 if (!host
&& rta_oif
)
1400 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
,
1403 if (!host
|| is_known_vip(this, host
))
1404 { /* ignore routes added for virtual IPs */
1405 this->lock
->unlock(this->lock
);
1409 this->lock
->unlock(this->lock
);
1410 fire_roam_event(this, FALSE
);
1411 host
->destroy(host
);
1415 * process RTM_NEW|DELRULE from kernel
1417 static void process_rule(private_kernel_netlink_net_t
*this,
1418 struct nlmsghdr
*hdr
)
1420 #ifdef HAVE_LINUX_FIB_RULES_H
1421 struct rtmsg
* msg
= NLMSG_DATA(hdr
);
1422 struct rtattr
*rta
= RTM_RTA(msg
);
1423 size_t rtasize
= RTM_PAYLOAD(hdr
);
1425 /* ignore rules added by us or in the local routing table (local addrs) */
1426 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1427 msg
->rtm_table
== RT_TABLE_LOCAL
))
1432 while (RTA_OK(rta
, rtasize
))
1434 switch (rta
->rta_type
)
1437 /* also check against extended table ID */
1438 if (RTA_PAYLOAD(rta
) == sizeof(uint32_t) &&
1439 this->routing_table
== *(uint32_t*)RTA_DATA(rta
))
1445 rta
= RTA_NEXT(rta
, rtasize
);
1447 fire_roam_event(this, FALSE
);
1451 CALLBACK(receive_events
, void,
1452 private_kernel_netlink_net_t
*this, struct nlmsghdr
*hdr
)
1454 switch (hdr
->nlmsg_type
)
1458 process_addr(this, hdr
, TRUE
);
1462 process_link(this, hdr
, TRUE
);
1466 if (this->process_route
)
1468 process_route(this, hdr
);
1473 if (this->process_rules
)
1475 process_rule(this, hdr
);
1483 /** enumerator over addresses */
1485 private_kernel_netlink_net_t
* this;
1486 /** which addresses to enumerate */
1487 kernel_address_type_t which
;
1488 } address_enumerator_t
;
1490 CALLBACK(address_enumerator_destroy
, void,
1491 address_enumerator_t
*data
)
1493 data
->this->lock
->unlock(data
->this->lock
);
/*
 * Filter callback of the per-interface address enumerator.  Entries are
 * pulled from orig and checked against the data->which selection mask:
 *  - entries with a non-zero refcount (virtual IPs added by us) are checked
 *    against ADDR_TYPE_VIRTUAL, entries with a zero refcount against
 *    ADDR_TYPE_REGULAR,
 *  - deprecated addresses (IFA_F_DEPRECATED) and addresses whose scope is
 *    >= RT_SCOPE_LINK are checked next,
 *  - for non-virtual IPv6 addresses the IFA_F_TEMPORARY flag is compared
 *    with data->this->prefer_temporary_addrs.
 * NOTE(review): the bodies of these conditions (presumably "continue") and
 * the code that stores the result through out are not part of this listing.
 */
1497 CALLBACK(filter_addresses
, bool,
1498 address_enumerator_t
*data
, enumerator_t
*orig
, va_list args
)
1503 VA_ARGS_VGET(args
, out
);
1505 while (orig
->enumerate(orig
, &addr
))
1507 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && addr
->refcount
)
1508 { /* skip virtual interfaces added by us */
1511 if (!(data
->which
& ADDR_TYPE_REGULAR
) && !addr
->refcount
)
1512 { /* address is regular, but not requested */
1515 if (addr
->flags
& IFA_F_DEPRECATED
||
1516 addr
->scope
>= RT_SCOPE_LINK
)
1517 { /* skip deprecated addresses or those with an unusable scope */
1520 if (!addr
->refcount
&& addr
->ip
->get_family(addr
->ip
) == AF_INET6
)
1521 { /* handle non-VIP temporary IPv6 addresses according to config */
1522 bool temporary
= (addr
->flags
& IFA_F_TEMPORARY
) == IFA_F_TEMPORARY
;
1523 if (data
->this->prefer_temporary_addrs
!= temporary
)
1535 * enumerator constructor for interfaces
1537 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1538 address_enumerator_t
*data
)
1540 return enumerator_create_filter(
1541 iface
->addrs
->create_enumerator(iface
->addrs
),
1542 filter_addresses
, data
, NULL
);
/*
 * Filter callback of the interface enumerator.  iface_entry_t objects are
 * pulled from orig and checked against data->which: interfaces that are not
 * usable (ADDR_TYPE_IGNORED), loopback devices (ADDR_TYPE_LOOPBACK) and
 * interfaces without IFF_UP (ADDR_TYPE_DOWN) are only of interest if the
 * corresponding bit is set.
 * NOTE(review): the statements inside the conditions and the code that hands
 * the interface back through out are not part of this listing.
 */
1545 CALLBACK(filter_interfaces
, bool,
1546 address_enumerator_t
*data
, enumerator_t
*orig
, va_list args
)
1548 iface_entry_t
*iface
, **out
;
1550 VA_ARGS_VGET(args
, out
);
1552 while (orig
->enumerate(orig
, &iface
))
1554 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !iface
->usable
)
1555 { /* skip interfaces excluded by config */
1558 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && (iface
->flags
& IFF_LOOPBACK
))
1559 { /* ignore loopback devices */
1562 if (!(data
->which
& ADDR_TYPE_DOWN
) && !(iface
->flags
& IFF_UP
))
1563 { /* skip interfaces not up */
/*
 * kernel_net_t.create_address_enumerator: takes a read lock on this->lock
 * and returns a nested enumerator.  The outer enumerator walks this->ifaces
 * through filter_interfaces, the inner one is created per interface by
 * create_iface_enumerator.  address_enumerator_destroy is registered for the
 * shared context data and releases the lock again.
 * NOTE(review): the initialisation of data is not part of this listing (gap
 * in the embedded numbering).
 */
1572 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1573 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1575 address_enumerator_t
*data
;
1582 this->lock
->read_lock(this->lock
);
1583 return enumerator_create_nested(
1584 enumerator_create_filter(
1585 this->ifaces
->create_enumerator(this->ifaces
),
1586 filter_interfaces
, data
, NULL
),
1587 (void*)create_iface_enumerator
, data
,
1588 address_enumerator_destroy
);
/*
 * kernel_net_t.get_interface_name: determines the interface ip is installed
 * on.  Under a read lock three lookups are tried in order: this->addrs with
 * addr_map_entry_match_up_and_usable, this->vips with the same matcher, and
 * finally this->addrs with addr_map_entry_match_up (address on an ignored
 * interface).  For the first two, *name receives a strdup()ed copy of the
 * interface name, so the caller presumably has to free it.
 * NOTE(review): the return statements, the body of the is_anyaddr() check
 * and the conditions guarding the *name assignments are not part of this
 * listing.
 */
1591 METHOD(kernel_net_t
, get_interface_name
, bool,
1592 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1594 addr_map_entry_t
*entry
, lookup
= {
1598 if (ip
->is_anyaddr(ip
))
1602 this->lock
->read_lock(this->lock
);
1603 /* first try to find it on an up and usable interface */
1604 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1605 (void*)addr_map_entry_match_up_and_usable
);
1610 *name
= strdup(entry
->iface
->ifname
);
1611 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1613 this->lock
->unlock(this->lock
);
1616 /* in a second step, consider virtual IPs installed by us */
1617 entry
= this->vips
->get_match(this->vips
, &lookup
,
1618 (void*)addr_map_entry_match_up_and_usable
);
1623 *name
= strdup(entry
->iface
->ifname
);
1624 DBG2(DBG_KNL
, "virtual IP %H is on interface %s", ip
, *name
);
1626 this->lock
->unlock(this->lock
);
1629 /* maybe it is installed on an ignored interface */
1630 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1631 (void*)addr_map_entry_match_up
);
1634 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1636 this->lock
->unlock(this->lock
);
/*
 * Looks up the interface called name in this->ifaces (iface_entry_by_name)
 * under a read lock and copies its ifindex.  A DBG1 message is logged on the
 * "unable to get interface index" path.
 * NOTE(review): the declaration and initial value of ifindex, the condition
 * guarding the DBG1 message and the return statement are not part of this
 * listing.
 */
1641 * get the index of an interface by name
1643 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1645 iface_entry_t
*iface
;
1648 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1650 this->lock
->read_lock(this->lock
);
1651 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
1652 (void**)&iface
, name
))
1654 ifindex
= iface
->ifindex
;
1656 this->lock
->unlock(this->lock
);
1660 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
/*
 * Looks up the interface with the given index in this->ifaces
 * (iface_entry_by_index) under a read lock and strdup()s its name, i.e. the
 * result is a heap copy as the original description ("allocated") states.
 * NOTE(review): the second parameter line, the declaration of name, the
 * condition guarding the DBG1 message and the return statement are not part
 * of this listing.
 */
1666 * get the name of an interface by index (allocated)
1668 static char *get_interface_name_by_index(private_kernel_netlink_net_t
*this,
1671 iface_entry_t
*iface
;
1674 DBG2(DBG_KNL
, "getting iface name for index %d", index
);
1676 this->lock
->read_lock(this->lock
);
1677 if (this->ifaces
->find_first(this->ifaces
, iface_entry_by_index
,
1678 (void**)&iface
, index
))
1680 name
= strdup(iface
->ifname
);
1682 this->lock
->unlock(this->lock
);
1686 DBG1(DBG_KNL
, "unable to get interface name for %d", index
);
1692 * Store information about a route retrieved via RTNETLINK
/*
 * Destructor for rt_entry_t: destroys this->src_host if it is set.
 * NOTE(review): one body line is missing from this listing (gap in the
 * embedded numbering); releasing the entry itself presumably happens there -
 * confirm against the complete file.
 */
1708 * Free a route entry
1710 static void rt_entry_destroy(rt_entry_t
*this)
1712 DESTROY_IF(this->src_host
);
/*
 * Classifies a route message by msg->rtm_type of the rtmsg carried in hdr.
 * NOTE(review): only the RTN_UNREACHABLE case label survives in this
 * listing; the other labels, the use of allow_local and the returned values
 * are not visible here.
 */
1717 * Check if the route received with RTM_NEWROUTE is usable based on its type.
1719 static bool route_usable(struct nlmsghdr
*hdr
, bool allow_local
)
1723 msg
= NLMSG_DATA(hdr
);
1724 switch (msg
->rtm_type
)
1727 case RTN_UNREACHABLE
:
/*
 * Fills an rt_entry_t from the rtmsg in hdr.  dst_len, src_len and table are
 * taken from the message header.  The attribute loop stores pref_src, gtw,
 * dst and src as chunks created directly over the attribute data (no copy,
 * see the note on returned chunks below), and copies oif, priority and -
 * with HAVE_RTA_TABLE - table when the payload size equals the field size.
 * NOTE(review): the case labels and the branch that chooses between reusing
 * route and creating a new entry are not part of this listing; the two
 * visible initialiser groups presumably belong to those two branches.
 */
1739 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1740 * reused if not NULL.
1742 * Returned chunks point to internal data of the Netlink message.
1744 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1750 msg
= NLMSG_DATA(hdr
);
1752 rtasize
= RTM_PAYLOAD(hdr
);
1756 *route
= (rt_entry_t
){
1757 .dst_len
= msg
->rtm_dst_len
,
1758 .src_len
= msg
->rtm_src_len
,
1759 .table
= msg
->rtm_table
,
1765 .dst_len
= msg
->rtm_dst_len
,
1766 .src_len
= msg
->rtm_src_len
,
1767 .table
= msg
->rtm_table
,
1771 while (RTA_OK(rta
, rtasize
))
1773 switch (rta
->rta_type
)
1776 route
->pref_src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1779 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1782 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1785 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1788 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1790 route
->oif
= *(uint32_t*)RTA_DATA(rta
);
1794 if (RTA_PAYLOAD(rta
) == sizeof(route
->priority
))
1796 route
->priority
= *(uint32_t*)RTA_DATA(rta
);
1799 #ifdef HAVE_RTA_TABLE
1801 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1803 route
->table
= *(uint32_t*)RTA_DATA(rta
);
1806 #endif /* HAVE_RTA_TABLE*/
1808 rta
= RTA_NEXT(rta
, rtasize
);
/*
 * Central route lookup used by get_source_addr() and get_nexthop().
 *
 *  dest       destination to reach
 *  prefix     prefix length of dest; a negative value selects the full
 *             address length, larger values are capped to it
 *  nexthop    TRUE to look up the gateway, FALSE to look up a source address
 *  candidate  preferred source address, compared with the routes' sources
 *  iface      optional out parameter; in the nexthop case it receives the
 *             result of get_interface_name_by_index() for the chosen route
 *  recursion  current depth, checked against MAX_ROUTE_RECURSION
 *
 * An RTM_GETROUTE request is built.  If no subnet is matched and a routing
 * mark is configured, RTA_MARK is added; otherwise NLM_F_DUMP is set when
 * this->install_routes is enabled.  The replies are parsed with
 * parse_route() and checked (excluded tables, our own routing table,
 * interface state, destination match, known virtual IPs as source) before
 * they are inserted into a list ordered by prefix length and priority.  That
 * list is then searched for a route whose source address equals candidate;
 * the first route examined is remembered in best as fallback.  For routes
 * that only have a gateway, the function calls itself with recursion + 1.
 * NOTE(review): many lines (returns, breaks, else branches, case labels and
 * several declarations) are missing from this listing; this summary covers
 * only what is visible and should be confirmed against the complete file.
 */
1814 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1816 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1817 int prefix
, bool nexthop
, host_t
*candidate
,
1818 char **iface
, u_int recursion
)
1820 netlink_buf_t request
;
1821 struct nlmsghdr
*hdr
, *out
, *current
;
1825 linked_list_t
*routes
;
1826 rt_entry_t
*route
= NULL
, *best
= NULL
;
1827 enumerator_t
*enumerator
;
1828 host_t
*addr
= NULL
;
1832 if (recursion
> MAX_ROUTE_RECURSION
)
1836 chunk
= dest
->get_address(dest
);
1837 len
= chunk
.len
* 8;
1838 prefix
= prefix
< 0 ? len
: min(prefix
, len
);
1839 match_net
= prefix
!= len
;
1841 memset(&request
, 0, sizeof(request
));
1843 family
= dest
->get_family(dest
);
1845 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1846 hdr
->nlmsg_type
= RTM_GETROUTE
;
1847 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1849 msg
= NLMSG_DATA(hdr
);
1850 msg
->rtm_family
= family
;
1851 if (!match_net
&& this->rta_mark
&& this->routing_mark
.value
)
1853 /* if our routing rule excludes packets with a certain mark we can
1854 * get the preferred route without having to dump all routes */
1855 chunk
= chunk_from_thing(this->routing_mark
.value
);
1856 netlink_add_attribute(hdr
, RTA_MARK
, chunk
, sizeof(request
));
1858 else if (family
== AF_INET
|| this->rta_prefsrc_for_ipv6
||
1859 this->routing_table
|| match_net
)
1860 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1861 * as we want to ignore routes with virtual IPs we cannot use DUMP
1862 * if these routes are not installed in a separate table */
1863 if (this->install_routes
)
1865 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
1870 chunk
= candidate
->get_address(candidate
);
1871 if (hdr
->nlmsg_flags
& NLM_F_DUMP
)
1873 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1877 netlink_add_attribute(hdr
, RTA_SRC
, chunk
, sizeof(request
));
1880 /* we use this below to match against the routes */
1881 chunk
= dest
->get_address(dest
);
1884 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1887 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1889 DBG2(DBG_KNL
, "getting %s to reach %H/%d failed",
1890 nexthop
? "nexthop" : "address", dest
, prefix
);
1893 routes
= linked_list_create();
1894 this->lock
->read_lock(this->lock
);
1896 for (current
= out
; NLMSG_OK(current
, len
);
1897 current
= NLMSG_NEXT(current
, len
))
1899 switch (current
->nlmsg_type
)
1908 if (!route_usable(current
, TRUE
))
1912 route
= parse_route(current
, route
);
1914 table
= (uintptr_t)route
->table
;
1915 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
1917 { /* route is from an excluded routing table */
1920 if (this->routing_table
!= 0 &&
1921 route
->table
== this->routing_table
)
1922 { /* route is from our own ipsec routing table */
1925 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
1926 { /* interface is down */
1929 if (!addr_in_subnet(chunk
, prefix
, route
->dst
, route
->dst_len
))
1930 { /* route destination does not contain dest */
1933 if (route
->pref_src
.ptr
)
1934 { /* verify source address, if any */
1935 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
1936 route
->pref_src
, 0);
1937 if (src
&& is_known_vip(this, src
))
1938 { /* ignore routes installed by us */
1942 route
->src_host
= src
;
1944 /* insert route, sorted by network prefix and priority */
1945 enumerator
= routes
->create_enumerator(routes
);
1946 while (enumerator
->enumerate(enumerator
, &other
))
1948 if (route
->dst_len
> other
->dst_len
)
1952 if (route
->dst_len
== other
->dst_len
&&
1953 route
->priority
< other
->priority
)
1958 routes
->insert_before(routes
, enumerator
, route
);
1959 enumerator
->destroy(enumerator
);
1970 rt_entry_destroy(route
);
1973 /* now we have a list of routes matching dest, sorted by net prefix.
1974 * we will look for source addresses for these routes and select the one
1975 * with the preferred source address, if possible */
1976 enumerator
= routes
->create_enumerator(routes
);
1977 while (enumerator
->enumerate(enumerator
, &route
))
1979 if (route
->src_host
)
1980 { /* got a source address with the route, if no preferred source
1981 * is given or it matches we are done, as this is the best route */
1982 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
1987 else if (route
->oif
)
1988 { /* no match yet, maybe it is assigned to the same interface */
1989 host_t
*src
= get_interface_address(this, route
->oif
,
1990 msg
->rtm_family
, dest
, candidate
);
1991 if (src
&& src
->ip_equals(src
, candidate
))
1993 route
->src_host
->destroy(route
->src_host
);
1994 route
->src_host
= src
;
2000 /* no luck yet with the source address. if this is the best (first)
2001 * route we store it as fallback in case we don't find a route with
2002 * the preferred source */
2003 best
= best
?: route
;
2007 { /* no src, but a source selector, try to find a matching address */
2008 route
->src_host
= get_subnet_address(this, msg
->rtm_family
,
2009 route
->src
, route
->src_len
, dest
,
2011 if (route
->src_host
)
2012 { /* we handle this address the same as the one above */
2014 candidate
->ip_equals(candidate
, route
->src_host
))
2019 best
= best
?: route
;
2024 { /* no src, but an interface - get address from it */
2025 route
->src_host
= get_interface_address(this, route
->oif
,
2026 msg
->rtm_family
, dest
, candidate
);
2027 if (route
->src_host
)
2028 { /* more of the same */
2030 candidate
->ip_equals(candidate
, route
->src_host
))
2035 best
= best
?: route
;
2040 { /* no src, no iface, but a gateway - lookup src to reach gtw */
2043 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
2044 if (gtw
&& !gtw
->ip_equals(gtw
, dest
))
2046 route
->src_host
= get_route(this, gtw
, -1, FALSE
, candidate
,
2047 iface
, recursion
+ 1);
2050 if (route
->src_host
)
2051 { /* more of the same */
2053 candidate
->ip_equals(candidate
, route
->src_host
))
2058 best
= best
?: route
;
2062 enumerator
->destroy(enumerator
);
2065 { /* nexthop lookup, return gateway and oif if any */
2070 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
2072 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
2073 if (iface
&& best
->oif
)
2075 *iface
= get_interface_name_by_index(this, best
->oif
);
2078 if (!addr
&& !match_net
)
2079 { /* fallback to destination address */
2080 addr
= dest
->clone(dest
);
2087 addr
= best
->src_host
->clone(best
->src_host
);
2090 this->lock
->unlock(this->lock
);
2091 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
2096 if (nexthop
&& iface
&& *iface
)
2098 DBG2(DBG_KNL
, "using %H as nexthop and %s as dev to reach %H/%d",
2099 addr
, *iface
, dest
, prefix
);
2103 DBG2(DBG_KNL
, "using %H as %s to reach %H/%d", addr
,
2104 nexthop
? "nexthop" : "address", dest
, prefix
);
2107 else if (!recursion
)
2109 DBG2(DBG_KNL
, "no %s found to reach %H/%d",
2110 nexthop
? "nexthop" : "address", dest
, prefix
);
2115 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
2116 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
2118 return get_route(this, dest
, -1, FALSE
, src
, NULL
, 0);
2121 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
2122 private_kernel_netlink_net_t
*this, host_t
*dest
, int prefix
, host_t
*src
,
2125 return get_route(this, dest
, prefix
, TRUE
, src
, iface
, 0);
/*
 * Context of the local subnet enumerator: the public enumerator_t, the
 * owning kernel interface, the netlink reply being walked and a buffer for
 * the interface name of the current subnet.
 * NOTE(review): the members documented as "remaining length" and "last
 * subnet enumerated" (used elsewhere as this->len and this->net) and the
 * opening line of the struct are missing from this listing.
 */
2128 /** enumerator over subnets */
2130 enumerator_t
public;
2131 private_kernel_netlink_net_t
*private;
2132 /** message from the kernel */
2133 struct nlmsghdr
*msg
;
2134 /** current message from the kernel */
2135 struct nlmsghdr
*current
;
2136 /** remaining length */
2138 /** last subnet enumerated */
2140 /** interface of current net */
2141 char ifname
[IFNAMSIZ
];
2142 } subnet_enumerator_t
;
/*
 * enumerator_t.destroy of the subnet enumerator: destroys this->net if set.
 * NOTE(review): further body lines are missing from this listing (gap in the
 * embedded numbering); releasing the reply buffer and the enumerator itself
 * presumably happens there - confirm against the complete file.
 */
2144 METHOD(enumerator_t
, destroy_subnet_enumerator
, void,
2145 subnet_enumerator_t
*this)
2147 DESTROY_IF(this->net
);
/*
 * enumerator_t.venumerate for local subnets; the out arguments are net, mask
 * and ifname.  The netlink reply is walked from this->msg with
 * NLMSG_OK/NLMSG_NEXT.  Each usable route (route_usable() with allow_local
 * FALSE) is parsed; routes in RT_TABLE_LOCAL or in our own routing table and
 * routes via a gateway are marked as ignored.  A route with a destination
 * and an output interface whose name if_indextoname() can resolve yields
 * this->net, *mask and *ifname.  *ifname points at this->ifname, i.e. into
 * the enumerator object itself.
 * NOTE(review): case labels, return statements and the check that
 * distinguishes the first call from later ones are not part of this listing.
 */
2152 METHOD(enumerator_t
, enumerate_subnets
, bool,
2153 subnet_enumerator_t
*this, va_list args
)
2159 VA_ARGS_VGET(args
, net
, mask
, ifname
);
2163 this->current
= this->msg
;
2167 this->current
= NLMSG_NEXT(this->current
, this->len
);
2168 DESTROY_IF(this->net
);
2172 while (NLMSG_OK(this->current
, this->len
))
2174 switch (this->current
->nlmsg_type
)
2182 if (!route_usable(this->current
, FALSE
))
2186 parse_route(this->current
, &route
);
2188 if (route
.table
&& (
2189 route
.table
== RT_TABLE_LOCAL
||
2190 route
.table
== this->private->routing_table
))
2191 { /* ignore our own and the local routing tables */
2194 else if (route
.gtw
.ptr
)
2195 { /* ignore routes via gateway/next hop */
2199 if (route
.dst
.ptr
&& route
.oif
&&
2200 if_indextoname(route
.oif
, this->ifname
))
2202 this->net
= host_create_from_chunk(AF_UNSPEC
, route
.dst
, 0);
2204 *mask
= route
.dst_len
;
2205 *ifname
= this->ifname
;
2213 this->current
= NLMSG_NEXT(this->current
, this->len
);
/*
 * kernel_net_t.create_local_subnet_enumerator: sends an RTM_GETROUTE dump
 * request (NLM_F_REQUEST | NLM_F_DUMP) with rtm_scope set to RT_SCOPE_LINK
 * and returns a subnet_enumerator_t whose venumerate/destroy functions are
 * enumerate_subnets/destroy_subnet_enumerator.  If the request fails, a
 * DBG2 message is logged and an empty enumerator is returned.
 * NOTE(review): the initialisation of the enumerator is only partly visible
 * in this listing.
 */
2218 METHOD(kernel_net_t
, create_local_subnet_enumerator
, enumerator_t
*,
2219 private_kernel_netlink_net_t
*this)
2221 netlink_buf_t request
;
2222 struct nlmsghdr
*hdr
, *out
;
2225 subnet_enumerator_t
*enumerator
;
2227 memset(&request
, 0, sizeof(request
));
2230 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
2231 hdr
->nlmsg_type
= RTM_GETROUTE
;
2232 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2233 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
2235 msg
= NLMSG_DATA(hdr
);
2236 msg
->rtm_scope
= RT_SCOPE_LINK
;
2238 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
2240 DBG2(DBG_KNL
, "enumerating local subnets failed");
2241 return enumerator_create_empty();
2246 .enumerate
= enumerator_enumerate_default
,
2247 .venumerate
= _enumerate_subnets
,
2248 .destroy
= _destroy_subnet_enumerator
,
2254 return &enumerator
->public;
/*
 * Builds and sends an address label request for ip.  nlmsg_type selects the
 * operation; for RTM_NEWADDRLABEL the flags NLM_F_CREATE | NLM_F_EXCL are
 * added to NLM_F_REQUEST | NLM_F_ACK.  The prefix length is the full address
 * length (chunk.len * 8) and the attributes IFAL_ADDRESS and IFAL_LABEL are
 * appended.  The status of send_ack() is returned.
 * NOTE(review): the declaration and value of label and the end of the
 * comment preceding it are not part of this listing.
 */
2258 * Manages the creation and deletion of IPv6 address labels for virtual IPs.
2259 * By setting the appropriate nlmsg_type the label is either added or removed.
2261 static status_t
manage_addrlabel(private_kernel_netlink_net_t
*this,
2262 int nlmsg_type
, host_t
*ip
)
2264 netlink_buf_t request
;
2265 struct nlmsghdr
*hdr
;
2266 struct ifaddrlblmsg
*msg
;
2270 memset(&request
, 0, sizeof(request
));
2272 chunk
= ip
->get_address(ip
);
2275 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2276 if (nlmsg_type
== RTM_NEWADDRLABEL
)
2278 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2280 hdr
->nlmsg_type
= nlmsg_type
;
2281 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrlblmsg
));
2283 msg
= NLMSG_DATA(hdr
);
2284 msg
->ifal_family
= ip
->get_family(ip
);
2285 msg
->ifal_prefixlen
= chunk
.len
* 8;
2287 netlink_add_attribute(hdr
, IFAL_ADDRESS
, chunk
, sizeof(request
));
2288 /* doesn't really matter as default labels are < 20 but this makes it kinda
2291 netlink_add_attribute(hdr
, IFAL_LABEL
, chunk_from_thing(label
),
2294 return this->socket
->send_ack(this->socket
, hdr
);
/*
 * Builds and sends an address request of type nlmsg_type for ip on the
 * interface if_index.  flags are OR-ed into NLM_F_REQUEST | NLM_F_ACK; a
 * negative prefix selects the full address length.  For IPv6 addresses
 * IFA_F_NODAD is set, and if this->rta_prefsrc_for_ipv6 is set an address
 * label is added or removed via manage_addrlabel(); if that call does not
 * return SUCCESS an IFA_CACHEINFO attribute with ifa_valid 0xFFFFFFFF is
 * appended instead.  The status of send_ack() is returned.
 * NOTE(review): several lines of the IPv6 branch (the second operand of the
 * labelop selection, the remaining cacheinfo members, the end of one
 * comment) are not part of this listing.
 */
2298 * Manages the creation and deletion of ip addresses on an interface.
2299 * By setting the appropriate nlmsg_type, the ip will be set or unset.
2301 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2302 int flags
, int if_index
, host_t
*ip
, int prefix
)
2304 netlink_buf_t request
;
2305 struct nlmsghdr
*hdr
;
2306 struct ifaddrmsg
*msg
;
2309 memset(&request
, 0, sizeof(request
));
2311 chunk
= ip
->get_address(ip
);
2314 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2315 hdr
->nlmsg_type
= nlmsg_type
;
2316 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
2318 msg
= NLMSG_DATA(hdr
);
2319 msg
->ifa_family
= ip
->get_family(ip
);
2321 msg
->ifa_prefixlen
= prefix
< 0 ? chunk
.len
* 8 : prefix
;
2322 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
2323 msg
->ifa_index
= if_index
;
2325 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
2327 if (ip
->get_family(ip
) == AF_INET6
)
2330 msg
->ifa_flags
|= IFA_F_NODAD
;
2332 if (this->rta_prefsrc_for_ipv6
)
2334 /* if source routes are possible we set a label for this virtual IP
2335 * so it gets only used if forced by our route, and not by the
2336 * default IPv6 address selection */
2337 int labelop
= nlmsg_type
== RTM_NEWADDR
? RTM_NEWADDRLABEL
2339 if (manage_addrlabel(this, labelop
, ip
) != SUCCESS
)
2341 /* if we can't use address labels we let the virtual IP get
2342 * deprecated immediately (but mark it as valid forever), which
2343 * should also avoid that it gets used by the default address
2345 struct ifa_cacheinfo cache
= {
2346 .ifa_valid
= 0xFFFFFFFF,
2349 netlink_add_attribute(hdr
, IFA_CACHEINFO
,
2350 chunk_from_thing(cache
), sizeof(request
));
2354 return this->socket
->send_ack(this->socket
, hdr
);
/*
 * kernel_net_t.add_ip: installs virtual_ip.  A check of
 * this->install_virtual_ip ("disabled by config") comes first.  Under the
 * write lock the address is looked up in this->addrs (already present as
 * regular IP) and in this->vips (already tracked as virtual IP); for a known
 * virtual IP the function waits on this->condvar until
 * is_vip_installed_or_gone() reports a final state and then increments the
 * refcount.  Otherwise a target interface is chosen (the one named by
 * this->install_virtual_ip_on, then iface_name, then the first interface), a
 * new address entry is appended to that interface and added to this->vips,
 * the lock is released and manage_ipaddr() is called with RTM_NEWADDR and
 * NLM_F_CREATE | NLM_F_EXCL.  On SUCCESS the function waits until the
 * address appears and calls queue_route_reinstall() with a copy of the
 * interface name.
 * NOTE(review): return values, the last parameter line and several branch
 * conditions are not part of this listing.
 */
2357 METHOD(kernel_net_t
, add_ip
, status_t
,
2358 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2361 addr_map_entry_t
*entry
, lookup
= {
2364 iface_entry_t
*iface
= NULL
;
2366 if (!this->install_virtual_ip
)
2367 { /* disabled by config */
2371 this->lock
->write_lock(this->lock
);
2372 /* the virtual IP might actually be installed as regular IP, in which case
2373 * we don't track it as virtual IP */
2374 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2375 (void*)addr_map_entry_match
);
2377 { /* otherwise it might already be installed as virtual IP */
2378 entry
= this->vips
->get_match(this->vips
, &lookup
,
2379 (void*)addr_map_entry_match
);
2381 { /* the vip we found can be in one of three states: 1) installed and
2382 * ready, 2) just added by another thread, but not yet confirmed to
2383 * be installed by the kernel, 3) just deleted, but not yet gone.
2384 * Then while we wait below, several things could happen (as we
2385 * release the lock). For instance, the interface could disappear,
2386 * or the IP is finally deleted, and it reappears on a different
2387 * interface. All these cases are handled by the call below. */
2388 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2390 this->condvar
->wait(this->condvar
, this->lock
);
2394 entry
->addr
->refcount
++;
2400 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
2401 entry
->iface
->ifname
);
2402 this->lock
->unlock(this->lock
);
2405 /* try to find the target interface, either by config or via src ip */
2406 if (!this->install_virtual_ip_on
||
2407 !this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
2408 (void**)&iface
, this->install_virtual_ip_on
))
2410 if (!this->ifaces
->find_first(this->ifaces
, iface_entry_by_name
,
2411 (void**)&iface
, iface_name
))
2412 { /* if we don't find the requested interface we just use the first */
2413 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
2423 .ip
= virtual_ip
->clone(virtual_ip
),
2425 .scope
= RT_SCOPE_UNIVERSE
,
2427 iface
->addrs
->insert_last(iface
->addrs
, addr
);
2428 addr_map_entry_add(this->vips
, addr
, iface
);
2429 ifi
= iface
->ifindex
;
2430 this->lock
->unlock(this->lock
);
2431 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
2432 ifi
, virtual_ip
, prefix
) == SUCCESS
)
2434 this->lock
->write_lock(this->lock
);
2435 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
2436 { /* wait until address appears */
2437 this->condvar
->wait(this->condvar
, this->lock
);
2440 { /* we fail if the interface got deleted in the meantime */
2441 ifname
= strdup(entry
->iface
->ifname
);
2442 this->lock
->unlock(this->lock
);
2443 DBG2(DBG_KNL
, "virtual IP %H installed on %s",
2444 virtual_ip
, ifname
);
2445 /* during IKEv1 reauthentication, children get moved from
2446 * the old to the new SA before the virtual IP is available. This
2447 * kills the route for our virtual IP, reinstall. */
2448 queue_route_reinstall(this, ifname
);
2451 this->lock
->unlock(this->lock
);
2453 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
2456 this->lock
->unlock(this->lock
);
2457 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
/*
 * kernel_net_t.del_ip: removes virtual_ip.  A check of
 * this->install_virtual_ip ("disabled by config") comes first.  Under the
 * write lock the address is looked up in this->vips; if it is not tracked
 * there, this->addrs is consulted to log whether it is an existing regular
 * IP or not cached at all.  If the refcount is 1 the entry is flagged as not
 * installed, the lock is released and manage_ipaddr() is called with
 * RTM_DELADDR; if that succeeds and wait is set, the lock is taken again and
 * the function waits on this->condvar (timed_wait with an argument of 1000)
 * while the address is still a known virtual IP and the watcher is not
 * stopped.  On the other path the refcount is decremented and the address is
 * kept.
 * NOTE(review): return values and the last parameter line are not part of
 * this listing.
 */
2462 METHOD(kernel_net_t
, del_ip
, status_t
,
2463 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
2466 addr_map_entry_t
*entry
, lookup
= {
2470 if (!this->install_virtual_ip
)
2471 { /* disabled by config */
2475 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
2477 this->lock
->write_lock(this->lock
);
2478 entry
= this->vips
->get_match(this->vips
, &lookup
,
2479 (void*)addr_map_entry_match
);
2481 { /* we didn't install this IP as virtual IP */
2482 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
2483 (void*)addr_map_entry_match
);
2486 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
2487 entry
->iface
->ifname
);
2488 this->lock
->unlock(this->lock
);
2491 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
2492 this->lock
->unlock(this->lock
);
2495 if (entry
->addr
->refcount
== 1)
2500 /* we set this flag so that threads calling add_ip will block and wait
2501 * until the entry is gone, also so we can wait below */
2502 entry
->addr
->installed
= FALSE
;
2503 ifi
= entry
->iface
->ifindex
;
2504 this->lock
->unlock(this->lock
);
2505 status
= manage_ipaddr(this, RTM_DELADDR
, 0, ifi
, virtual_ip
, prefix
);
2506 if (status
== SUCCESS
&& wait
)
2507 { /* wait until the address is really gone */
2508 this->lock
->write_lock(this->lock
);
2509 while (is_known_vip(this, virtual_ip
) &&
2510 lib
->watcher
->get_state(lib
->watcher
) != WATCHER_STOPPED
)
2511 { /* don't wait during deinit when we can't get notified,
2512 * re-evaluate watcher state if we have to wait longer */
2513 this->condvar
->timed_wait(this->condvar
, this->lock
, 1000);
2515 this->lock
->unlock(this->lock
);
2521 entry
->addr
->refcount
--;
2523 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
2525 this->lock
->unlock(this->lock
);
/*
 * Builds and sends a route request of type nlmsg_type for dst_net/prefixlen.
 *
 * If this->routing_table is 0 and prefixlen is 0, the route is not installed
 * directly: the function calls itself twice for the two half networks (first
 * byte 0x00, then 0x80) with pass forced to FALSE and ORs both results.
 *
 * Otherwise an rtmsg is filled in (family derived from dst_net.len, protocol
 * RTPROT_STATIC, type RTN_THROW if pass is set and RTN_UNICAST if not).
 * Table IDs below 256 are stored in rtm_table; for larger IDs an RTA_TABLE
 * attribute is added when HAVE_RTA_TABLE is defined.  RTA_PREFSRC,
 * RTA_GATEWAY (only if the gateway has the same family as src_ip) and
 * RTA_OIF are added for RTM_NEWROUTE requests of non-pass routes, plus an
 * RTA_METRICS attribute carrying RTAX_MTU and/or RTAX_ADVMSS when this->mtu
 * or this->mss is set.  The status of send_ack() is returned.
 * NOTE(review): some declarations, return statements, the value assigned to
 * half_prefixlen and several conditions are not part of this listing.
 */
2530 * Manages source routes in the routing table.
2531 * By setting the appropriate nlmsg_type, the route gets added or removed.
2533 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
2534 int nlmsg_type
, int flags
, chunk_t dst_net
,
2535 uint8_t prefixlen
, host_t
*gateway
,
2536 host_t
*src_ip
, char *if_name
, bool pass
)
2538 netlink_buf_t request
;
2539 struct nlmsghdr
*hdr
;
2545 /* if route is 0.0.0.0/0, we can't install it, as it would
2546 * overwrite the default route. Instead, we add two routes:
2547 * 0.0.0.0/1 and 128.0.0.0/1 */
2548 if (this->routing_table
== 0 && prefixlen
== 0)
2551 uint8_t half_prefixlen
;
2554 half_net
= chunk_alloca(dst_net
.len
);
2555 memset(half_net
.ptr
, 0, half_net
.len
);
2557 /* no throw routes in the main table */
2558 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
,
2559 half_prefixlen
, gateway
, src_ip
, if_name
, FALSE
);
2560 half_net
.ptr
[0] |= 0x80;
2561 status
|= manage_srcroute(this, nlmsg_type
, flags
, half_net
,
2562 half_prefixlen
, gateway
, src_ip
, if_name
, FALSE
);
2566 memset(&request
, 0, sizeof(request
));
2569 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
2570 hdr
->nlmsg_type
= nlmsg_type
;
2571 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2573 msg
= NLMSG_DATA(hdr
);
2574 msg
->rtm_family
= (dst_net
.len
== 4) ? AF_INET
: AF_INET6
;
2575 msg
->rtm_dst_len
= prefixlen
;
2576 msg
->rtm_protocol
= RTPROT_STATIC
;
2577 msg
->rtm_type
= pass
? RTN_THROW
: RTN_UNICAST
;
2578 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2580 if (this->routing_table
< 256)
2582 msg
->rtm_table
= this->routing_table
;
2586 #ifdef HAVE_RTA_TABLE
2587 chunk
= chunk_from_thing(this->routing_table
);
2588 netlink_add_attribute(hdr
, RTA_TABLE
, chunk
, sizeof(request
));
2590 DBG1(DBG_KNL
, "routing table IDs > 255 are not supported");
2592 #endif /* HAVE_RTA_TABLE */
2594 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
2596 /* only when installing regular routes do we need all the parameters,
2597 * deletes are done by destination net (except if metrics are used, which
2598 * we don't support), for throw routes we don't need any of them either */
2599 if (nlmsg_type
== RTM_NEWROUTE
&& !pass
)
2601 chunk
= src_ip
->get_address(src_ip
);
2602 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
2603 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
2605 chunk
= gateway
->get_address(gateway
);
2606 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
2608 ifindex
= get_interface_index(this, if_name
);
2609 chunk
.ptr
= (char*)&ifindex
;
2610 chunk
.len
= sizeof(ifindex
);
2611 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
2613 if (this->mtu
|| this->mss
)
2615 chunk
= chunk_alloca(RTA_LENGTH((sizeof(struct rtattr
) +
2616 sizeof(uint32_t)) * 2));
2618 rta
= (struct rtattr
*)chunk
.ptr
;
2621 rta
->rta_type
= RTAX_MTU
;
2622 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2623 memcpy(RTA_DATA(rta
), &this->mtu
, sizeof(uint32_t));
2624 chunk
.len
= rta
->rta_len
;
2628 rta
= (struct rtattr
*)(chunk
.ptr
+ RTA_ALIGN(chunk
.len
));
2629 rta
->rta_type
= RTAX_ADVMSS
;
2630 rta
->rta_len
= RTA_LENGTH(sizeof(uint32_t));
2631 memcpy(RTA_DATA(rta
), &this->mss
, sizeof(uint32_t));
2632 chunk
.len
= RTA_ALIGN(chunk
.len
) + rta
->rta_len
;
2634 netlink_add_attribute(hdr
, RTA_METRICS
, chunk
, sizeof(request
));
2637 return this->socket
->send_ack(this->socket
, hdr
);
2641 * Helper struct used to check routes
2644 /** the entry we look for */
2645 route_entry_t route
;
2646 /** kernel interface */
2647 private_kernel_netlink_net_t
*this;
2648 } route_entry_lookup_t
;
/*
 * Match function for the route table: compares destination network and
 * prefix length of the lookup a with the entry b and additionally requires
 * is_known_vip() for b's source address.
 * NOTE(review): the return statements are not part of this listing.
 */
2651 * Check if a matching route entry has a VIP associated
2653 static bool route_with_vip(route_entry_lookup_t
*a
, route_entry_t
*b
)
2655 if (chunk_equals(a
->route
.dst_net
, b
->dst_net
) &&
2656 a
->route
.prefixlen
== b
->prefixlen
&&
2657 is_known_vip(a
->this, b
->src_ip
))
/*
 * Match function for the route table: compares only destination network and
 * prefix length of the lookup a with the entry b.
 * NOTE(review): the return statements are not part of this listing.
 */
2665 * Check if there is any route entry with a matching destination
2667 static bool route_with_dst(route_entry_lookup_t
*a
, route_entry_t
*b
)
2669 if (chunk_equals(a
->route
.dst_net
, b
->dst_net
) &&
2670 a
->route
.prefixlen
== b
->prefixlen
)
/*
 * kernel_net_t.add_route: registers and installs a source route.  pass is
 * forced to FALSE if no separate routing table is configured.  Under
 * this->routes_lock the route is looked up in the this->routes hashtable;
 * the path following that lookup returns ALREADY_DONE.  While holding
 * this->lock for reading, and unless src_ip itself is a known virtual IP, an
 * existing route with a virtual IP for the same destination is searched
 * (such a route is not replaced).  manage_srcroute() is called with
 * RTM_NEWROUTE and NLM_F_CREATE|NLM_F_REPLACE, and on SUCCESS a clone of the
 * lookup entry is stored in the hashtable.
 * NOTE(review): several conditions, parts of the lookup initialiser and the
 * final return are not part of this listing.
 */
2677 METHOD(kernel_net_t
, add_route
, status_t
,
2678 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2679 host_t
*gateway
, host_t
*src_ip
, char *if_name
, bool pass
)
2682 route_entry_t
*found
;
2683 route_entry_lookup_t lookup
= {
2686 .prefixlen
= prefixlen
,
2695 if (!this->routing_table
)
2696 { /* treat these as regular routes if installing in the main table */
2697 pass
= lookup
.route
.pass
= FALSE
;
2700 this->routes_lock
->lock(this->routes_lock
);
2701 found
= this->routes
->ht
.get(&this->routes
->ht
, &lookup
.route
);
2704 this->routes_lock
->unlock(this->routes_lock
);
2705 return ALREADY_DONE
;
2708 /* don't replace the route if we already have one with a VIP installed,
2709 * but keep track of it in case that other route is uninstalled */
2710 this->lock
->read_lock(this->lock
);
2711 if (!is_known_vip(this, src_ip
))
2713 found
= this->routes
->get_match(this->routes
, &lookup
,
2714 (void*)route_with_vip
);
2716 this->lock
->unlock(this->lock
);
2723 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
|NLM_F_REPLACE
,
2724 dst_net
, prefixlen
, gateway
, src_ip
, if_name
,
2727 if (status
== SUCCESS
)
2729 found
= route_entry_clone(&lookup
.route
);
2730 this->routes
->ht
.put(&this->routes
->ht
, found
, found
);
2732 this->routes_lock
->unlock(this->routes_lock
);
/*
 * kernel_net_t.del_route: removes a source route.  pass is forced to FALSE
 * if no separate routing table is configured.  Under this->routes_lock the
 * entry is removed from the this->routes hashtable and destroyed.  Then
 * other routes for the same destination are searched: first one with a
 * virtual IP (under a read lock on this->lock), then any with a matching
 * destination.  A route found this way is installed with RTM_NEWROUTE and
 * NLM_F_CREATE|NLM_F_REPLACE; the other visible path deletes the kernel
 * route with RTM_DELROUTE.
 * NOTE(review): the conditions selecting between these paths, parts of the
 * lookup initialiser and the return statements are not part of this listing.
 */
2736 METHOD(kernel_net_t
, del_route
, status_t
,
2737 private_kernel_netlink_net_t
*this, chunk_t dst_net
, uint8_t prefixlen
,
2738 host_t
*gateway
, host_t
*src_ip
, char *if_name
, bool pass
)
2741 route_entry_t
*found
;
2742 route_entry_lookup_t lookup
= {
2745 .prefixlen
= prefixlen
,
2754 if (!this->routing_table
)
2755 { /* treat these as regular routes if installing in the main table */
2756 pass
= lookup
.route
.pass
= FALSE
;
2759 this->routes_lock
->lock(this->routes_lock
);
2760 found
= this->routes
->ht
.remove(&this->routes
->ht
, &lookup
.route
);
2763 this->routes_lock
->unlock(this->routes_lock
);
2766 route_entry_destroy(found
);
2768 /* check if there are any other routes for the same destination and if
2769 * so update the route, otherwise uninstall it */
2770 this->lock
->read_lock(this->lock
);
2771 found
= this->routes
->get_match(this->routes
, &lookup
,
2772 (void*)route_with_vip
);
2773 this->lock
->unlock(this->lock
);
2776 found
= this->routes
->get_match(this->routes
, &lookup
,
2777 (void*)route_with_dst
);
2781 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
|NLM_F_REPLACE
,
2782 found
->dst_net
, found
->prefixlen
, found
->gateway
,
2783 found
->src_ip
, found
->if_name
, found
->pass
);
2787 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
2788 gateway
, src_ip
, if_name
, pass
);
2790 this->routes_lock
->unlock(this->routes_lock
);
/*
 * Populates the interface and address caches.  One request buffer is used
 * for two dumps (NLM_F_REQUEST | NLM_F_MATCH | NLM_F_ROOT, family
 * AF_UNSPEC): first RTM_GETLINK, then RTM_GETADDR.  The replies are walked
 * with NLMSG_OK/NLMSG_NEXT and handed to process_link() and process_addr()
 * with FALSE as last argument.  Finally all interfaces are logged under a
 * read lock: those that are up and usable together with their addresses at
 * DBG2, the others at DBG3 with the reason ("down" or "configuration").
 * NOTE(review): error returns, case labels and the handling of the reply
 * buffers after each dump are not part of this listing.
 */
2795 * Initialize a list of local addresses.
2797 static status_t
init_address_list(private_kernel_netlink_net_t
*this)
2799 netlink_buf_t request
;
2800 struct nlmsghdr
*out
, *current
, *in
;
2801 struct rtgenmsg
*msg
;
2803 enumerator_t
*ifaces
, *addrs
;
2804 iface_entry_t
*iface
;
2807 DBG2(DBG_KNL
, "known interfaces and IP addresses:");
2809 memset(&request
, 0, sizeof(request
));
2812 in
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtgenmsg
));
2813 in
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_MATCH
| NLM_F_ROOT
;
2814 msg
= NLMSG_DATA(in
);
2815 msg
->rtgen_family
= AF_UNSPEC
;
2818 in
->nlmsg_type
= RTM_GETLINK
;
2819 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2824 while (NLMSG_OK(current
, len
))
2826 switch (current
->nlmsg_type
)
2831 process_link(this, current
, FALSE
);
2834 current
= NLMSG_NEXT(current
, len
);
2841 /* get all interface addresses */
2842 in
->nlmsg_type
= RTM_GETADDR
;
2843 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2848 while (NLMSG_OK(current
, len
))
2850 switch (current
->nlmsg_type
)
2855 process_addr(this, current
, FALSE
);
2858 current
= NLMSG_NEXT(current
, len
);
2865 this->lock
->read_lock(this->lock
);
2866 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
2867 while (ifaces
->enumerate(ifaces
, &iface
))
2869 if (iface_entry_up_and_usable(iface
))
2871 DBG2(DBG_KNL
, " %s", iface
->ifname
);
2872 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
2873 while (addrs
->enumerate(addrs
, (void**)&addr
))
2875 DBG2(DBG_KNL
, " %H", addr
->ip
);
2877 addrs
->destroy(addrs
);
2881 DBG3(DBG_KNL
, " %s (ignored, %s)", iface
->ifname
,
2882 iface
->usable
? "down" : "configuration");
2885 ifaces
->destroy(ifaces
);
2886 this->lock
->unlock(this->lock
);
2891 * create or delete a rule to use our routing table
2893 static status_t
manage_rule(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2894 int family
, uint32_t table
, uint32_t prio
)
2896 netlink_buf_t request
;
2897 struct nlmsghdr
*hdr
;
2902 memset(&request
, 0, sizeof(request
));
2904 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2905 hdr
->nlmsg_type
= nlmsg_type
;
2906 if (nlmsg_type
== RTM_NEWRULE
)
2908 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2910 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2912 msg
= NLMSG_DATA(hdr
);
2913 msg
->rtm_family
= family
;
2914 msg
->rtm_protocol
= RTPROT_BOOT
;
2915 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2916 msg
->rtm_type
= RTN_UNICAST
;
2918 if (this->routing_table
< 256)
2920 msg
->rtm_table
= table
;
2924 #ifdef HAVE_LINUX_FIB_RULES_H
2925 chunk
= chunk_from_thing(table
);
2926 netlink_add_attribute(hdr
, FRA_TABLE
, chunk
, sizeof(request
));
2928 DBG1(DBG_KNL
, "routing table IDs > 255 are not supported");
2930 #endif /* HAVE_LINUX_FIB_RULES_H */
2932 chunk
= chunk_from_thing(prio
);
2933 netlink_add_attribute(hdr
, RTA_PRIORITY
, chunk
, sizeof(request
));
2935 fwmark
= lib
->settings
->get_str(lib
->settings
,
2936 "%s.plugins.kernel-netlink.fwmark", NULL
, lib
->ns
);
2939 #ifdef HAVE_LINUX_FIB_RULES_H
2942 if (fwmark
[0] == '!')
2944 msg
->rtm_flags
|= FIB_RULE_INVERT
;
2947 if (mark_from_string(fwmark
, MARK_OP_NONE
, &mark
))
2949 chunk
= chunk_from_thing(mark
.value
);
2950 netlink_add_attribute(hdr
, FRA_FWMARK
, chunk
, sizeof(request
));
2951 chunk
= chunk_from_thing(mark
.mask
);
2952 netlink_add_attribute(hdr
, FRA_FWMASK
, chunk
, sizeof(request
));
2953 if (msg
->rtm_flags
& FIB_RULE_INVERT
)
2955 this->routing_mark
= mark
;
2959 DBG1(DBG_KNL
, "setting firewall mark on routing rule is not supported");
2960 #endif /* HAVE_LINUX_FIB_RULES_H */
2962 return this->socket
->send_ack(this->socket
, hdr
);
2966 * check for kernel features (currently only via version number)
2968 static void check_kernel_features(private_kernel_netlink_net_t
*this)
2970 struct utsname utsname
;
2973 if (uname(&utsname
) == 0)
2975 switch(sscanf(utsname
.release
, "%d.%d.%d", &a
, &b
, &c
))
2980 if (b
== 6 && c
>= 36)
2982 this->rta_mark
= TRUE
;
2984 DBG2(DBG_KNL
, "detected Linux %d.%d.%d, no support for "
2985 "RTA_PREFSRC for IPv6 routes", a
, b
, c
);
2990 /* only 3.x+ uses two part version numbers */
2991 this->rta_prefsrc_for_ipv6
= TRUE
;
2992 this->rta_mark
= TRUE
;
3001 * Destroy an address to iface map
3003 static void addr_map_destroy(hashlist_t
*map
)
3005 map
->ht
.destroy_function(&map
->ht
, (void*)free
);
3008 METHOD(kernel_net_t
, destroy
, void,
3009 private_kernel_netlink_net_t
*this)
3011 enumerator_t
*enumerator
;
3012 route_entry_t
*route
;
3014 if (this->routing_table
&& this->socket
)
3016 manage_rule(this, RTM_DELRULE
, AF_INET
, this->routing_table
,
3017 this->routing_table_prio
);
3018 manage_rule(this, RTM_DELRULE
, AF_INET6
, this->routing_table
,
3019 this->routing_table_prio
);
3021 DESTROY_IF(this->socket_events
);
3022 enumerator
= this->routes
->ht
.create_enumerator(&this->routes
->ht
);
3023 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
3025 manage_srcroute(this, RTM_DELROUTE
, 0, route
->dst_net
, route
->prefixlen
,
3026 route
->gateway
, route
->src_ip
, route
->if_name
,
3028 route_entry_destroy(route
);
3030 enumerator
->destroy(enumerator
);
3031 this->routes
->destroy(this->routes
);
3032 this->routes_lock
->destroy(this->routes_lock
);
3033 DESTROY_IF(this->socket
);
3035 net_changes_clear(this);
3036 this->net_changes
->destroy(this->net_changes
);
3037 this->net_changes_lock
->destroy(this->net_changes_lock
);
3039 addr_map_destroy(this->addrs
);
3040 addr_map_destroy(this->vips
);
3042 this->ifaces
->destroy_function(this->ifaces
, (void*)iface_entry_destroy
);
3043 this->rt_exclude
->destroy(this->rt_exclude
);
3044 this->roam_lock
->destroy(this->roam_lock
);
3045 this->condvar
->destroy(this->condvar
);
3046 this->lock
->destroy(this->lock
);
3051 * Described in header.
3053 kernel_netlink_net_t
*kernel_netlink_net_create()
3055 private_kernel_netlink_net_t
*this;
3056 enumerator_t
*enumerator
;
3063 .get_interface
= _get_interface_name
,
3064 .create_address_enumerator
= _create_address_enumerator
,
3065 .create_local_subnet_enumerator
= _create_local_subnet_enumerator
,
3066 .get_source_addr
= _get_source_addr
,
3067 .get_nexthop
= _get_nexthop
,
3070 .add_route
= _add_route
,
3071 .del_route
= _del_route
,
3072 .destroy
= _destroy
,
3075 .socket
= netlink_socket_create(NETLINK_ROUTE
, rt_msg_names
,
3076 lib
->settings
->get_bool(lib
->settings
,
3077 "%s.plugins.kernel-netlink.parallel_route", FALSE
, lib
->ns
)),
3078 .rt_exclude
= linked_list_create(),
3079 .routes
= hashlist_create((hashtable_hash_t
)route_entry_hash
,
3080 (hashtable_equals_t
)route_entry_equals
, 16),
3081 .net_changes
= hashtable_create(
3082 (hashtable_hash_t
)net_change_hash
,
3083 (hashtable_equals_t
)net_change_equals
, 16),
3084 .addrs
= hashlist_create(
3085 (hashtable_hash_t
)addr_map_entry_hash
,
3086 (hashtable_equals_t
)addr_map_entry_equals
, 16),
3087 .vips
= hashlist_create((hashtable_hash_t
)addr_map_entry_hash
,
3088 (hashtable_equals_t
)addr_map_entry_equals
, 16),
3089 .routes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
3090 .net_changes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
3091 .ifaces
= linked_list_create(),
3092 .lock
= rwlock_create(RWLOCK_TYPE_DEFAULT
),
3093 .condvar
= rwlock_condvar_create(),
3094 .roam_lock
= spinlock_create(),
3095 .routing_table
= lib
->settings
->get_int(lib
->settings
,
3096 "%s.routing_table", ROUTING_TABLE
, lib
->ns
),
3097 .routing_table_prio
= lib
->settings
->get_int(lib
->settings
,
3098 "%s.routing_table_prio", ROUTING_TABLE_PRIO
, lib
->ns
),
3099 .process_route
= lib
->settings
->get_bool(lib
->settings
,
3100 "%s.process_route", TRUE
, lib
->ns
),
3101 .install_routes
= lib
->settings
->get_bool(lib
->settings
,
3102 "%s.install_routes", TRUE
, lib
->ns
),
3103 .install_virtual_ip
= lib
->settings
->get_bool(lib
->settings
,
3104 "%s.install_virtual_ip", TRUE
, lib
->ns
),
3105 .install_virtual_ip_on
= lib
->settings
->get_str(lib
->settings
,
3106 "%s.install_virtual_ip_on", NULL
, lib
->ns
),
3107 .prefer_temporary_addrs
= lib
->settings
->get_bool(lib
->settings
,
3108 "%s.prefer_temporary_addrs", FALSE
, lib
->ns
),
3109 .roam_events
= lib
->settings
->get_bool(lib
->settings
,
3110 "%s.plugins.kernel-netlink.roam_events", TRUE
, lib
->ns
),
3111 .process_rules
= lib
->settings
->get_bool(lib
->settings
,
3112 "%s.plugins.kernel-netlink.process_rules", FALSE
, lib
->ns
),
3113 .mtu
= lib
->settings
->get_int(lib
->settings
,
3114 "%s.plugins.kernel-netlink.mtu", 0, lib
->ns
),
3115 .mss
= lib
->settings
->get_int(lib
->settings
,
3116 "%s.plugins.kernel-netlink.mss", 0, lib
->ns
),
3118 timerclear(&this->last_route_reinstall
);
3119 timerclear(&this->next_roam
);
3121 check_kernel_features(this);
3129 exclude
= lib
->settings
->get_str(lib
->settings
,
3130 "%s.ignore_routing_tables", NULL
, lib
->ns
);
3136 enumerator
= enumerator_create_token(exclude
, " ", " ");
3137 while (enumerator
->enumerate(enumerator
, &token
))
3140 table
= strtoul(token
, NULL
, 10);
3144 this->rt_exclude
->insert_last(this->rt_exclude
, (void*)table
);
3147 enumerator
->destroy(enumerator
);
3150 groups
= nl_group(RTNLGRP_IPV4_IFADDR
) |
3151 nl_group(RTNLGRP_IPV6_IFADDR
) |
3152 nl_group(RTNLGRP_LINK
);
3153 if (this->process_route
)
3155 groups
|= nl_group(RTNLGRP_IPV4_ROUTE
) |
3156 nl_group(RTNLGRP_IPV6_ROUTE
);
3158 if (this->process_rules
)
3160 groups
|= nl_group(RTNLGRP_IPV4_RULE
) |
3161 nl_group(RTNLGRP_IPV6_RULE
);
3163 this->socket_events
= netlink_event_socket_create(NETLINK_ROUTE
, groups
,
3164 receive_events
, this);
3165 if (!this->socket_events
)
3171 if (init_address_list(this) != SUCCESS
)
3173 DBG1(DBG_KNL
, "unable to get interface list");
3178 if (this->routing_table
)
3180 if (manage_rule(this, RTM_NEWRULE
, AF_INET
, this->routing_table
,
3181 this->routing_table_prio
) != SUCCESS
)
3183 DBG1(DBG_KNL
, "unable to create IPv4 routing table rule");
3185 if (manage_rule(this, RTM_NEWRULE
, AF_INET6
, this->routing_table
,
3186 this->routing_table_prio
) != SUCCESS
)
3188 DBG1(DBG_KNL
, "unable to create IPv6 routing table rule");
3192 return &this->public;