2 * Copyright (C) 2008-2013 Tobias Brunner
3 * Copyright (C) 2005-2008 Martin Willi
4 * Hochschule fuer Technik Rapperswil
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * Copyright (C) 2010 secunet Security Networks AG
19 * Copyright (C) 2010 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <sys/utsname.h>
42 #include <linux/netlink.h>
43 #include <linux/rtnetlink.h>
47 #ifdef HAVE_LINUX_FIB_RULES_H
48 #include <linux/fib_rules.h>
51 #include "kernel_netlink_net.h"
52 #include "kernel_netlink_shared.h"
55 #include <utils/debug.h>
56 #include <threading/mutex.h>
57 #include <threading/rwlock.h>
58 #include <threading/rwlock_condvar.h>
59 #include <threading/spinlock.h>
60 #include <collections/hashtable.h>
61 #include <collections/linked_list.h>
62 #include <processing/jobs/callback_job.h>
64 /** delay before firing roam events (ms) */
65 #define ROAM_DELAY 100
67 /** delay before reinstalling routes (ms) */
68 #define ROUTE_DELAY 100
70 /** maximum recursion when searching for addresses in get_route() */
71 #define MAX_ROUTE_RECURSION 2
74 #define ROUTING_TABLE 0
77 #ifndef ROUTING_TABLE_PRIO
78 #define ROUTING_TABLE_PRIO 0
81 typedef struct addr_entry_t addr_entry_t
;
84 * IP address in an iface_entry_t
91 /** scope of the address */
94 /** number of times this IP is used, if virtual (i.e. managed by us) */
97 /** TRUE once it is installed, if virtual */
102 * destroy an addr_entry_t object
104 static void addr_entry_destroy(addr_entry_t
*this)
106 this->ip
->destroy(this->ip
);
110 typedef struct iface_entry_t iface_entry_t
;
113 * A network interface on this system, containing addr_entry_t's
115 struct iface_entry_t
{
117 /** interface index */
120 /** name of the interface */
121 char ifname
[IFNAMSIZ
];
123 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
126 /** list of addresses as host_t */
127 linked_list_t
*addrs
;
129 /** TRUE if usable by config */
134 * destroy an interface entry
136 static void iface_entry_destroy(iface_entry_t
*this)
138 this->addrs
->destroy_function(this->addrs
, (void*)addr_entry_destroy
);
143 * find an interface entry by index
145 static bool iface_entry_by_index(iface_entry_t
*this, int *ifindex
)
147 return this->ifindex
== *ifindex
;
151 * find an interface entry by name
153 static bool iface_entry_by_name(iface_entry_t
*this, char *ifname
)
155 return streq(this->ifname
, ifname
);
159 * check if an interface is up
161 static inline bool iface_entry_up(iface_entry_t
*iface
)
163 return (iface
->flags
& IFF_UP
) == IFF_UP
;
167 * check if an interface is up and usable
169 static inline bool iface_entry_up_and_usable(iface_entry_t
*iface
)
171 return iface
->usable
&& iface_entry_up(iface
);
174 typedef struct addr_map_entry_t addr_map_entry_t
;
177 * Entry that maps an IP address to an interface entry
179 struct addr_map_entry_t
{
180 /** The IP address */
183 /** The address entry for this IP address */
186 /** The interface this address is installed on */
187 iface_entry_t
*iface
;
191 * Hash a addr_map_entry_t object, all entries with the same IP address
192 * are stored in the same bucket
194 static u_int
addr_map_entry_hash(addr_map_entry_t
*this)
196 return chunk_hash(this->ip
->get_address(this->ip
));
200 * Compare two addr_map_entry_t objects, two entries are equal if they are
201 * installed on the same interface
203 static bool addr_map_entry_equals(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
205 return a
->iface
->ifindex
== b
->iface
->ifindex
&&
206 a
->ip
->ip_equals(a
->ip
, b
->ip
);
210 * Used with get_match this finds an address entry if it is installed on
211 * an up and usable interface
213 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t
*a
,
216 return iface_entry_up_and_usable(b
->iface
) &&
217 a
->ip
->ip_equals(a
->ip
, b
->ip
);
221 * Used with get_match this finds an address entry if it is installed on
222 * any active local interface
224 static bool addr_map_entry_match_up(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
226 return iface_entry_up(b
->iface
) && a
->ip
->ip_equals(a
->ip
, b
->ip
);
230 * Used with get_match this finds an address entry if it is installed on
231 * any local interface
233 static bool addr_map_entry_match(addr_map_entry_t
*a
, addr_map_entry_t
*b
)
235 return a
->ip
->ip_equals(a
->ip
, b
->ip
);
238 typedef struct route_entry_t route_entry_t
;
241 * Installed routing entry
243 struct route_entry_t
{
244 /** Name of the interface the route is bound to */
247 /** Source ip of the route */
250 /** Gateway for this route */
253 /** Destination net */
256 /** Destination net prefixlen */
261 * Clone a route_entry_t object.
263 static route_entry_t
*route_entry_clone(route_entry_t
*this)
265 route_entry_t
*route
;
268 .if_name
= strdup(this->if_name
),
269 .src_ip
= this->src_ip
->clone(this->src_ip
),
270 .gateway
= this->gateway
? this->gateway
->clone(this->gateway
) : NULL
,
271 .dst_net
= chunk_clone(this->dst_net
),
272 .prefixlen
= this->prefixlen
,
278 * Destroy a route_entry_t object
280 static void route_entry_destroy(route_entry_t
*this)
283 DESTROY_IF(this->src_ip
);
284 DESTROY_IF(this->gateway
);
285 chunk_free(&this->dst_net
);
290 * Hash a route_entry_t object
292 static u_int
route_entry_hash(route_entry_t
*this)
294 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
295 chunk_hash(this->dst_net
));
299 * Compare two route_entry_t objects
301 static bool route_entry_equals(route_entry_t
*a
, route_entry_t
*b
)
303 if (a
->if_name
&& b
->if_name
&& streq(a
->if_name
, b
->if_name
) &&
304 a
->src_ip
->ip_equals(a
->src_ip
, b
->src_ip
) &&
305 chunk_equals(a
->dst_net
, b
->dst_net
) && a
->prefixlen
== b
->prefixlen
)
307 return (!a
->gateway
&& !b
->gateway
) || (a
->gateway
&& b
->gateway
&&
308 a
->gateway
->ip_equals(a
->gateway
, b
->gateway
));
313 typedef struct net_change_t net_change_t
;
316 * Queued network changes
318 struct net_change_t
{
319 /** Name of the interface that got activated (or an IP appeared on) */
324 * Destroy a net_change_t object
326 static void net_change_destroy(net_change_t
*this)
333 * Hash a net_change_t object
335 static u_int
net_change_hash(net_change_t
*this)
337 return chunk_hash(chunk_create(this->if_name
, strlen(this->if_name
)));
341 * Compare two net_change_t objects
343 static bool net_change_equals(net_change_t
*a
, net_change_t
*b
)
345 return streq(a
->if_name
, b
->if_name
);
348 typedef struct private_kernel_netlink_net_t private_kernel_netlink_net_t
;
351 * Private variables and functions of kernel_netlink_net class.
353 struct private_kernel_netlink_net_t
{
355 * Public part of the kernel_netlink_net_t object.
357 kernel_netlink_net_t
public;
360 * lock to access various lists and maps
365 * condition variable to signal virtual IP add/removal
367 rwlock_condvar_t
*condvar
;
370 * Cached list of interfaces and its addresses (iface_entry_t)
372 linked_list_t
*ifaces
;
375 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
380 * Map for virtual IP addresses to iface_entry_t objects (addr_map_entry_t)
385 * netlink rt socket (routing)
387 netlink_socket_t
*socket
;
390 * Netlink rt socket to receive address change events
395 * earliest time of the next roam event
400 * roam event due to address change
405 * lock to check and update roam event time
407 spinlock_t
*roam_lock
;
410 * routing table to install routes
415 * priority of used routing table
417 int routing_table_prio
;
427 mutex_t
*routes_lock
;
430 * interface changes which may trigger route reinstallation
432 hashtable_t
*net_changes
;
435 * mutex for route reinstallation triggers
437 mutex_t
*net_changes_lock
;
440 * time of last route reinstallation
442 timeval_t last_route_reinstall
;
445 * whether to react to RTM_NEWROUTE or RTM_DELROUTE events
450 * whether to trigger roam events
455 * whether to actually install virtual IPs
457 bool install_virtual_ip
;
460 * the name of the interface virtual IP addresses are installed on
462 char *install_virtual_ip_on
;
465 * whether preferred source addresses can be specified for IPv6 routes
467 bool rta_prefsrc_for_ipv6
;
470 * list with routing tables to be excluded from route lookup
472 linked_list_t
*rt_exclude
;
476 * Forward declaration
478 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
479 int nlmsg_type
, int flags
, chunk_t dst_net
,
480 u_int8_t prefixlen
, host_t
*gateway
,
481 host_t
*src_ip
, char *if_name
);
484 * Clear the queued network changes.
486 static void net_changes_clear(private_kernel_netlink_net_t
*this)
488 enumerator_t
*enumerator
;
489 net_change_t
*change
;
491 enumerator
= this->net_changes
->create_enumerator(this->net_changes
);
492 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&change
))
494 this->net_changes
->remove_at(this->net_changes
, enumerator
);
495 net_change_destroy(change
);
497 enumerator
->destroy(enumerator
);
501 * Act upon queued network changes.
503 static job_requeue_t
reinstall_routes(private_kernel_netlink_net_t
*this)
505 enumerator_t
*enumerator
;
506 route_entry_t
*route
;
508 this->net_changes_lock
->lock(this->net_changes_lock
);
509 this->routes_lock
->lock(this->routes_lock
);
511 enumerator
= this->routes
->create_enumerator(this->routes
);
512 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
514 net_change_t
*change
, lookup
= {
515 .if_name
= route
->if_name
,
517 /* check if a change for the outgoing interface is queued */
518 change
= this->net_changes
->get(this->net_changes
, &lookup
);
520 { /* in case src_ip is not on the outgoing interface */
521 if (this->public.interface
.get_interface(&this->public.interface
,
522 route
->src_ip
, &lookup
.if_name
))
524 if (!streq(lookup
.if_name
, route
->if_name
))
526 change
= this->net_changes
->get(this->net_changes
, &lookup
);
528 free(lookup
.if_name
);
533 manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
534 route
->dst_net
, route
->prefixlen
, route
->gateway
,
535 route
->src_ip
, route
->if_name
);
538 enumerator
->destroy(enumerator
);
539 this->routes_lock
->unlock(this->routes_lock
);
541 net_changes_clear(this);
542 this->net_changes_lock
->unlock(this->net_changes_lock
);
543 return JOB_REQUEUE_NONE
;
547 * Queue route reinstallation caused by network changes for a given interface.
549 * The route reinstallation is delayed for a while and only done once for
550 * several calls during this delay, in order to avoid doing it too often.
551 * The interface name is freed.
553 static void queue_route_reinstall(private_kernel_netlink_net_t
*this,
556 net_change_t
*update
, *found
;
564 this->net_changes_lock
->lock(this->net_changes_lock
);
565 found
= this->net_changes
->put(this->net_changes
, update
, update
);
568 net_change_destroy(found
);
570 time_monotonic(&now
);
571 if (timercmp(&now
, &this->last_route_reinstall
, >))
573 timeval_add_ms(&now
, ROUTE_DELAY
);
574 this->last_route_reinstall
= now
;
576 job
= (job_t
*)callback_job_create((callback_job_cb_t
)reinstall_routes
,
578 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROUTE_DELAY
);
580 this->net_changes_lock
->unlock(this->net_changes_lock
);
584 * check if the given IP is known as virtual IP and currently installed
586 * this function will also return TRUE if the virtual IP entry disappeared.
587 * in that case the returned entry will be NULL.
589 * this->lock must be held when calling this function
591 static bool is_vip_installed_or_gone(private_kernel_netlink_net_t
*this,
592 host_t
*ip
, addr_map_entry_t
**entry
)
594 addr_map_entry_t lookup
= {
598 *entry
= this->vips
->get_match(this->vips
, &lookup
,
599 (void*)addr_map_entry_match
);
601 { /* the virtual IP disappeared */
604 return (*entry
)->addr
->installed
;
608 * check if the given IP is known as virtual IP
610 * this->lock must be held when calling this function
612 static bool is_known_vip(private_kernel_netlink_net_t
*this, host_t
*ip
)
614 addr_map_entry_t lookup
= {
618 return this->vips
->get_match(this->vips
, &lookup
,
619 (void*)addr_map_entry_match
) != NULL
;
623 * Add an address map entry
625 static void addr_map_entry_add(hashtable_t
*map
, addr_entry_t
*addr
,
626 iface_entry_t
*iface
)
628 addr_map_entry_t
*entry
;
635 entry
= map
->put(map
, entry
, entry
);
640 * Remove an address map entry
642 static void addr_map_entry_remove(hashtable_t
*map
, addr_entry_t
*addr
,
643 iface_entry_t
*iface
)
645 addr_map_entry_t
*entry
, lookup
= {
651 entry
= map
->remove(map
, &lookup
);
656 * get the first non-virtual ip address on the given interface.
657 * if a candidate address is given, we first search for that address and if not
658 * found return the address as above.
659 * returned host is a clone, has to be freed by caller.
661 * this->lock must be held when calling this function
663 static host_t
*get_interface_address(private_kernel_netlink_net_t
*this,
664 int ifindex
, int family
, host_t
*candidate
)
666 iface_entry_t
*iface
;
671 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
672 (void**)&iface
, &ifindex
) == SUCCESS
)
675 { /* only use interfaces not excluded by config */
676 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
677 while (addrs
->enumerate(addrs
, &addr
))
680 { /* ignore virtual IP addresses */
683 if (addr
->ip
->get_family(addr
->ip
) == family
)
685 if (!candidate
|| candidate
->ip_equals(candidate
, addr
->ip
))
686 { /* stop at the first address if we don't search for a
687 * candidate or if the candidate matches */
692 { /* store the first address as fallback if candidate is
698 addrs
->destroy(addrs
);
701 return ip
? ip
->clone(ip
) : NULL
;
705 * callback function that raises the delayed roam event
707 static job_requeue_t
roam_event(private_kernel_netlink_net_t
*this)
711 this->roam_lock
->lock(this->roam_lock
);
712 address
= this->roam_address
;
713 this->roam_address
= FALSE
;
714 this->roam_lock
->unlock(this->roam_lock
);
715 hydra
->kernel_interface
->roam(hydra
->kernel_interface
, address
);
716 return JOB_REQUEUE_NONE
;
720 * fire a roaming event. we delay it for a bit and fire only one event
721 * for multiple calls. otherwise we would create too many events.
723 static void fire_roam_event(private_kernel_netlink_net_t
*this, bool address
)
728 if (!this->roam_events
)
733 time_monotonic(&now
);
734 this->roam_lock
->lock(this->roam_lock
);
735 this->roam_address
|= address
;
736 if (!timercmp(&now
, &this->next_roam
, >))
738 this->roam_lock
->unlock(this->roam_lock
);
741 timeval_add_ms(&now
, ROAM_DELAY
);
742 this->next_roam
= now
;
743 this->roam_lock
->unlock(this->roam_lock
);
745 job
= (job_t
*)callback_job_create((callback_job_cb_t
)roam_event
,
747 lib
->scheduler
->schedule_job_ms(lib
->scheduler
, job
, ROAM_DELAY
);
751 * check if an interface with a given index is up and usable
753 * this->lock must be locked when calling this function
755 static bool is_interface_up_and_usable(private_kernel_netlink_net_t
*this,
758 iface_entry_t
*iface
;
760 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
761 (void**)&iface
, &index
) == SUCCESS
)
763 return iface_entry_up_and_usable(iface
);
769 * unregister the current addr_entry_t from the hashtable it is stored in
771 * this->lock must be locked when calling this function
773 static void addr_entry_unregister(addr_entry_t
*addr
, iface_entry_t
*iface
,
774 private_kernel_netlink_net_t
*this)
778 addr_map_entry_remove(this->vips
, addr
, iface
);
779 this->condvar
->broadcast(this->condvar
);
782 addr_map_entry_remove(this->addrs
, addr
, iface
);
786 * process RTM_NEWLINK/RTM_DELLINK from kernel
788 static void process_link(private_kernel_netlink_net_t
*this,
789 struct nlmsghdr
*hdr
, bool event
)
791 struct ifinfomsg
* msg
= (struct ifinfomsg
*)(NLMSG_DATA(hdr
));
792 struct rtattr
*rta
= IFLA_RTA(msg
);
793 size_t rtasize
= IFLA_PAYLOAD (hdr
);
794 enumerator_t
*enumerator
;
795 iface_entry_t
*current
, *entry
= NULL
;
797 bool update
= FALSE
, update_routes
= FALSE
;
799 while (RTA_OK(rta
, rtasize
))
801 switch (rta
->rta_type
)
804 name
= RTA_DATA(rta
);
807 rta
= RTA_NEXT(rta
, rtasize
);
814 this->lock
->write_lock(this->lock
);
815 switch (hdr
->nlmsg_type
)
819 if (this->ifaces
->find_first(this->ifaces
,
820 (void*)iface_entry_by_index
, (void**)&entry
,
821 &msg
->ifi_index
) != SUCCESS
)
824 .ifindex
= msg
->ifi_index
,
825 .addrs
= linked_list_create(),
826 .usable
= hydra
->kernel_interface
->is_interface_usable(
827 hydra
->kernel_interface
, name
),
829 this->ifaces
->insert_last(this->ifaces
, entry
);
831 strncpy(entry
->ifname
, name
, IFNAMSIZ
);
832 entry
->ifname
[IFNAMSIZ
-1] = '\0';
833 if (event
&& entry
->usable
)
835 if (!(entry
->flags
& IFF_UP
) && (msg
->ifi_flags
& IFF_UP
))
837 update
= update_routes
= TRUE
;
838 DBG1(DBG_KNL
, "interface %s activated", name
);
840 if ((entry
->flags
& IFF_UP
) && !(msg
->ifi_flags
& IFF_UP
))
843 DBG1(DBG_KNL
, "interface %s deactivated", name
);
846 entry
->flags
= msg
->ifi_flags
;
851 enumerator
= this->ifaces
->create_enumerator(this->ifaces
);
852 while (enumerator
->enumerate(enumerator
, ¤t
))
854 if (current
->ifindex
== msg
->ifi_index
)
856 if (event
&& current
->usable
)
859 DBG1(DBG_KNL
, "interface %s deleted", current
->ifname
);
861 /* TODO: move virtual IPs installed on this interface to
862 * another interface? */
863 this->ifaces
->remove_at(this->ifaces
, enumerator
);
864 current
->addrs
->invoke_function(current
->addrs
,
865 (void*)addr_entry_unregister
, current
, this);
866 iface_entry_destroy(current
);
870 enumerator
->destroy(enumerator
);
874 this->lock
->unlock(this->lock
);
876 if (update_routes
&& event
)
878 queue_route_reinstall(this, strdup(name
));
883 fire_roam_event(this, TRUE
);
888 * process RTM_NEWADDR/RTM_DELADDR from kernel
890 static void process_addr(private_kernel_netlink_net_t
*this,
891 struct nlmsghdr
*hdr
, bool event
)
893 struct ifaddrmsg
* msg
= (struct ifaddrmsg
*)(NLMSG_DATA(hdr
));
894 struct rtattr
*rta
= IFA_RTA(msg
);
895 size_t rtasize
= IFA_PAYLOAD (hdr
);
897 iface_entry_t
*iface
;
898 chunk_t local
= chunk_empty
, address
= chunk_empty
;
899 char *route_ifname
= NULL
;
900 bool update
= FALSE
, found
= FALSE
, changed
= FALSE
;
902 while (RTA_OK(rta
, rtasize
))
904 switch (rta
->rta_type
)
907 local
.ptr
= RTA_DATA(rta
);
908 local
.len
= RTA_PAYLOAD(rta
);
911 address
.ptr
= RTA_DATA(rta
);
912 address
.len
= RTA_PAYLOAD(rta
);
915 rta
= RTA_NEXT(rta
, rtasize
);
918 /* For PPP interfaces, we need the IFA_LOCAL address,
919 * IFA_ADDRESS is the peers address. But IFA_LOCAL is
920 * not included in all cases (IPv6?), so fallback to IFA_ADDRESS. */
923 host
= host_create_from_chunk(msg
->ifa_family
, local
, 0);
925 else if (address
.ptr
)
927 host
= host_create_from_chunk(msg
->ifa_family
, address
, 0);
935 this->lock
->write_lock(this->lock
);
936 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_index
,
937 (void**)&iface
, &msg
->ifa_index
) == SUCCESS
)
939 addr_map_entry_t
*entry
, lookup
= {
945 entry
= this->vips
->get(this->vips
, &lookup
);
948 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
949 { /* mark as installed and signal waiting threads */
950 entry
->addr
->installed
= TRUE
;
953 { /* the address was already marked as uninstalled */
955 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
956 addr_map_entry_remove(this->vips
, addr
, iface
);
957 addr_entry_destroy(addr
);
959 /* no roam events etc. for virtual IPs */
960 this->condvar
->broadcast(this->condvar
);
961 this->lock
->unlock(this->lock
);
965 entry
= this->addrs
->get(this->addrs
, &lookup
);
968 if (hdr
->nlmsg_type
== RTM_DELADDR
)
972 iface
->addrs
->remove(iface
->addrs
, addr
, NULL
);
976 DBG1(DBG_KNL
, "%H disappeared from %s", host
,
979 addr_map_entry_remove(this->addrs
, addr
, iface
);
980 addr_entry_destroy(addr
);
985 if (hdr
->nlmsg_type
== RTM_NEWADDR
)
989 route_ifname
= strdup(iface
->ifname
);
991 .ip
= host
->clone(host
),
992 .scope
= msg
->ifa_scope
,
994 iface
->addrs
->insert_last(iface
->addrs
, addr
);
995 addr_map_entry_add(this->addrs
, addr
, iface
);
996 if (event
&& iface
->usable
)
998 DBG1(DBG_KNL
, "%H appeared on %s", host
, iface
->ifname
);
1002 if (found
&& (iface
->flags
& IFF_UP
))
1007 { /* ignore events for interfaces excluded by config */
1008 update
= changed
= FALSE
;
1011 this->lock
->unlock(this->lock
);
1013 if (update
&& event
&& route_ifname
)
1015 queue_route_reinstall(this, route_ifname
);
1021 host
->destroy(host
);
1023 /* send an update to all IKE_SAs */
1024 if (update
&& event
&& changed
)
1026 fire_roam_event(this, TRUE
);
1031 * process RTM_NEWROUTE and RTM_DELROUTE from kernel
1033 static void process_route(private_kernel_netlink_net_t
*this, struct nlmsghdr
*hdr
)
1035 struct rtmsg
* msg
= (struct rtmsg
*)(NLMSG_DATA(hdr
));
1036 struct rtattr
*rta
= RTM_RTA(msg
);
1037 size_t rtasize
= RTM_PAYLOAD(hdr
);
1038 u_int32_t rta_oif
= 0;
1039 host_t
*host
= NULL
;
1041 /* ignore routes added by us or in the local routing table (local addrs) */
1042 if (msg
->rtm_table
&& (msg
->rtm_table
== this->routing_table
||
1043 msg
->rtm_table
== RT_TABLE_LOCAL
))
1047 else if (msg
->rtm_flags
& RTM_F_CLONED
)
1048 { /* ignore cached routes, seem to be created a lot for IPv6 */
1052 while (RTA_OK(rta
, rtasize
))
1054 switch (rta
->rta_type
)
1058 host
= host_create_from_chunk(msg
->rtm_family
,
1059 chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
)), 0);
1062 if (RTA_PAYLOAD(rta
) == sizeof(rta_oif
))
1064 rta_oif
= *(u_int32_t
*)RTA_DATA(rta
);
1068 rta
= RTA_NEXT(rta
, rtasize
);
1070 this->lock
->read_lock(this->lock
);
1071 if (rta_oif
&& !is_interface_up_and_usable(this, rta_oif
))
1072 { /* ignore route changes for interfaces that are ignored or down */
1073 this->lock
->unlock(this->lock
);
1077 if (!host
&& rta_oif
)
1079 host
= get_interface_address(this, rta_oif
, msg
->rtm_family
, NULL
);
1081 if (!host
|| is_known_vip(this, host
))
1082 { /* ignore routes added for virtual IPs */
1083 this->lock
->unlock(this->lock
);
1087 this->lock
->unlock(this->lock
);
1088 fire_roam_event(this, FALSE
);
1089 host
->destroy(host
);
1093 * Receives events from kernel
1095 static bool receive_events(private_kernel_netlink_net_t
*this, int fd
,
1096 watcher_event_t event
)
1098 char response
[1536];
1099 struct nlmsghdr
*hdr
= (struct nlmsghdr
*)response
;
1100 struct sockaddr_nl addr
;
1101 socklen_t addr_len
= sizeof(addr
);
1104 len
= recvfrom(this->socket_events
, response
, sizeof(response
),
1105 MSG_DONTWAIT
, (struct sockaddr
*)&addr
, &addr_len
);
1111 /* interrupted, try again */
1114 /* no data ready, select again */
1117 DBG1(DBG_KNL
, "unable to receive from rt event socket");
1123 if (addr
.nl_pid
!= 0)
1124 { /* not from kernel. not interested, try another one */
1128 while (NLMSG_OK(hdr
, len
))
1130 /* looks good so far, dispatch netlink message */
1131 switch (hdr
->nlmsg_type
)
1135 process_addr(this, hdr
, TRUE
);
1139 process_link(this, hdr
, TRUE
);
1143 if (this->process_route
)
1145 process_route(this, hdr
);
1151 hdr
= NLMSG_NEXT(hdr
, len
);
1156 /** enumerator over addresses */
1158 private_kernel_netlink_net_t
* this;
1159 /** which addresses to enumerate */
1160 kernel_address_type_t which
;
1161 } address_enumerator_t
;
1164 * cleanup function for address enumerator
1166 static void address_enumerator_destroy(address_enumerator_t
*data
)
1168 data
->this->lock
->unlock(data
->this->lock
);
1173 * filter for addresses
1175 static bool filter_addresses(address_enumerator_t
*data
,
1176 addr_entry_t
** in
, host_t
** out
)
1178 if (!(data
->which
& ADDR_TYPE_VIRTUAL
) && (*in
)->refcount
)
1179 { /* skip virtual interfaces added by us */
1182 if (!(data
->which
& ADDR_TYPE_REGULAR
) && !(*in
)->refcount
)
1183 { /* address is regular, but not requested */
1186 if ((*in
)->scope
>= RT_SCOPE_LINK
)
1187 { /* skip addresses with a unusable scope */
1195 * enumerator constructor for interfaces
1197 static enumerator_t
*create_iface_enumerator(iface_entry_t
*iface
,
1198 address_enumerator_t
*data
)
1200 return enumerator_create_filter(
1201 iface
->addrs
->create_enumerator(iface
->addrs
),
1202 (void*)filter_addresses
, data
, NULL
);
1206 * filter for interfaces
1208 static bool filter_interfaces(address_enumerator_t
*data
, iface_entry_t
** in
,
1209 iface_entry_t
** out
)
1211 if (!(data
->which
& ADDR_TYPE_IGNORED
) && !(*in
)->usable
)
1212 { /* skip interfaces excluded by config */
1215 if (!(data
->which
& ADDR_TYPE_LOOPBACK
) && ((*in
)->flags
& IFF_LOOPBACK
))
1216 { /* ignore loopback devices */
1219 if (!(data
->which
& ADDR_TYPE_DOWN
) && !((*in
)->flags
& IFF_UP
))
1220 { /* skip interfaces not up */
1227 METHOD(kernel_net_t
, create_address_enumerator
, enumerator_t
*,
1228 private_kernel_netlink_net_t
*this, kernel_address_type_t which
)
1230 address_enumerator_t
*data
;
1237 this->lock
->read_lock(this->lock
);
1238 return enumerator_create_nested(
1239 enumerator_create_filter(
1240 this->ifaces
->create_enumerator(this->ifaces
),
1241 (void*)filter_interfaces
, data
, NULL
),
1242 (void*)create_iface_enumerator
, data
,
1243 (void*)address_enumerator_destroy
);
1246 METHOD(kernel_net_t
, get_interface_name
, bool,
1247 private_kernel_netlink_net_t
*this, host_t
* ip
, char **name
)
1249 addr_map_entry_t
*entry
, lookup
= {
1253 if (ip
->is_anyaddr(ip
))
1257 this->lock
->read_lock(this->lock
);
1258 /* first try to find it on an up and usable interface */
1259 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1260 (void*)addr_map_entry_match_up_and_usable
);
1265 *name
= strdup(entry
->iface
->ifname
);
1266 DBG2(DBG_KNL
, "%H is on interface %s", ip
, *name
);
1268 this->lock
->unlock(this->lock
);
1271 /* in a second step, consider virtual IPs installed by us */
1272 entry
= this->vips
->get_match(this->vips
, &lookup
,
1273 (void*)addr_map_entry_match_up_and_usable
);
1278 *name
= strdup(entry
->iface
->ifname
);
1279 DBG2(DBG_KNL
, "virtual IP %H is on interface %s", ip
, *name
);
1281 this->lock
->unlock(this->lock
);
1284 /* maybe it is installed on an ignored interface */
1285 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1286 (void*)addr_map_entry_match_up
);
1289 DBG2(DBG_KNL
, "%H is not a local address or the interface is down", ip
);
1291 this->lock
->unlock(this->lock
);
1296 * get the index of an interface by name
1298 static int get_interface_index(private_kernel_netlink_net_t
*this, char* name
)
1300 iface_entry_t
*iface
;
1303 DBG2(DBG_KNL
, "getting iface index for %s", name
);
1305 this->lock
->read_lock(this->lock
);
1306 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1307 (void**)&iface
, name
) == SUCCESS
)
1309 ifindex
= iface
->ifindex
;
1311 this->lock
->unlock(this->lock
);
1315 DBG1(DBG_KNL
, "unable to get interface index for %s", name
);
1321 * check if an address (chunk) addr is in subnet (net with net_len net bits)
1323 static bool addr_in_subnet(chunk_t addr
, chunk_t net
, int net_len
)
1325 static const u_char mask
[] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
1329 { /* any address matches a /0 network */
1332 if (addr
.len
!= net
.len
|| net_len
> 8 * net
.len
)
1336 /* scan through all bytes in network order */
1341 return (mask
[net_len
] & addr
.ptr
[byte
]) == (mask
[net_len
] & net
.ptr
[byte
]);
1345 if (addr
.ptr
[byte
] != net
.ptr
[byte
])
1357 * Store information about a route retrieved via RTNETLINK
1370 * Free a route entry
1372 static void rt_entry_destroy(rt_entry_t
*this)
1374 DESTROY_IF(this->src_host
);
1379 * Parse route received with RTM_NEWROUTE. The given rt_entry_t object will be
1380 * reused if not NULL.
1382 * Returned chunks point to internal data of the Netlink message.
1384 static rt_entry_t
*parse_route(struct nlmsghdr
*hdr
, rt_entry_t
*route
)
1390 msg
= (struct rtmsg
*)(NLMSG_DATA(hdr
));
1392 rtasize
= RTM_PAYLOAD(hdr
);
1396 route
->gtw
= chunk_empty
;
1397 route
->src
= chunk_empty
;
1398 route
->dst
= chunk_empty
;
1399 route
->dst_len
= msg
->rtm_dst_len
;
1400 route
->table
= msg
->rtm_table
;
1406 .dst_len
= msg
->rtm_dst_len
,
1407 .table
= msg
->rtm_table
,
1411 while (RTA_OK(rta
, rtasize
))
1413 switch (rta
->rta_type
)
1416 route
->src
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1419 route
->gtw
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1422 route
->dst
= chunk_create(RTA_DATA(rta
), RTA_PAYLOAD(rta
));
1425 if (RTA_PAYLOAD(rta
) == sizeof(route
->oif
))
1427 route
->oif
= *(u_int32_t
*)RTA_DATA(rta
);
1430 #ifdef HAVE_RTA_TABLE
1432 if (RTA_PAYLOAD(rta
) == sizeof(route
->table
))
1434 route
->table
= *(u_int32_t
*)RTA_DATA(rta
);
1437 #endif /* HAVE_RTA_TABLE*/
1439 rta
= RTA_NEXT(rta
, rtasize
);
1445 * Get a route: If "nexthop", the nexthop is returned. source addr otherwise.
1447 static host_t
*get_route(private_kernel_netlink_net_t
*this, host_t
*dest
,
1448 bool nexthop
, host_t
*candidate
, u_int recursion
)
1450 netlink_buf_t request
;
1451 struct nlmsghdr
*hdr
, *out
, *current
;
1455 linked_list_t
*routes
;
1456 rt_entry_t
*route
= NULL
, *best
= NULL
;
1457 enumerator_t
*enumerator
;
1458 host_t
*addr
= NULL
;
1460 if (recursion
> MAX_ROUTE_RECURSION
)
1465 memset(&request
, 0, sizeof(request
));
1467 hdr
= (struct nlmsghdr
*)request
;
1468 hdr
->nlmsg_flags
= NLM_F_REQUEST
;
1469 if (dest
->get_family(dest
) == AF_INET
|| this->rta_prefsrc_for_ipv6
||
1470 this->routing_table
)
1471 { /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
1472 * as we want to ignore routes with virtual IPs we cannot use DUMP
1473 * if these routes are not installed in a separate table */
1474 hdr
->nlmsg_flags
|= NLM_F_DUMP
;
1476 hdr
->nlmsg_type
= RTM_GETROUTE
;
1477 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1479 msg
= (struct rtmsg
*)NLMSG_DATA(hdr
);
1480 msg
->rtm_family
= dest
->get_family(dest
);
1483 chunk
= candidate
->get_address(candidate
);
1484 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1486 chunk
= dest
->get_address(dest
);
1487 netlink_add_attribute(hdr
, RTA_DST
, chunk
, sizeof(request
));
1489 if (this->socket
->send(this->socket
, hdr
, &out
, &len
) != SUCCESS
)
1491 DBG2(DBG_KNL
, "getting %s to reach %H failed",
1492 nexthop
? "nexthop" : "address", dest
);
1495 routes
= linked_list_create();
1496 this->lock
->read_lock(this->lock
);
1498 for (current
= out
; NLMSG_OK(current
, len
);
1499 current
= NLMSG_NEXT(current
, len
))
1501 switch (current
->nlmsg_type
)
1510 route
= parse_route(current
, route
);
1512 table
= (uintptr_t)route
->table
;
1513 if (this->rt_exclude
->find_first(this->rt_exclude
, NULL
,
1514 (void**)&table
) == SUCCESS
)
1515 { /* route is from an excluded routing table */
1518 if (this->routing_table
!= 0 &&
1519 route
->table
== this->routing_table
)
1520 { /* route is from our own ipsec routing table */
1523 if (route
->oif
&& !is_interface_up_and_usable(this, route
->oif
))
1524 { /* interface is down */
1527 if (!addr_in_subnet(chunk
, route
->dst
, route
->dst_len
))
1528 { /* route destination does not contain dest */
1532 { /* verify source address, if any */
1533 host_t
*src
= host_create_from_chunk(msg
->rtm_family
,
1535 if (src
&& is_known_vip(this, src
))
1536 { /* ignore routes installed by us */
1540 route
->src_host
= src
;
1542 /* insert route, sorted by decreasing network prefix */
1543 enumerator
= routes
->create_enumerator(routes
);
1544 while (enumerator
->enumerate(enumerator
, &other
))
1546 if (route
->dst_len
> other
->dst_len
)
1551 routes
->insert_before(routes
, enumerator
, route
);
1552 enumerator
->destroy(enumerator
);
1563 rt_entry_destroy(route
);
1566 /* now we have a list of routes matching dest, sorted by net prefix.
1567 * we will look for source addresses for these routes and select the one
1568 * with the preferred source address, if possible */
1569 enumerator
= routes
->create_enumerator(routes
);
1570 while (enumerator
->enumerate(enumerator
, &route
))
1572 if (route
->src_host
)
1573 { /* got a source address with the route, if no preferred source
1574 * is given or it matches we are done, as this is the best route */
1575 if (!candidate
|| candidate
->ip_equals(candidate
, route
->src_host
))
1580 else if (route
->oif
)
1581 { /* no match yet, maybe it is assigned to the same interface */
1582 host_t
*src
= get_interface_address(this, route
->oif
,
1583 msg
->rtm_family
, candidate
);
1584 if (src
&& src
->ip_equals(src
, candidate
))
1586 route
->src_host
->destroy(route
->src_host
);
1587 route
->src_host
= src
;
1593 /* no luck yet with the source address. if this is the best (first)
1594 * route we store it as fallback in case we don't find a route with
1595 * the preferred source */
1596 best
= best
?: route
;
1600 { /* no src, but an interface - get address from it */
1601 route
->src_host
= get_interface_address(this, route
->oif
,
1602 msg
->rtm_family
, candidate
);
1603 if (route
->src_host
)
1604 { /* we handle this address the same as the one above */
1606 candidate
->ip_equals(candidate
, route
->src_host
))
1611 best
= best
?: route
;
1616 { /* no src, no iface, but a gateway - lookup src to reach gtw */
1619 gtw
= host_create_from_chunk(msg
->rtm_family
, route
->gtw
, 0);
1620 if (gtw
&& !gtw
->ip_equals(gtw
, dest
))
1622 route
->src_host
= get_route(this, gtw
, FALSE
, candidate
,
1626 if (route
->src_host
)
1627 { /* more of the same */
1629 candidate
->ip_equals(candidate
, route
->src_host
))
1634 best
= best
?: route
;
1638 enumerator
->destroy(enumerator
);
1641 { /* nexthop lookup, return gateway if any */
1642 if (best
|| routes
->get_first(routes
, (void**)&best
) == SUCCESS
)
1644 addr
= host_create_from_chunk(msg
->rtm_family
, best
->gtw
, 0);
1646 addr
= addr
?: dest
->clone(dest
);
1652 addr
= best
->src_host
->clone(best
->src_host
);
1655 this->lock
->unlock(this->lock
);
1656 routes
->destroy_function(routes
, (void*)rt_entry_destroy
);
1661 DBG2(DBG_KNL
, "using %H as %s to reach %H", addr
,
1662 nexthop
? "nexthop" : "address", dest
);
1664 else if (!recursion
)
1666 DBG2(DBG_KNL
, "no %s found to reach %H",
1667 nexthop
? "nexthop" : "address", dest
);
1672 METHOD(kernel_net_t
, get_source_addr
, host_t
*,
1673 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
1675 return get_route(this, dest
, FALSE
, src
, 0);
1678 METHOD(kernel_net_t
, get_nexthop
, host_t
*,
1679 private_kernel_netlink_net_t
*this, host_t
*dest
, host_t
*src
)
1681 return get_route(this, dest
, TRUE
, src
, 0);
1685 * Manages the creation and deletion of ip addresses on an interface.
1686 * By setting the appropriate nlmsg_type, the ip will be set or unset.
1688 static status_t
manage_ipaddr(private_kernel_netlink_net_t
*this, int nlmsg_type
,
1689 int flags
, int if_index
, host_t
*ip
, int prefix
)
1691 netlink_buf_t request
;
1692 struct nlmsghdr
*hdr
;
1693 struct ifaddrmsg
*msg
;
1696 memset(&request
, 0, sizeof(request
));
1698 chunk
= ip
->get_address(ip
);
1700 hdr
= (struct nlmsghdr
*)request
;
1701 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
1702 hdr
->nlmsg_type
= nlmsg_type
;
1703 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
));
1705 msg
= (struct ifaddrmsg
*)NLMSG_DATA(hdr
);
1706 msg
->ifa_family
= ip
->get_family(ip
);
1708 msg
->ifa_prefixlen
= prefix
< 0 ? chunk
.len
* 8 : prefix
;
1709 msg
->ifa_scope
= RT_SCOPE_UNIVERSE
;
1710 msg
->ifa_index
= if_index
;
1712 netlink_add_attribute(hdr
, IFA_LOCAL
, chunk
, sizeof(request
));
1714 return this->socket
->send_ack(this->socket
, hdr
);
1717 METHOD(kernel_net_t
, add_ip
, status_t
,
1718 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
1721 addr_map_entry_t
*entry
, lookup
= {
1724 iface_entry_t
*iface
= NULL
;
1726 if (!this->install_virtual_ip
)
1727 { /* disabled by config */
1731 this->lock
->write_lock(this->lock
);
1732 /* the virtual IP might actually be installed as regular IP, in which case
1733 * we don't track it as virtual IP */
1734 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1735 (void*)addr_map_entry_match
);
1737 { /* otherwise it might already be installed as virtual IP */
1738 entry
= this->vips
->get_match(this->vips
, &lookup
,
1739 (void*)addr_map_entry_match
);
1741 { /* the vip we found can be in one of three states: 1) installed and
1742 * ready, 2) just added by another thread, but not yet confirmed to
1743 * be installed by the kernel, 3) just deleted, but not yet gone.
1744 * Then while we wait below, several things could happen (as we
1745 * release the lock). For instance, the interface could disappear,
1746 * or the IP is finally deleted, and it reappears on a different
1747 * interface. All these cases are handled by the call below. */
1748 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
1750 this->condvar
->wait(this->condvar
, this->lock
);
1754 entry
->addr
->refcount
++;
1760 DBG2(DBG_KNL
, "virtual IP %H is already installed on %s", virtual_ip
,
1761 entry
->iface
->ifname
);
1762 this->lock
->unlock(this->lock
);
1765 /* try to find the target interface, either by config or via src ip */
1766 if (!this->install_virtual_ip_on
||
1767 this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1768 (void**)&iface
, this->install_virtual_ip_on
) != SUCCESS
)
1770 if (this->ifaces
->find_first(this->ifaces
, (void*)iface_entry_by_name
,
1771 (void**)&iface
, iface_name
) != SUCCESS
)
1772 { /* if we don't find the requested interface we just use the first */
1773 this->ifaces
->get_first(this->ifaces
, (void**)&iface
);
1781 .ip
= virtual_ip
->clone(virtual_ip
),
1783 .scope
= RT_SCOPE_UNIVERSE
,
1785 iface
->addrs
->insert_last(iface
->addrs
, addr
);
1786 addr_map_entry_add(this->vips
, addr
, iface
);
1787 if (manage_ipaddr(this, RTM_NEWADDR
, NLM_F_CREATE
| NLM_F_EXCL
,
1788 iface
->ifindex
, virtual_ip
, prefix
) == SUCCESS
)
1790 while (!is_vip_installed_or_gone(this, virtual_ip
, &entry
))
1791 { /* wait until address appears */
1792 this->condvar
->wait(this->condvar
, this->lock
);
1795 { /* we fail if the interface got deleted in the meantime */
1796 DBG2(DBG_KNL
, "virtual IP %H installed on %s", virtual_ip
,
1797 entry
->iface
->ifname
);
1798 this->lock
->unlock(this->lock
);
1799 /* during IKEv1 reauthentication, children get moved from
1800 * old the new SA before the virtual IP is available. This
1801 * kills the route for our virtual IP, reinstall. */
1802 queue_route_reinstall(this, strdup(entry
->iface
->ifname
));
1806 this->lock
->unlock(this->lock
);
1807 DBG1(DBG_KNL
, "adding virtual IP %H failed", virtual_ip
);
1810 this->lock
->unlock(this->lock
);
1811 DBG1(DBG_KNL
, "no interface available, unable to install virtual IP %H",
1816 METHOD(kernel_net_t
, del_ip
, status_t
,
1817 private_kernel_netlink_net_t
*this, host_t
*virtual_ip
, int prefix
,
1820 addr_map_entry_t
*entry
, lookup
= {
1824 if (!this->install_virtual_ip
)
1825 { /* disabled by config */
1829 DBG2(DBG_KNL
, "deleting virtual IP %H", virtual_ip
);
1831 this->lock
->write_lock(this->lock
);
1832 entry
= this->vips
->get_match(this->vips
, &lookup
,
1833 (void*)addr_map_entry_match
);
1835 { /* we didn't install this IP as virtual IP */
1836 entry
= this->addrs
->get_match(this->addrs
, &lookup
,
1837 (void*)addr_map_entry_match
);
1840 DBG2(DBG_KNL
, "not deleting existing IP %H on %s", virtual_ip
,
1841 entry
->iface
->ifname
);
1842 this->lock
->unlock(this->lock
);
1845 DBG2(DBG_KNL
, "virtual IP %H not cached, unable to delete", virtual_ip
);
1846 this->lock
->unlock(this->lock
);
1849 if (entry
->addr
->refcount
== 1)
1853 /* we set this flag so that threads calling add_ip will block and wait
1854 * until the entry is gone, also so we can wait below */
1855 entry
->addr
->installed
= FALSE
;
1856 status
= manage_ipaddr(this, RTM_DELADDR
, 0, entry
->iface
->ifindex
,
1857 virtual_ip
, prefix
);
1858 if (status
== SUCCESS
&& wait
)
1859 { /* wait until the address is really gone */
1860 while (is_known_vip(this, virtual_ip
))
1862 this->condvar
->wait(this->condvar
, this->lock
);
1865 this->lock
->unlock(this->lock
);
1870 entry
->addr
->refcount
--;
1872 DBG2(DBG_KNL
, "virtual IP %H used by other SAs, not deleting",
1874 this->lock
->unlock(this->lock
);
1879 * Manages source routes in the routing table.
1880 * By setting the appropriate nlmsg_type, the route gets added or removed.
1882 static status_t
manage_srcroute(private_kernel_netlink_net_t
*this,
1883 int nlmsg_type
, int flags
, chunk_t dst_net
,
1884 u_int8_t prefixlen
, host_t
*gateway
,
1885 host_t
*src_ip
, char *if_name
)
1887 netlink_buf_t request
;
1888 struct nlmsghdr
*hdr
;
1893 /* if route is 0.0.0.0/0, we can't install it, as it would
1894 * overwrite the default route. Instead, we add two routes:
1895 * 0.0.0.0/1 and 128.0.0.0/1 */
1896 if (this->routing_table
== 0 && prefixlen
== 0)
1899 u_int8_t half_prefixlen
;
1902 half_net
= chunk_alloca(dst_net
.len
);
1903 memset(half_net
.ptr
, 0, half_net
.len
);
1906 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
1907 gateway
, src_ip
, if_name
);
1908 half_net
.ptr
[0] |= 0x80;
1909 status
= manage_srcroute(this, nlmsg_type
, flags
, half_net
, half_prefixlen
,
1910 gateway
, src_ip
, if_name
);
1914 memset(&request
, 0, sizeof(request
));
1916 hdr
= (struct nlmsghdr
*)request
;
1917 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| flags
;
1918 hdr
->nlmsg_type
= nlmsg_type
;
1919 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1921 msg
= (struct rtmsg
*)NLMSG_DATA(hdr
);
1922 msg
->rtm_family
= src_ip
->get_family(src_ip
);
1923 msg
->rtm_dst_len
= prefixlen
;
1924 msg
->rtm_table
= this->routing_table
;
1925 msg
->rtm_protocol
= RTPROT_STATIC
;
1926 msg
->rtm_type
= RTN_UNICAST
;
1927 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
1929 netlink_add_attribute(hdr
, RTA_DST
, dst_net
, sizeof(request
));
1930 chunk
= src_ip
->get_address(src_ip
);
1931 netlink_add_attribute(hdr
, RTA_PREFSRC
, chunk
, sizeof(request
));
1932 if (gateway
&& gateway
->get_family(gateway
) == src_ip
->get_family(src_ip
))
1934 chunk
= gateway
->get_address(gateway
);
1935 netlink_add_attribute(hdr
, RTA_GATEWAY
, chunk
, sizeof(request
));
1937 ifindex
= get_interface_index(this, if_name
);
1938 chunk
.ptr
= (char*)&ifindex
;
1939 chunk
.len
= sizeof(ifindex
);
1940 netlink_add_attribute(hdr
, RTA_OIF
, chunk
, sizeof(request
));
1942 return this->socket
->send_ack(this->socket
, hdr
);
1945 METHOD(kernel_net_t
, add_route
, status_t
,
1946 private_kernel_netlink_net_t
*this, chunk_t dst_net
, u_int8_t prefixlen
,
1947 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
1950 route_entry_t
*found
, route
= {
1952 .prefixlen
= prefixlen
,
1958 this->routes_lock
->lock(this->routes_lock
);
1959 found
= this->routes
->get(this->routes
, &route
);
1962 this->routes_lock
->unlock(this->routes_lock
);
1963 return ALREADY_DONE
;
1965 status
= manage_srcroute(this, RTM_NEWROUTE
, NLM_F_CREATE
| NLM_F_EXCL
,
1966 dst_net
, prefixlen
, gateway
, src_ip
, if_name
);
1967 if (status
== SUCCESS
)
1969 found
= route_entry_clone(&route
);
1970 this->routes
->put(this->routes
, found
, found
);
1972 this->routes_lock
->unlock(this->routes_lock
);
1976 METHOD(kernel_net_t
, del_route
, status_t
,
1977 private_kernel_netlink_net_t
*this, chunk_t dst_net
, u_int8_t prefixlen
,
1978 host_t
*gateway
, host_t
*src_ip
, char *if_name
)
1981 route_entry_t
*found
, route
= {
1983 .prefixlen
= prefixlen
,
1989 this->routes_lock
->lock(this->routes_lock
);
1990 found
= this->routes
->get(this->routes
, &route
);
1993 this->routes_lock
->unlock(this->routes_lock
);
1996 this->routes
->remove(this->routes
, found
);
1997 route_entry_destroy(found
);
1998 status
= manage_srcroute(this, RTM_DELROUTE
, 0, dst_net
, prefixlen
,
1999 gateway
, src_ip
, if_name
);
2000 this->routes_lock
->unlock(this->routes_lock
);
2005 * Initialize a list of local addresses.
2007 static status_t
init_address_list(private_kernel_netlink_net_t
*this)
2009 netlink_buf_t request
;
2010 struct nlmsghdr
*out
, *current
, *in
;
2011 struct rtgenmsg
*msg
;
2013 enumerator_t
*ifaces
, *addrs
;
2014 iface_entry_t
*iface
;
2017 DBG2(DBG_KNL
, "known interfaces and IP addresses:");
2019 memset(&request
, 0, sizeof(request
));
2021 in
= (struct nlmsghdr
*)&request
;
2022 in
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtgenmsg
));
2023 in
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_MATCH
| NLM_F_ROOT
;
2024 msg
= (struct rtgenmsg
*)NLMSG_DATA(in
);
2025 msg
->rtgen_family
= AF_UNSPEC
;
2028 in
->nlmsg_type
= RTM_GETLINK
;
2029 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2034 while (NLMSG_OK(current
, len
))
2036 switch (current
->nlmsg_type
)
2041 process_link(this, current
, FALSE
);
2044 current
= NLMSG_NEXT(current
, len
);
2051 /* get all interface addresses */
2052 in
->nlmsg_type
= RTM_GETADDR
;
2053 if (this->socket
->send(this->socket
, in
, &out
, &len
) != SUCCESS
)
2058 while (NLMSG_OK(current
, len
))
2060 switch (current
->nlmsg_type
)
2065 process_addr(this, current
, FALSE
);
2068 current
= NLMSG_NEXT(current
, len
);
2075 this->lock
->read_lock(this->lock
);
2076 ifaces
= this->ifaces
->create_enumerator(this->ifaces
);
2077 while (ifaces
->enumerate(ifaces
, &iface
))
2079 if (iface_entry_up_and_usable(iface
))
2081 DBG2(DBG_KNL
, " %s", iface
->ifname
);
2082 addrs
= iface
->addrs
->create_enumerator(iface
->addrs
);
2083 while (addrs
->enumerate(addrs
, (void**)&addr
))
2085 DBG2(DBG_KNL
, " %H", addr
->ip
);
2087 addrs
->destroy(addrs
);
2090 ifaces
->destroy(ifaces
);
2091 this->lock
->unlock(this->lock
);
2096 * create or delete a rule to use our routing table
2098 static status_t
manage_rule(private_kernel_netlink_net_t
*this, int nlmsg_type
,
2099 int family
, u_int32_t table
, u_int32_t prio
)
2101 netlink_buf_t request
;
2102 struct nlmsghdr
*hdr
;
2107 memset(&request
, 0, sizeof(request
));
2108 hdr
= (struct nlmsghdr
*)request
;
2109 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2110 hdr
->nlmsg_type
= nlmsg_type
;
2111 if (nlmsg_type
== RTM_NEWRULE
)
2113 hdr
->nlmsg_flags
|= NLM_F_CREATE
| NLM_F_EXCL
;
2115 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
2117 msg
= (struct rtmsg
*)NLMSG_DATA(hdr
);
2118 msg
->rtm_table
= table
;
2119 msg
->rtm_family
= family
;
2120 msg
->rtm_protocol
= RTPROT_BOOT
;
2121 msg
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2122 msg
->rtm_type
= RTN_UNICAST
;
2124 chunk
= chunk_from_thing(prio
);
2125 netlink_add_attribute(hdr
, RTA_PRIORITY
, chunk
, sizeof(request
));
2127 fwmark
= lib
->settings
->get_str(lib
->settings
,
2128 "%s.plugins.kernel-netlink.fwmark", NULL
, lib
->ns
);
2131 #ifdef HAVE_LINUX_FIB_RULES_H
2134 if (fwmark
[0] == '!')
2136 msg
->rtm_flags
|= FIB_RULE_INVERT
;
2139 if (mark_from_string(fwmark
, &mark
))
2141 chunk
= chunk_from_thing(mark
.value
);
2142 netlink_add_attribute(hdr
, FRA_FWMARK
, chunk
, sizeof(request
));
2143 chunk
= chunk_from_thing(mark
.mask
);
2144 netlink_add_attribute(hdr
, FRA_FWMASK
, chunk
, sizeof(request
));
2147 DBG1(DBG_KNL
, "setting firewall mark on routing rule is not supported");
2150 return this->socket
->send_ack(this->socket
, hdr
);
2154 * check for kernel features (currently only via version number)
2156 static void check_kernel_features(private_kernel_netlink_net_t
*this)
2158 struct utsname utsname
;
2161 if (uname(&utsname
) == 0)
2163 switch(sscanf(utsname
.release
, "%d.%d.%d", &a
, &b
, &c
))
2168 DBG2(DBG_KNL
, "detected Linux %d.%d.%d, no support for "
2169 "RTA_PREFSRC for IPv6 routes", a
, b
, c
);
2174 /* only 3.x+ uses two part version numbers */
2175 this->rta_prefsrc_for_ipv6
= TRUE
;
2184 * Destroy an address to iface map
2186 static void addr_map_destroy(hashtable_t
*map
)
2188 enumerator_t
*enumerator
;
2189 addr_map_entry_t
*addr
;
2191 enumerator
= map
->create_enumerator(map
);
2192 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&addr
))
2196 enumerator
->destroy(enumerator
);
2200 METHOD(kernel_net_t
, destroy
, void,
2201 private_kernel_netlink_net_t
*this)
2203 enumerator_t
*enumerator
;
2204 route_entry_t
*route
;
2206 if (this->routing_table
)
2208 manage_rule(this, RTM_DELRULE
, AF_INET
, this->routing_table
,
2209 this->routing_table_prio
);
2210 manage_rule(this, RTM_DELRULE
, AF_INET6
, this->routing_table
,
2211 this->routing_table_prio
);
2213 if (this->socket_events
> 0)
2215 lib
->watcher
->remove(lib
->watcher
, this->socket_events
);
2216 close(this->socket_events
);
2218 enumerator
= this->routes
->create_enumerator(this->routes
);
2219 while (enumerator
->enumerate(enumerator
, NULL
, (void**)&route
))
2221 manage_srcroute(this, RTM_DELROUTE
, 0, route
->dst_net
, route
->prefixlen
,
2222 route
->gateway
, route
->src_ip
, route
->if_name
);
2223 route_entry_destroy(route
);
2225 enumerator
->destroy(enumerator
);
2226 this->routes
->destroy(this->routes
);
2227 this->routes_lock
->destroy(this->routes_lock
);
2228 DESTROY_IF(this->socket
);
2230 net_changes_clear(this);
2231 this->net_changes
->destroy(this->net_changes
);
2232 this->net_changes_lock
->destroy(this->net_changes_lock
);
2234 addr_map_destroy(this->addrs
);
2235 addr_map_destroy(this->vips
);
2237 this->ifaces
->destroy_function(this->ifaces
, (void*)iface_entry_destroy
);
2238 this->rt_exclude
->destroy(this->rt_exclude
);
2239 this->roam_lock
->destroy(this->roam_lock
);
2240 this->condvar
->destroy(this->condvar
);
2241 this->lock
->destroy(this->lock
);
2246 * Described in header.
2248 kernel_netlink_net_t
*kernel_netlink_net_create()
2250 private_kernel_netlink_net_t
*this;
2251 enumerator_t
*enumerator
;
2252 bool register_for_events
= TRUE
;
2258 .get_interface
= _get_interface_name
,
2259 .create_address_enumerator
= _create_address_enumerator
,
2260 .get_source_addr
= _get_source_addr
,
2261 .get_nexthop
= _get_nexthop
,
2264 .add_route
= _add_route
,
2265 .del_route
= _del_route
,
2266 .destroy
= _destroy
,
2269 .socket
= netlink_socket_create(NETLINK_ROUTE
),
2270 .rt_exclude
= linked_list_create(),
2271 .routes
= hashtable_create((hashtable_hash_t
)route_entry_hash
,
2272 (hashtable_equals_t
)route_entry_equals
, 16),
2273 .net_changes
= hashtable_create(
2274 (hashtable_hash_t
)net_change_hash
,
2275 (hashtable_equals_t
)net_change_equals
, 16),
2276 .addrs
= hashtable_create(
2277 (hashtable_hash_t
)addr_map_entry_hash
,
2278 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2279 .vips
= hashtable_create((hashtable_hash_t
)addr_map_entry_hash
,
2280 (hashtable_equals_t
)addr_map_entry_equals
, 16),
2281 .routes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2282 .net_changes_lock
= mutex_create(MUTEX_TYPE_DEFAULT
),
2283 .ifaces
= linked_list_create(),
2284 .lock
= rwlock_create(RWLOCK_TYPE_DEFAULT
),
2285 .condvar
= rwlock_condvar_create(),
2286 .roam_lock
= spinlock_create(),
2287 .routing_table
= lib
->settings
->get_int(lib
->settings
,
2288 "%s.routing_table", ROUTING_TABLE
, lib
->ns
),
2289 .routing_table_prio
= lib
->settings
->get_int(lib
->settings
,
2290 "%s.routing_table_prio", ROUTING_TABLE_PRIO
, lib
->ns
),
2291 .process_route
= lib
->settings
->get_bool(lib
->settings
,
2292 "%s.process_route", TRUE
, lib
->ns
),
2293 .install_virtual_ip
= lib
->settings
->get_bool(lib
->settings
,
2294 "%s.install_virtual_ip", TRUE
, lib
->ns
),
2295 .install_virtual_ip_on
= lib
->settings
->get_str(lib
->settings
,
2296 "%s.install_virtual_ip_on", NULL
, lib
->ns
),
2297 .roam_events
= lib
->settings
->get_bool(lib
->settings
,
2298 "%s.plugins.kernel-netlink.roam_events", TRUE
, lib
->ns
),
2300 timerclear(&this->last_route_reinstall
);
2301 timerclear(&this->next_roam
);
2303 check_kernel_features(this);
2305 if (streq(lib
->ns
, "starter"))
2306 { /* starter has no threads, so we do not register for kernel events */
2307 register_for_events
= FALSE
;
2310 exclude
= lib
->settings
->get_str(lib
->settings
,
2311 "%s.ignore_routing_tables", NULL
, lib
->ns
);
2317 enumerator
= enumerator_create_token(exclude
, " ", " ");
2318 while (enumerator
->enumerate(enumerator
, &token
))
2321 table
= strtoul(token
, NULL
, 10);
2325 this->rt_exclude
->insert_last(this->rt_exclude
, (void*)table
);
2328 enumerator
->destroy(enumerator
);
2331 if (register_for_events
)
2333 struct sockaddr_nl addr
;
2335 memset(&addr
, 0, sizeof(addr
));
2336 addr
.nl_family
= AF_NETLINK
;
2338 /* create and bind RT socket for events (address/interface/route changes) */
2339 this->socket_events
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
2340 if (this->socket_events
< 0)
2342 DBG1(DBG_KNL
, "unable to create RT event socket");
2346 addr
.nl_groups
= RTMGRP_IPV4_IFADDR
| RTMGRP_IPV6_IFADDR
|
2347 RTMGRP_IPV4_ROUTE
| RTMGRP_IPV6_ROUTE
| RTMGRP_LINK
;
2348 if (bind(this->socket_events
, (struct sockaddr
*)&addr
, sizeof(addr
)))
2350 DBG1(DBG_KNL
, "unable to bind RT event socket");
2355 lib
->watcher
->add(lib
->watcher
, this->socket_events
, WATCHER_READ
,
2356 (watcher_cb_t
)receive_events
, this);
2359 if (init_address_list(this) != SUCCESS
)
2361 DBG1(DBG_KNL
, "unable to get interface list");
2366 if (this->routing_table
)
2368 if (manage_rule(this, RTM_NEWRULE
, AF_INET
, this->routing_table
,
2369 this->routing_table_prio
) != SUCCESS
)
2371 DBG1(DBG_KNL
, "unable to create IPv4 routing table rule");
2373 if (manage_rule(this, RTM_NEWRULE
, AF_INET6
, this->routing_table
,
2374 this->routing_table_prio
) != SUCCESS
)
2376 DBG1(DBG_KNL
, "unable to create IPv6 routing table rule");
2380 return &this->public;