git.ipfire.org Git - thirdparty/strongswan.git/blobdiff - src/libcharon/plugins/kernel_netlink/kernel_netlink_net.c
kernel-netlink: Implement passthrough type routes and use them on Linux
[thirdparty/strongswan.git] / src / libcharon / plugins / kernel_netlink / kernel_netlink_net.c
index 61e0292545560626f2f79ea06bfa0e6821784316..24d93cc2f2917f0241adfe0c0b3193470f5394c1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008-2016 Tobias Brunner
+ * Copyright (C) 2008-2019 Tobias Brunner
  * Copyright (C) 2005-2008 Martin Willi
  * HSR Hochschule fuer Technik Rapperswil
  *
@@ -41,6 +41,7 @@
 #include <sys/utsname.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
+#include <linux/if_addrlabel.h>
 #include <unistd.h>
 #include <errno.h>
 #include <net/if.h>
@@ -78,6 +79,9 @@
 #define ROUTING_TABLE_PRIO 0
 #endif
 
+/**
+ * multicast groups (for groups > 31 setsockopt has to be used)
+ *
+ * Converts a multicast group number into its bit in a group bitmask. The
+ * argument is parenthesized so that arbitrary expressions may be passed, and
+ * the shift operates on an unsigned constant to avoid signed overflow.
+ */
+#define nl_group(group) (1U << ((group) - 1))
+
 ENUM(rt_msg_names, RTM_NEWLINK, RTM_GETRULE,
        "RTM_NEWLINK",
        "RTM_DELLINK",
@@ -86,14 +90,15 @@ ENUM(rt_msg_names, RTM_NEWLINK, RTM_GETRULE,
        "RTM_NEWADDR",
        "RTM_DELADDR",
        "RTM_GETADDR",
-       "31",
+       "23",
        "RTM_NEWROUTE",
        "RTM_DELROUTE",
        "RTM_GETROUTE",
-       "35",
+       "27",
        "RTM_NEWNEIGH",
        "RTM_DELNEIGH",
        "RTM_GETNEIGH",
+       "31",
        "RTM_NEWRULE",
        "RTM_DELRULE",
        "RTM_GETRULE",
@@ -163,19 +168,21 @@ static void iface_entry_destroy(iface_entry_t *this)
        free(this);
 }
 
-/**
- * find an interface entry by index
- */
-static bool iface_entry_by_index(iface_entry_t *this, int *ifindex)
+CALLBACK(iface_entry_by_index, bool, /* match callback: find an interface entry by index */
+       iface_entry_t *this, va_list args)
 {
-       return this->ifindex == *ifindex;
+       int ifindex; /* index to look for, passed by value in args */
+
+       VA_ARGS_VGET(args, ifindex);
+       return this->ifindex == ifindex; /* true if this entry has the requested index */
 }
 
-/**
- * find an interface entry by name
- */
-static bool iface_entry_by_name(iface_entry_t *this, char *ifname)
+CALLBACK(iface_entry_by_name, bool, /* match callback: find an interface entry by name */
+       iface_entry_t *this, va_list args)
 {
+       char *ifname; /* name to look for, taken from args */
+
+       VA_ARGS_VGET(args, ifname);
        return streq(this->ifname, ifname); /* true if this entry has the requested name */
 }
 
@@ -279,6 +286,9 @@ struct route_entry_t {
 
        /** Destination net prefixlen */
        uint8_t prefixlen;
+
+       /** Whether the route was installed for a passthrough policy */
+       bool pass;
 };
 
 /**
@@ -294,6 +304,7 @@ static route_entry_t *route_entry_clone(route_entry_t *this)
                .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
                .dst_net = chunk_clone(this->dst_net),
                .prefixlen = this->prefixlen,
+               .pass = this->pass,
        );
        return route;
 }
@@ -325,6 +336,7 @@ static u_int route_entry_hash(route_entry_t *this)
 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
 {
        if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
+               a->pass == b->pass &&
                a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
                chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
        {
@@ -433,12 +445,12 @@ struct private_kernel_netlink_net_t {
        /**
         * routing table to install routes
         */
-       int routing_table;
+       uint32_t routing_table;
 
        /**
         * priority of used routing table
         */
-       int routing_table_prio;
+       uint32_t routing_table_prio;
 
        /**
         * installed routes
@@ -470,11 +482,21 @@ struct private_kernel_netlink_net_t {
         */
        bool process_route;
 
+       /**
+        * whether to react to RTM_NEWRULE or RTM_DELRULE events
+        */
+       bool process_rules;
+
        /**
         * whether to trigger roam events
         */
        bool roam_events;
 
+       /**
+        * whether to install IPsec policy routes
+        */
+       bool install_routes;
+
        /**
         * whether to actually install virtual IPs
         */
@@ -527,7 +549,7 @@ struct private_kernel_netlink_net_t {
 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
                                                                int nlmsg_type, int flags, chunk_t dst_net,
                                                                uint8_t prefixlen, host_t *gateway,
-                                                               host_t *src_ip, char *if_name);
+                                                               host_t *src_ip, char *if_name, bool pass);
 
 /**
  * Clear the queued network changes.
@@ -563,6 +585,10 @@ static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
                net_change_t *change, lookup = {
                        .if_name = route->if_name,
                };
+               if (route->pass)
+               {       /* no need to reinstall these, they don't reference interfaces */
+                       continue;
+               }
                /* check if a change for the outgoing interface is queued */
                change = this->net_changes->get(this->net_changes, &lookup);
                if (!change)
@@ -581,7 +607,7 @@ static job_requeue_t reinstall_routes(private_kernel_netlink_net_t *this)
                {
                        manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
                                                        route->dst_net, route->prefixlen, route->gateway,
-                                                       route->src_ip, route->if_name);
+                                                       route->src_ip, route->if_name, route->pass);
                }
        }
        enumerator->destroy(enumerator);
@@ -1107,8 +1133,8 @@ static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
 {
        iface_entry_t *iface;
 
-       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
-                                                                (void**)&iface, &index) == SUCCESS)
+       if (this->ifaces->find_first(this->ifaces, iface_entry_by_index,
+                                                                (void**)&iface, index))
        {
                return iface_entry_up_and_usable(iface);
        }
@@ -1120,9 +1146,13 @@ static bool is_interface_up_and_usable(private_kernel_netlink_net_t *this,
  *
  * this->lock must be locked when calling this function
  */
-static void addr_entry_unregister(addr_entry_t *addr, iface_entry_t *iface,
-                                                                 private_kernel_netlink_net_t *this)
+CALLBACK(addr_entry_unregister, void,
+       addr_entry_t *addr, va_list args)
 {
+       private_kernel_netlink_net_t *this;
+       iface_entry_t *iface;
+
+       VA_ARGS_VGET(args, iface, this);
        if (addr->refcount)
        {
                addr_map_entry_remove(this->vips, addr, iface);
@@ -1166,20 +1196,19 @@ static void process_link(private_kernel_netlink_net_t *this,
        {
                case RTM_NEWLINK:
                {
-                       if (this->ifaces->find_first(this->ifaces,
-                                                                       (void*)iface_entry_by_index, (void**)&entry,
-                                                                       &msg->ifi_index) != SUCCESS)
+                       if (!this->ifaces->find_first(this->ifaces, iface_entry_by_index,
+                                                                                (void**)&entry, msg->ifi_index))
                        {
                                INIT(entry,
                                        .ifindex = msg->ifi_index,
                                        .addrs = linked_list_create(),
-                                       .usable = charon->kernel->is_interface_usable(
-                                                                                                               charon->kernel, name),
                                );
                                this->ifaces->insert_last(this->ifaces, entry);
                        }
                        strncpy(entry->ifname, name, IFNAMSIZ);
                        entry->ifname[IFNAMSIZ-1] = '\0';
+                       entry->usable = charon->kernel->is_interface_usable(charon->kernel,
+                                                                                                                               name);
                        if (event && entry->usable)
                        {
                                if (!(entry->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
@@ -1212,7 +1241,7 @@ static void process_link(private_kernel_netlink_net_t *this,
                                         * another interface? */
                                        this->ifaces->remove_at(this->ifaces, enumerator);
                                        current->addrs->invoke_function(current->addrs,
-                                                               (void*)addr_entry_unregister, current, this);
+                                                                               addr_entry_unregister, current, this);
                                        iface_entry_destroy(current);
                                        break;
                                }
@@ -1283,8 +1312,8 @@ static void process_addr(private_kernel_netlink_net_t *this,
        }
 
        this->lock->write_lock(this->lock);
-       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
-                                                                (void**)&iface, &msg->ifa_index) == SUCCESS)
+       if (this->ifaces->find_first(this->ifaces, iface_entry_by_index,
+                                                                (void**)&iface, msg->ifa_index))
        {
                addr_map_entry_t *entry, lookup = {
                        .ip = host,
@@ -1381,7 +1410,8 @@ static void process_addr(private_kernel_netlink_net_t *this,
 /**
  * process RTM_NEWROUTE and RTM_DELROUTE from kernel
  */
-static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *hdr)
+static void process_route(private_kernel_netlink_net_t *this,
+                                                 struct nlmsghdr *hdr)
 {
        struct rtmsg* msg = NLMSG_DATA(hdr);
        struct rtattr *rta = RTM_RTA(msg);
@@ -1404,6 +1434,16 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
        {
                switch (rta->rta_type)
                {
+#ifdef HAVE_RTA_TABLE
+                       case RTA_TABLE:
+                               /* also check against extended table ID */
+                               if (RTA_PAYLOAD(rta) == sizeof(uint32_t) &&
+                                       this->routing_table == *(uint32_t*)RTA_DATA(rta))
+                               {
+                                       return;
+                               }
+                               break;
+#endif /* HAVE_RTA_TABLE */
                        case RTA_PREFSRC:
                                DESTROY_IF(host);
                                host = host_create_from_chunk(msg->rtm_family,
@@ -1441,13 +1481,50 @@ static void process_route(private_kernel_netlink_net_t *this, struct nlmsghdr *h
        host->destroy(host);
 }
 
+/**
+ * process RTM_NEW|DELRULE from kernel
+ *
+ * Rules that refer to our own routing table (either via the table field of
+ * the message header or via an FRA_TABLE attribute) or to the local routing
+ * table are ignored. For every other rule message fire_roam_event() is
+ * called. If HAVE_LINUX_FIB_RULES_H is not defined this function does nothing.
+ *
+ * @param this         private kernel_netlink_net_t instance
+ * @param hdr          netlink header of the received rule message
+ */
+static void process_rule(private_kernel_netlink_net_t *this,
+                                                struct nlmsghdr *hdr)
+{
+#ifdef HAVE_LINUX_FIB_RULES_H
+       struct rtmsg* msg = NLMSG_DATA(hdr);
+       struct rtattr *rta = RTM_RTA(msg);
+       size_t rtasize = RTM_PAYLOAD(hdr);
+
+       /* ignore rules added by us or in the local routing table (local addrs) */
+       if (msg->rtm_table && (msg->rtm_table == this->routing_table ||
+                                                  msg->rtm_table == RT_TABLE_LOCAL))
+       {
+               return;
+       }
+
+       while (RTA_OK(rta, rtasize))
+       {
+               switch (rta->rta_type)
+               {
+                       case FRA_TABLE:
+                               /* also check against extended table ID, which is only
+                                * considered if the payload is exactly 32 bits wide */
+                               if (RTA_PAYLOAD(rta) == sizeof(uint32_t) &&
+                                       this->routing_table == *(uint32_t*)RTA_DATA(rta))
+                               {
+                                       return;
+                               }
+                               break;
+               }
+               rta = RTA_NEXT(rta, rtasize);
+       }
+       fire_roam_event(this, FALSE);
+#endif /* HAVE_LINUX_FIB_RULES_H */
+}
+
 /**
  * Receives events from kernel
  */
 static bool receive_events(private_kernel_netlink_net_t *this, int fd,
                                                   watcher_event_t event)
 {
-       char response[1536];
+       char response[netlink_get_buflen()];
        struct nlmsghdr *hdr = (struct nlmsghdr*)response;
        struct sockaddr_nl addr;
        socklen_t addr_len = sizeof(addr);
@@ -1498,6 +1575,13 @@ static bool receive_events(private_kernel_netlink_net_t *this, int fd,
                                        process_route(this, hdr);
                                }
                                break;
+                       case RTM_NEWRULE:
+                       case RTM_DELRULE:
+                               if (this->process_rules)
+                               {
+                                       process_rule(this, hdr);
+                               }
+                               break;
                        default:
                                break;
                }
@@ -1513,35 +1597,48 @@ typedef struct {
        kernel_address_type_t which;
 } address_enumerator_t;
 
-/**
- * cleanup function for address enumerator
- */
-static void address_enumerator_destroy(address_enumerator_t *data)
+CALLBACK(address_enumerator_destroy, void, /* cleanup function for address enumerator */
+       address_enumerator_t *data)
 {
        data->this->lock->unlock(data->this->lock); /* presumably acquired when the enumerator was created - confirm in create_address_enumerator */
        free(data); /* the enumerator data is owned by the enumerator and released here */
 }
 
-/**
- * filter for addresses
- */
-static bool filter_addresses(address_enumerator_t *data,
-                                                        addr_entry_t** in, host_t** out)
+CALLBACK(filter_addresses, bool, /* filter for addresses: yields the next acceptable address of orig */
+       address_enumerator_t *data, enumerator_t *orig, va_list args)
 {
-       if (!(data->which & ADDR_TYPE_VIRTUAL) && (*in)->refcount)
-       {       /* skip virtual interfaces added by us */
-               return FALSE;
-       }
-       if (!(data->which & ADDR_TYPE_REGULAR) && !(*in)->refcount)
-       {       /* address is regular, but not requested */
-               return FALSE;
-       }
-       if ((*in)->scope >= RT_SCOPE_LINK)
-       {       /* skip addresses with a unusable scope */
-               return FALSE;
+       addr_entry_t *addr;
+       host_t **out; /* receives the IP of the accepted address entry */
+
+       VA_ARGS_VGET(args, out);
+
+       while (orig->enumerate(orig, &addr)) /* advance until an entry passes all checks */
+       {
+               if (!(data->which & ADDR_TYPE_VIRTUAL) && addr->refcount)
+               {       /* skip virtual IPs added by us */
+                       continue;
+               }
+               if (!(data->which & ADDR_TYPE_REGULAR) && !addr->refcount)
+               {       /* address is regular, but not requested */
+                       continue;
+               }
+               if (addr->flags & IFA_F_DEPRECATED ||
+                       addr->scope >= RT_SCOPE_LINK)
+               {       /* skip deprecated addresses or those with an unusable scope */
+                       continue;
+               }
+               if (addr->ip->get_family(addr->ip) == AF_INET6)
+               {       /* handle temporary IPv6 addresses according to config */
+                       bool temporary = (addr->flags & IFA_F_TEMPORARY) == IFA_F_TEMPORARY;
+                       if (data->this->prefer_temporary_addrs != temporary)
+                       {       /* keep temporary addresses only if preferred, others only if not */
+                               continue;
+                       }
+               }
+               *out = addr->ip;
+               return TRUE;
        }
-       *out = (*in)->ip;
-       return TRUE;
+       return FALSE; /* orig has no more entries */
 }
 
 /**
@@ -1551,30 +1648,35 @@ static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
                                                                                         address_enumerator_t *data)
 {
        return enumerator_create_filter(
-                               iface->addrs->create_enumerator(iface->addrs),
-                               (void*)filter_addresses, data, NULL);
+                                               iface->addrs->create_enumerator(iface->addrs),
+                                               filter_addresses, data, NULL);
 }
 
-/**
- * filter for interfaces
- */
-static bool filter_interfaces(address_enumerator_t *data, iface_entry_t** in,
-                                                         iface_entry_t** out)
+CALLBACK(filter_interfaces, bool, /* filter for interfaces: yields the next acceptable interface of orig */
+       address_enumerator_t *data, enumerator_t *orig, va_list args)
 {
-       if (!(data->which & ADDR_TYPE_IGNORED) && !(*in)->usable)
-       {       /* skip interfaces excluded by config */
-               return FALSE;
-       }
-       if (!(data->which & ADDR_TYPE_LOOPBACK) && ((*in)->flags & IFF_LOOPBACK))
-       {       /* ignore loopback devices */
-               return FALSE;
-       }
-       if (!(data->which & ADDR_TYPE_DOWN) && !((*in)->flags & IFF_UP))
-       {       /* skip interfaces not up */
-               return FALSE;
+       iface_entry_t *iface, **out; /* out receives the accepted interface entry */
+
+       VA_ARGS_VGET(args, out);
+
+       while (orig->enumerate(orig, &iface)) /* advance until an entry passes all checks */
+       {
+               if (!(data->which & ADDR_TYPE_IGNORED) && !iface->usable)
+               {       /* skip interfaces excluded by config */
+                       continue;
+               }
+               if (!(data->which & ADDR_TYPE_LOOPBACK) && (iface->flags & IFF_LOOPBACK))
+               {       /* ignore loopback devices */
+                       continue;
+               }
+               if (!(data->which & ADDR_TYPE_DOWN) && !(iface->flags & IFF_UP))
+               {       /* skip interfaces not up */
+                       continue;
+               }
+               *out = iface;
+               return TRUE;
        }
-       *out = *in;
-       return TRUE;
+       return FALSE; /* orig has no more entries */
 }
 
 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
@@ -1591,9 +1693,9 @@ METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
        return enumerator_create_nested(
                                enumerator_create_filter(
                                        this->ifaces->create_enumerator(this->ifaces),
-                                       (void*)filter_interfaces, data, NULL),
+                                       filter_interfaces, data, NULL),
                                (void*)create_iface_enumerator, data,
-                               (void*)address_enumerator_destroy);
+                               address_enumerator_destroy);
 }
 
 METHOD(kernel_net_t, get_interface_name, bool,
@@ -1656,8 +1758,8 @@ static int get_interface_index(private_kernel_netlink_net_t *this, char* name)
        DBG2(DBG_KNL, "getting iface index for %s", name);
 
        this->lock->read_lock(this->lock);
-       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
-                                                               (void**)&iface, name) == SUCCESS)
+       if (this->ifaces->find_first(this->ifaces, iface_entry_by_name,
+                                                               (void**)&iface, name))
        {
                ifindex = iface->ifindex;
        }
@@ -1682,8 +1784,8 @@ static char *get_interface_name_by_index(private_kernel_netlink_net_t *this,
        DBG2(DBG_KNL, "getting iface name for index %d", index);
 
        this->lock->read_lock(this->lock);
-       if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_index,
-                                                               (void**)&iface, &index) == SUCCESS)
+       if (this->ifaces->find_first(this->ifaces, iface_entry_by_index,
+                                                               (void**)&iface, index))
        {
                name = strdup(iface->ifname);
        }
@@ -1724,7 +1826,7 @@ static void rt_entry_destroy(rt_entry_t *this)
 /**
  * Check if the route received with RTM_NEWROUTE is usable based on its type.
  */
-static bool route_usable(struct nlmsghdr *hdr)
+static bool route_usable(struct nlmsghdr *hdr, bool allow_local)
 {
        struct rtmsg *msg;
 
@@ -1736,6 +1838,8 @@ static bool route_usable(struct nlmsghdr *hdr)
                case RTN_PROHIBIT:
                case RTN_THROW:
                        return FALSE;
+               case RTN_LOCAL:
+                       return allow_local;
                default:
                        return TRUE;
        }
@@ -1759,15 +1863,11 @@ static rt_entry_t *parse_route(struct nlmsghdr *hdr, rt_entry_t *route)
 
        if (route)
        {
-               route->gtw = chunk_empty;
-               route->pref_src = chunk_empty;
-               route->dst = chunk_empty;
-               route->dst_len = msg->rtm_dst_len;
-               route->src = chunk_empty;
-               route->src_len = msg->rtm_src_len;
-               route->table = msg->rtm_table;
-               route->oif = 0;
-               route->priority = 0;
+               *route = (rt_entry_t){
+                       .dst_len = msg->rtm_dst_len,
+                       .src_len = msg->rtm_src_len,
+                       .table = msg->rtm_table,
+               };
        }
        else
        {
@@ -1870,12 +1970,22 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
        {       /* kernels prior to 3.0 do not support RTA_PREFSRC for IPv6 routes.
                 * as we want to ignore routes with virtual IPs we cannot use DUMP
                 * if these routes are not installed in a separate table */
-               hdr->nlmsg_flags |= NLM_F_DUMP;
+               if (this->install_routes)
+               {
+                       hdr->nlmsg_flags |= NLM_F_DUMP;
+               }
        }
        if (candidate)
        {
                chunk = candidate->get_address(candidate);
-               netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
+               if (hdr->nlmsg_flags & NLM_F_DUMP)
+               {
+                       netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
+               }
+               else
+               {
+                       netlink_add_attribute(hdr, RTA_SRC, chunk, sizeof(request));
+               }
        }
        /* we use this below to match against the routes */
        chunk = dest->get_address(dest);
@@ -1905,7 +2015,7 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
                                rt_entry_t *other;
                                uintptr_t table;
 
-                               if (!route_usable(current))
+                               if (!route_usable(current, TRUE))
                                {
                                        continue;
                                }
@@ -1913,7 +2023,7 @@ static host_t *get_route(private_kernel_netlink_net_t *this, host_t *dest,
 
                                table = (uintptr_t)route->table;
                                if (this->rt_exclude->find_first(this->rt_exclude, NULL,
-                                                                                                (void**)&table) == SUCCESS)
+                                                                                                (void**)&table))
                                {       /* route is from an excluded routing table */
                                        continue;
                                }
@@ -2137,6 +2247,8 @@ typedef struct {
        size_t len;
        /** last subnet enumerated */
        host_t *net;
+       /** interface of current net */
+       char ifname[IFNAMSIZ];
 } subnet_enumerator_t;
 
 METHOD(enumerator_t, destroy_subnet_enumerator, void,
@@ -2148,8 +2260,14 @@ METHOD(enumerator_t, destroy_subnet_enumerator, void,
 }
 
 METHOD(enumerator_t, enumerate_subnets, bool,
-       subnet_enumerator_t *this, host_t **net, uint8_t *mask, char **ifname)
+       subnet_enumerator_t *this, va_list args)
 {
+       host_t **net;
+       uint8_t *mask;
+       char **ifname;
+
+       VA_ARGS_VGET(args, net, mask, ifname);
+
        if (!this->current)
        {
                this->current = this->msg;
@@ -2169,42 +2287,32 @@ METHOD(enumerator_t, enumerate_subnets, bool,
                                break;
                        case RTM_NEWROUTE:
                        {
-                               struct rtmsg *msg;
-                               struct rtattr *rta;
-                               size_t rtasize;
-                               chunk_t dst = chunk_empty;
-
-                               msg = NLMSG_DATA(this->current);
+                               rt_entry_t route;
 
-                               if (!route_usable(this->current))
+                               if (!route_usable(this->current, FALSE))
                                {
                                        break;
                                }
-                               else if (msg->rtm_table && (
-                                                       msg->rtm_table == RT_TABLE_LOCAL ||
-                                                       msg->rtm_table == this->private->routing_table))
+                               parse_route(this->current, &route);
+
+                               if (route.table && (
+                                                       route.table == RT_TABLE_LOCAL ||
+                                                       route.table == this->private->routing_table))
                                {       /* ignore our own and the local routing tables */
                                        break;
                                }
-
-                               rta = RTM_RTA(msg);
-                               rtasize = RTM_PAYLOAD(this->current);
-                               while (RTA_OK(rta, rtasize))
-                               {
-                                       if (rta->rta_type == RTA_DST)
-                                       {
-                                               dst = chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta));
-                                               break;
-                                       }
-                                       rta = RTA_NEXT(rta, rtasize);
+                               else if (route.gtw.ptr)
+                               {       /* ignore routes via gateway/next hop */
+                                       break;
                                }
 
-                               if (dst.ptr)
+                               if (route.dst.ptr && route.oif &&
+                                       if_indextoname(route.oif, this->ifname))
                                {
-                                       this->net = host_create_from_chunk(msg->rtm_family, dst, 0);
+                                       this->net = host_create_from_chunk(AF_UNSPEC, route.dst, 0);
                                        *net = this->net;
-                                       *mask = msg->rtm_dst_len;
-                                       *ifname = NULL;
+                                       *mask = route.dst_len;
+                                       *ifname = this->ifname;
                                        return TRUE;
                                }
                                break;
@@ -2245,7 +2353,8 @@ METHOD(kernel_net_t, create_local_subnet_enumerator, enumerator_t*,
 
        INIT(enumerator,
                .public = {
-                       .enumerate = (void*)_enumerate_subnets,
+                       .enumerate = enumerator_enumerate_default,
+                       .venumerate = _enumerate_subnets,
                        .destroy = _destroy_subnet_enumerator,
                },
                .private = this,
@@ -2255,6 +2364,46 @@ METHOD(kernel_net_t, create_local_subnet_enumerator, enumerator_t*,
        return &enumerator->public;
 }
 
+/**
+ * Manages the creation and deletion of IPv6 address labels for virtual IPs.
+ * By setting the appropriate nlmsg_type the label is either added or removed.
+ *
+ * The label is installed for exactly the given address (the prefix length is
+ * set to the full length of the address) and always uses the value 220.
+ *
+ * @param this         private kernel_netlink_net_t instance
+ * @param nlmsg_type   RTM_NEWADDRLABEL to add, RTM_DELADDRLABEL to remove
+ * @param ip           address to which the label applies
+ * @return             status returned by send_ack() on this->socket
+ */
+static status_t manage_addrlabel(private_kernel_netlink_net_t *this,
+                                                                int nlmsg_type, host_t *ip)
+{
+       netlink_buf_t request;
+       struct nlmsghdr *hdr;
+       struct ifaddrlblmsg *msg;
+       chunk_t chunk;
+       uint32_t label;
+
+       memset(&request, 0, sizeof(request));
+
+       chunk = ip->get_address(ip);
+
+       hdr = &request.hdr;
+       hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       if (nlmsg_type == RTM_NEWADDRLABEL)
+       {       /* these flags are only set when adding a label, not when removing */
+               hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL;
+       }
+       hdr->nlmsg_type = nlmsg_type;
+       hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrlblmsg));
+
+       msg = NLMSG_DATA(hdr);
+       msg->ifal_family = ip->get_family(ip);
+       msg->ifal_prefixlen = chunk.len * 8; /* address length in bits, i.e. the whole address */
+
+       netlink_add_attribute(hdr, IFAL_ADDRESS, chunk, sizeof(request));
+       /* doesn't really matter as default labels are < 20 but this makes it kinda
+        * recognizable */
+       label = 220;
+       netlink_add_attribute(hdr, IFAL_LABEL, chunk_from_thing(label),
+                                                 sizeof(request));
+
+       return this->socket->send_ack(this->socket, hdr);
+}
+
 /**
  * Manages the creation and deletion of ip addresses on an interface.
  * By setting the appropriate nlmsg_type, the ip will be set or unset.
@@ -2287,19 +2436,29 @@ static status_t manage_ipaddr(private_kernel_netlink_net_t *this, int nlmsg_type
 
        if (ip->get_family(ip) == AF_INET6)
        {
+#ifdef IFA_F_NODAD
                msg->ifa_flags |= IFA_F_NODAD;
+#endif
                if (this->rta_prefsrc_for_ipv6)
                {
-                       /* if source routes are possible we let the virtual IP get
-                        * deprecated immediately (but mark it as valid forever) so it gets
-                        * only used if forced by our route, and not by the default IPv6
-                        * address selection */
-                       struct ifa_cacheinfo cache = {
-                               .ifa_valid = 0xFFFFFFFF,
-                               .ifa_prefered = 0,
-                       };
-                       netlink_add_attribute(hdr, IFA_CACHEINFO, chunk_from_thing(cache),
-                                                                 sizeof(request));
+                       /* if source routes are possible we set a label for this virtual IP
+                        * so it gets only used if forced by our route, and not by the
+                        * default IPv6 address selection */
+                       int labelop = nlmsg_type == RTM_NEWADDR ? RTM_NEWADDRLABEL
+                                                                                                       : RTM_DELADDRLABEL;
+                       if (manage_addrlabel(this, labelop, ip) != SUCCESS)
+                       {
+                               /* if we can't use address labels we let the virtual IP get
+                                * deprecated immediately (but mark it as valid forever), which
+                                * should also avoid that it gets used by the default address
+                                * selection */
+                               struct ifa_cacheinfo cache = {
+                                       .ifa_valid = 0xFFFFFFFF,
+                                       .ifa_prefered = 0,
+                               };
+                               netlink_add_attribute(hdr, IFA_CACHEINFO,
+                                                                         chunk_from_thing(cache), sizeof(request));
+                       }
                }
        }
        return this->socket->send_ack(this->socket, hdr);
@@ -2355,11 +2514,11 @@ METHOD(kernel_net_t, add_ip, status_t,
        }
        /* try to find the target interface, either by config or via src ip */
        if (!this->install_virtual_ip_on ||
-                this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
-                                               (void**)&iface, this->install_virtual_ip_on) != SUCCESS)
+               !this->ifaces->find_first(this->ifaces, iface_entry_by_name,
+                                                                (void**)&iface, this->install_virtual_ip_on))
        {
-               if (this->ifaces->find_first(this->ifaces, (void*)iface_entry_by_name,
-                                                                        (void**)&iface, iface_name) != SUCCESS)
+               if (!this->ifaces->find_first(this->ifaces, iface_entry_by_name,
+                                                                        (void**)&iface, iface_name))
                {       /* if we don't find the requested interface we just use the first */
                        this->ifaces->get_first(this->ifaces, (void**)&iface);
                }
@@ -2482,7 +2641,7 @@ METHOD(kernel_net_t, del_ip, status_t,
 static status_t manage_srcroute(private_kernel_netlink_net_t *this,
                                                                int nlmsg_type, int flags, chunk_t dst_net,
                                                                uint8_t prefixlen, host_t *gateway,
-                                                               host_t *src_ip, char *if_name)
+                                                               host_t *src_ip, char *if_name, bool pass)
 {
        netlink_buf_t request;
        struct nlmsghdr *hdr;
@@ -2503,12 +2662,12 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this,
                half_net = chunk_alloca(dst_net.len);
                memset(half_net.ptr, 0, half_net.len);
                half_prefixlen = 1;
-
-               status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
-                                       gateway, src_ip, if_name);
+               /* no throw routes in the main table */
+               status = manage_srcroute(this, nlmsg_type, flags, half_net,
+                                                       half_prefixlen, gateway, src_ip, if_name, FALSE);
                half_net.ptr[0] |= 0x80;
-               status = manage_srcroute(this, nlmsg_type, flags, half_net, half_prefixlen,
-                                       gateway, src_ip, if_name);
+               status |= manage_srcroute(this, nlmsg_type, flags, half_net,
+                                                       half_prefixlen, gateway, src_ip, if_name, FALSE);
                return status;
        }
 
@@ -2520,78 +2679,162 @@ static status_t manage_srcroute(private_kernel_netlink_net_t *this,
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
        msg = NLMSG_DATA(hdr);
-       msg->rtm_family = src_ip->get_family(src_ip);
+       msg->rtm_family = (dst_net.len == 4) ? AF_INET : AF_INET6;
        msg->rtm_dst_len = prefixlen;
-       msg->rtm_table = this->routing_table;
        msg->rtm_protocol = RTPROT_STATIC;
-       msg->rtm_type = RTN_UNICAST;
+       msg->rtm_type = pass ? RTN_THROW : RTN_UNICAST;
        msg->rtm_scope = RT_SCOPE_UNIVERSE;
 
-       netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
-       chunk = src_ip->get_address(src_ip);
-       netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
-       if (gateway && gateway->get_family(gateway) == src_ip->get_family(src_ip))
+       if (this->routing_table < 256)
        {
-               chunk = gateway->get_address(gateway);
-               netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
+               msg->rtm_table = this->routing_table;
        }
-       ifindex = get_interface_index(this, if_name);
-       chunk.ptr = (char*)&ifindex;
-       chunk.len = sizeof(ifindex);
-       netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
+       else
+       {
+#ifdef HAVE_RTA_TABLE
+               chunk = chunk_from_thing(this->routing_table);
+               netlink_add_attribute(hdr, RTA_TABLE, chunk, sizeof(request));
+#else
+               DBG1(DBG_KNL, "routing table IDs > 255 are not supported");
+               return FAILED;
+#endif /* HAVE_RTA_TABLE */
+       }
+       netlink_add_attribute(hdr, RTA_DST, dst_net, sizeof(request));
 
-       if (this->mtu || this->mss)
+       /* only when installing regular routes do we need all the parameters,
+        * deletes are done by destination net (except if metrics are used, which
+        * we don't support), for throw routes we don't need any of them either */
+       if (nlmsg_type == RTM_NEWROUTE && !pass)
        {
-               chunk = chunk_alloca(RTA_LENGTH((sizeof(struct rtattr) +
-                                                                                sizeof(uint32_t)) * 2));
-               chunk.len = 0;
-               rta = (struct rtattr*)chunk.ptr;
-               if (this->mtu)
+               chunk = src_ip->get_address(src_ip);
+               netlink_add_attribute(hdr, RTA_PREFSRC, chunk, sizeof(request));
+               if (gateway && gateway->get_family(gateway) == src_ip->get_family(src_ip))
                {
-                       rta->rta_type = RTAX_MTU;
-                       rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
-                       memcpy(RTA_DATA(rta), &this->mtu, sizeof(uint32_t));
-                       chunk.len = rta->rta_len;
+                       chunk = gateway->get_address(gateway);
+                       netlink_add_attribute(hdr, RTA_GATEWAY, chunk, sizeof(request));
                }
-               if (this->mss)
+               ifindex = get_interface_index(this, if_name);
+               chunk.ptr = (char*)&ifindex;
+               chunk.len = sizeof(ifindex);
+               netlink_add_attribute(hdr, RTA_OIF, chunk, sizeof(request));
+
+               if (this->mtu || this->mss)
                {
-                       rta = (struct rtattr*)(chunk.ptr + RTA_ALIGN(chunk.len));
-                       rta->rta_type = RTAX_ADVMSS;
-                       rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
-                       memcpy(RTA_DATA(rta), &this->mss, sizeof(uint32_t));
-                       chunk.len = RTA_ALIGN(chunk.len) + rta->rta_len;
+                       chunk = chunk_alloca(RTA_LENGTH((sizeof(struct rtattr) +
+                                                                                        sizeof(uint32_t)) * 2));
+                       chunk.len = 0;
+                       rta = (struct rtattr*)chunk.ptr;
+                       if (this->mtu)
+                       {
+                               rta->rta_type = RTAX_MTU;
+                               rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
+                               memcpy(RTA_DATA(rta), &this->mtu, sizeof(uint32_t));
+                               chunk.len = rta->rta_len;
+                       }
+                       if (this->mss)
+                       {
+                               rta = (struct rtattr*)(chunk.ptr + RTA_ALIGN(chunk.len));
+                               rta->rta_type = RTAX_ADVMSS;
+                               rta->rta_len = RTA_LENGTH(sizeof(uint32_t));
+                               memcpy(RTA_DATA(rta), &this->mss, sizeof(uint32_t));
+                               chunk.len = RTA_ALIGN(chunk.len) + rta->rta_len;
+                       }
+                       netlink_add_attribute(hdr, RTA_METRICS, chunk, sizeof(request));
                }
-               netlink_add_attribute(hdr, RTA_METRICS, chunk, sizeof(request));
        }
-
        return this->socket->send_ack(this->socket, hdr);
 }
 
+/**
+ * Lookup key handed to the route match callbacks (wraps entry and context)
+ */
+typedef struct {
+       /** route entry whose destination net and prefix length are compared */
+       route_entry_t route;
+       /** kernel interface, used by the callbacks to check for known VIPs */
+       private_kernel_netlink_net_t *this;
+} route_entry_lookup_t;
+
+/**
+ * Match entries with the same destination whose source IP is a known VIP
+ */
+static bool route_with_vip(route_entry_lookup_t *a, route_entry_t *b)
+{
+       if (!chunk_equals(a->route.dst_net, b->dst_net) ||
+               a->route.prefixlen != b->prefixlen)
+       {       /* different destination net or prefix length */
+               return FALSE;
+       }
+       /* same destination, so the source IP decides */
+       return is_known_vip(a->this, b->src_ip) ? TRUE : FALSE;
+}
+
+/**
+ * Match any entry with the same destination net and prefix length
+ */
+static bool route_with_dst(route_entry_lookup_t *a, route_entry_t *b)
+{
+       if (!chunk_equals(a->route.dst_net, b->dst_net))
+       {       /* destination networks differ */
+               return FALSE;
+       }
+       /* networks are equal, the prefix length decides */
+       return a->route.prefixlen == b->prefixlen ? TRUE : FALSE;
+}
+
 METHOD(kernel_net_t, add_route, status_t,
        private_kernel_netlink_net_t *this, chunk_t dst_net, uint8_t prefixlen,
-       host_t *gateway, host_t *src_ip, char *if_name)
+       host_t *gateway, host_t *src_ip, char *if_name, bool pass)
 {
        status_t status;
-       route_entry_t *found, route = {
-               .dst_net = dst_net,
-               .prefixlen = prefixlen,
-               .gateway = gateway,
-               .src_ip = src_ip,
-               .if_name = if_name,
+       route_entry_t *found;
+       route_entry_lookup_t lookup = {
+               .route = {
+                       .dst_net = dst_net,
+                       .prefixlen = prefixlen,
+                       .gateway = gateway,
+                       .src_ip = src_ip,
+                       .if_name = if_name,
+                       .pass = pass,
+               },
+               .this = this,
        };
 
+       if (!this->routing_table)
+       {       /* treat these as regular routes if installing in the main table */
+               pass = lookup.route.pass = FALSE;
+       }
+
        this->routes_lock->lock(this->routes_lock);
-       found = this->routes->get(this->routes, &route);
+       found = this->routes->get(this->routes, &lookup.route);
        if (found)
        {
                this->routes_lock->unlock(this->routes_lock);
                return ALREADY_DONE;
        }
-       status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE | NLM_F_EXCL,
-                                                        dst_net, prefixlen, gateway, src_ip, if_name);
+
+       /* don't replace the route if we already have one with a VIP installed,
+        * but keep track of it in case that other route is uninstalled */
+       this->lock->read_lock(this->lock);
+       if (!is_known_vip(this, src_ip))
+       {
+               found = this->routes->get_match(this->routes, &lookup,
+                                                                               (void*)route_with_vip);
+       }
+       this->lock->unlock(this->lock);
+       if (found)
+       {
+               status = SUCCESS;
+       }
+       else
+       {
+               status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE|NLM_F_REPLACE,
+                                                                dst_net, prefixlen, gateway, src_ip, if_name,
+                                                                pass);
+       }
        if (status == SUCCESS)
        {
-               found = route_entry_clone(&route);
+               found = route_entry_clone(&lookup.route);
                this->routes->put(this->routes, found, found);
        }
        this->routes_lock->unlock(this->routes_lock);
@@ -2600,28 +2843,58 @@ METHOD(kernel_net_t, add_route, status_t,
 
 METHOD(kernel_net_t, del_route, status_t,
        private_kernel_netlink_net_t *this, chunk_t dst_net, uint8_t prefixlen,
-       host_t *gateway, host_t *src_ip, char *if_name)
+       host_t *gateway, host_t *src_ip, char *if_name, bool pass)
 {
        status_t status;
-       route_entry_t *found, route = {
-               .dst_net = dst_net,
-               .prefixlen = prefixlen,
-               .gateway = gateway,
-               .src_ip = src_ip,
-               .if_name = if_name,
+       route_entry_t *found;
+       route_entry_lookup_t lookup = {
+               .route = {
+                       .dst_net = dst_net,
+                       .prefixlen = prefixlen,
+                       .gateway = gateway,
+                       .src_ip = src_ip,
+                       .if_name = if_name,
+                       .pass = pass,
+               },
+               .this = this,
        };
 
+       if (!this->routing_table)
+       {       /* treat these as regular routes if they were installed in the main table */
+               pass = lookup.route.pass = FALSE;
+       }
+
        this->routes_lock->lock(this->routes_lock);
-       found = this->routes->get(this->routes, &route);
+       found = this->routes->remove(this->routes, &lookup.route);
        if (!found)
        {
                this->routes_lock->unlock(this->routes_lock);
                return NOT_FOUND;
        }
-       this->routes->remove(this->routes, found);
        route_entry_destroy(found);
-       status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
-                                                        gateway, src_ip, if_name);
+
+       /* if other routes to this destination remain, reinstall one of them
+        * (preferring one with a known VIP), otherwise uninstall the route */
+       this->lock->read_lock(this->lock);
+       found = this->routes->get_match(this->routes, &lookup,
+                                                                       (void*)route_with_vip);
+       this->lock->unlock(this->lock);
+       if (!found)
+       {
+               found = this->routes->get_match(this->routes, &lookup,
+                                                                               (void*)route_with_dst);
+       }
+       if (found)
+       {
+               status = manage_srcroute(this, RTM_NEWROUTE, NLM_F_CREATE|NLM_F_REPLACE,
+                                                       found->dst_net, found->prefixlen, found->gateway,
+                                                       found->src_ip, found->if_name, found->pass);
+       }
+       else
+       {
+               status = manage_srcroute(this, RTM_DELROUTE, 0, dst_net, prefixlen,
+                                                                gateway, src_ip, if_name, pass);
+       }
        this->routes_lock->unlock(this->routes_lock);
        return status;
 }
@@ -2740,12 +3013,25 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
        hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
 
        msg = NLMSG_DATA(hdr);
-       msg->rtm_table = table;
        msg->rtm_family = family;
        msg->rtm_protocol = RTPROT_BOOT;
        msg->rtm_scope = RT_SCOPE_UNIVERSE;
        msg->rtm_type = RTN_UNICAST;
 
+       if (this->routing_table < 256)
+       {
+               msg->rtm_table = table;
+       }
+       else
+       {
+#ifdef HAVE_LINUX_FIB_RULES_H
+               chunk = chunk_from_thing(table);
+               netlink_add_attribute(hdr, FRA_TABLE, chunk, sizeof(request));
+#else
+               DBG1(DBG_KNL, "routing table IDs > 255 are not supported");
+               return FAILED;
+#endif /* HAVE_LINUX_FIB_RULES_H */
+       }
        chunk = chunk_from_thing(prio);
        netlink_add_attribute(hdr, RTA_PRIORITY, chunk, sizeof(request));
 
@@ -2761,7 +3047,7 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
                        msg->rtm_flags |= FIB_RULE_INVERT;
                        fwmark++;
                }
-               if (mark_from_string(fwmark, &mark))
+               if (mark_from_string(fwmark, MARK_OP_NONE, &mark))
                {
                        chunk = chunk_from_thing(mark.value);
                        netlink_add_attribute(hdr, FRA_FWMARK, chunk, sizeof(request));
@@ -2774,7 +3060,7 @@ static status_t manage_rule(private_kernel_netlink_net_t *this, int nlmsg_type,
                }
 #else
                DBG1(DBG_KNL, "setting firewall mark on routing rule is not supported");
-#endif
+#endif /* HAVE_LINUX_FIB_RULES_H */
        }
        return this->socket->send_ack(this->socket, hdr);
 }
@@ -2853,7 +3139,8 @@ METHOD(kernel_net_t, destroy, void,
        while (enumerator->enumerate(enumerator, NULL, (void**)&route))
        {
                manage_srcroute(this, RTM_DELROUTE, 0, route->dst_net, route->prefixlen,
-                                               route->gateway, route->src_ip, route->if_name);
+                                               route->gateway, route->src_ip, route->if_name,
+                                               route->pass);
                route_entry_destroy(route);
        }
        enumerator->destroy(enumerator);
@@ -2927,6 +3214,8 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                                "%s.routing_table_prio", ROUTING_TABLE_PRIO, lib->ns),
                .process_route = lib->settings->get_bool(lib->settings,
                                                "%s.process_route", TRUE, lib->ns),
+               .install_routes = lib->settings->get_bool(lib->settings,
+                                               "%s.install_routes", TRUE, lib->ns),
                .install_virtual_ip = lib->settings->get_bool(lib->settings,
                                                "%s.install_virtual_ip", TRUE, lib->ns),
                .install_virtual_ip_on = lib->settings->get_str(lib->settings,
@@ -2935,6 +3224,8 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                                                "%s.prefer_temporary_addrs", FALSE, lib->ns),
                .roam_events = lib->settings->get_bool(lib->settings,
                                                "%s.plugins.kernel-netlink.roam_events", TRUE, lib->ns),
+               .process_rules = lib->settings->get_bool(lib->settings,
+                                               "%s.plugins.kernel-netlink.process_rules", FALSE, lib->ns),
                .mtu = lib->settings->get_int(lib->settings,
                                                "%s.plugins.kernel-netlink.mtu", 0, lib->ns),
                .mss = lib->settings->get_int(lib->settings,
@@ -2987,8 +3278,19 @@ kernel_netlink_net_t *kernel_netlink_net_create()
                        destroy(this);
                        return NULL;
                }
-               addr.nl_groups = RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
-                                                RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_LINK;
+               addr.nl_groups = nl_group(RTNLGRP_IPV4_IFADDR) |
+                                                nl_group(RTNLGRP_IPV6_IFADDR) |
+                                                nl_group(RTNLGRP_LINK);
+               if (this->process_route)
+               {
+                       addr.nl_groups |= nl_group(RTNLGRP_IPV4_ROUTE) |
+                                                         nl_group(RTNLGRP_IPV6_ROUTE);
+               }
+               if (this->process_rules)
+               {
+                       addr.nl_groups |= nl_group(RTNLGRP_IPV4_RULE) |
+                                                         nl_group(RTNLGRP_IPV6_RULE);
+               }
                if (bind(this->socket_events, (struct sockaddr*)&addr, sizeof(addr)))
                {
                        DBG1(DBG_KNL, "unable to bind RT event socket: %s (%d)",