]> git.ipfire.org Git - thirdparty/strongswan.git/blob - src/libcharon/plugins/kernel_pfroute/kernel_pfroute_net.c
kernel-netlink: Implement passthrough type routes and use them on Linux
[thirdparty/strongswan.git] / src / libcharon / plugins / kernel_pfroute / kernel_pfroute_net.c
1 /*
2 * Copyright (C) 2009-2016 Tobias Brunner
3 * HSR Hochschule fuer Technik Rapperswil
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * for more details.
14 */
15
16 #include <sys/types.h>
17 #include <sys/socket.h>
18 #include <sys/sysctl.h>
19 #include <net/if.h>
20 #include <net/if_dl.h>
21 #include <ifaddrs.h>
22 #include <net/route.h>
23 #include <unistd.h>
24 #include <errno.h>
25
26 #include "kernel_pfroute_net.h"
27
28 #include <daemon.h>
29 #include <utils/debug.h>
30 #include <networking/host.h>
31 #include <networking/tun_device.h>
32 #include <threading/thread.h>
33 #include <threading/mutex.h>
34 #include <threading/condvar.h>
35 #include <threading/rwlock.h>
36 #include <threading/spinlock.h>
37 #include <collections/hashtable.h>
38 #include <collections/linked_list.h>
39 #include <processing/jobs/callback_job.h>
40
41 #ifndef HAVE_STRUCT_SOCKADDR_SA_LEN
42 #error Cannot compile this plugin on systems where 'struct sockaddr' has no sa_len member.
43 #endif
44
/**
 * Alignment of the sockaddr structures that follow a routing message header
 */
#if defined(__APPLE__)
/* Apple always uses 4 bytes */
#define SA_ALIGN 4
#else
/* while on other platforms like FreeBSD it depends on the architecture */
#define SA_ALIGN sizeof(long)
#endif

/**
 * Space taken by a sockaddr of the given length: the length rounded up to the
 * next multiple of SA_ALIGN, or SA_ALIGN itself if the length is not positive
 */
#define SA_LEN(len) ((len) <= 0 ? SA_ALIGN : \
					 (((len) + (SA_ALIGN - 1)) & ~(SA_ALIGN - 1)))
54
/** time to wait before a queued roam event is raised (ms) */
#define ROAM_DELAY 100

/** time to wait before queued route reinstallations are performed (ms) */
#define ROUTE_DELAY 100
60
61 typedef struct addr_entry_t addr_entry_t;
62
63 /**
64 * IP address in an inface_entry_t
65 */
66 struct addr_entry_t {
67
68 /** The ip address */
69 host_t *ip;
70
71 /** virtual IP managed by us */
72 bool virtual;
73 };
74
75 /**
76 * destroy a addr_entry_t object
77 */
78 static void addr_entry_destroy(addr_entry_t *this)
79 {
80 this->ip->destroy(this->ip);
81 free(this);
82 }
83
84 typedef struct iface_entry_t iface_entry_t;
85
86 /**
87 * A network interface on this system, containing addr_entry_t's
88 */
89 struct iface_entry_t {
90
91 /** interface index */
92 int ifindex;
93
94 /** name of the interface */
95 char ifname[IFNAMSIZ];
96
97 /** interface flags, as in netdevice(7) SIOCGIFFLAGS */
98 u_int flags;
99
100 /** list of addresses as host_t */
101 linked_list_t *addrs;
102
103 /** TRUE if usable by config */
104 bool usable;
105 };
106
107 /**
108 * destroy an interface entry
109 */
110 static void iface_entry_destroy(iface_entry_t *this)
111 {
112 this->addrs->destroy_function(this->addrs, (void*)addr_entry_destroy);
113 free(this);
114 }
115
116 /**
117 * check if an interface is up
118 */
119 static inline bool iface_entry_up(iface_entry_t *iface)
120 {
121 return (iface->flags & IFF_UP) == IFF_UP;
122 }
123
124 /**
125 * check if an interface is up and usable
126 */
127 static inline bool iface_entry_up_and_usable(iface_entry_t *iface)
128 {
129 return iface->usable && iface_entry_up(iface);
130 }
131
132 typedef struct addr_map_entry_t addr_map_entry_t;
133
134 /**
135 * Entry that maps an IP address to an interface entry
136 */
137 struct addr_map_entry_t {
138 /** The IP address */
139 host_t *ip;
140
141 /** The address entry for this IP address */
142 addr_entry_t *addr;
143
144 /** The interface this address is installed on */
145 iface_entry_t *iface;
146 };
147
148 /**
149 * Hash a addr_map_entry_t object, all entries with the same IP address
150 * are stored in the same bucket
151 */
152 static u_int addr_map_entry_hash(addr_map_entry_t *this)
153 {
154 return chunk_hash(this->ip->get_address(this->ip));
155 }
156
157 /**
158 * Compare two addr_map_entry_t objects, two entries are equal if they are
159 * installed on the same interface
160 */
161 static bool addr_map_entry_equals(addr_map_entry_t *a, addr_map_entry_t *b)
162 {
163 return a->iface->ifindex == b->iface->ifindex &&
164 a->ip->ip_equals(a->ip, b->ip);
165 }
166
167 /**
168 * Used with get_match this finds an address entry if it is installed on
169 * an up and usable interface
170 */
171 static bool addr_map_entry_match_up_and_usable(addr_map_entry_t *a,
172 addr_map_entry_t *b)
173 {
174 return !b->addr->virtual && iface_entry_up_and_usable(b->iface) &&
175 a->ip->ip_equals(a->ip, b->ip);
176 }
177
178 /**
179 * Used with get_match this finds an address entry if it is installed as virtual
180 * IP address
181 */
182 static bool addr_map_entry_match_virtual(addr_map_entry_t *a, addr_map_entry_t *b)
183 {
184 return b->addr->virtual && a->ip->ip_equals(a->ip, b->ip);
185 }
186
187 /**
188 * Used with get_match this finds an address entry if it is installed on
189 * any active local interface
190 */
191 static bool addr_map_entry_match_up(addr_map_entry_t *a, addr_map_entry_t *b)
192 {
193 return !b->addr->virtual && iface_entry_up(b->iface) &&
194 a->ip->ip_equals(a->ip, b->ip);
195 }
196
197 typedef struct route_entry_t route_entry_t;
198
199 /**
200 * Installed routing entry
201 */
202 struct route_entry_t {
203 /** Name of the interface the route is bound to */
204 char *if_name;
205
206 /** Gateway for this route */
207 host_t *gateway;
208
209 /** Destination net */
210 chunk_t dst_net;
211
212 /** Destination net prefixlen */
213 uint8_t prefixlen;
214 };
215
216 /**
217 * Clone a route_entry_t object.
218 */
219 static route_entry_t *route_entry_clone(route_entry_t *this)
220 {
221 route_entry_t *route;
222
223 INIT(route,
224 .if_name = strdup(this->if_name),
225 .gateway = this->gateway ? this->gateway->clone(this->gateway) : NULL,
226 .dst_net = chunk_clone(this->dst_net),
227 .prefixlen = this->prefixlen,
228 );
229 return route;
230 }
231
232 /**
233 * Destroy a route_entry_t object
234 */
235 static void route_entry_destroy(route_entry_t *this)
236 {
237 free(this->if_name);
238 DESTROY_IF(this->gateway);
239 chunk_free(&this->dst_net);
240 free(this);
241 }
242
243 /**
244 * Hash a route_entry_t object
245 */
246 static u_int route_entry_hash(route_entry_t *this)
247 {
248 return chunk_hash_inc(chunk_from_thing(this->prefixlen),
249 chunk_hash(this->dst_net));
250 }
251
252 /**
253 * Compare two route_entry_t objects
254 */
255 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
256 {
257 if (a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
258 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen)
259 {
260 return (!a->gateway && !b->gateway) || (a->gateway && b->gateway &&
261 a->gateway->ip_equals(a->gateway, b->gateway));
262 }
263 return FALSE;
264 }
265
typedef struct net_change_t net_change_t;

/**
 * A queued network change that may require reinstalling routes
 */
struct net_change_t {
	/** name of the interface that got activated (or an IP appeared on) */
	char *if_name;
};
275
276 /**
277 * Destroy a net_change_t object
278 */
279 static void net_change_destroy(net_change_t *this)
280 {
281 free(this->if_name);
282 free(this);
283 }
284
285 /**
286 * Hash a net_change_t object
287 */
288 static u_int net_change_hash(net_change_t *this)
289 {
290 return chunk_hash(chunk_create(this->if_name, strlen(this->if_name)));
291 }
292
293 /**
294 * Compare two net_change_t objects
295 */
296 static bool net_change_equals(net_change_t *a, net_change_t *b)
297 {
298 return streq(a->if_name, b->if_name);
299 }
300
301 typedef struct private_kernel_pfroute_net_t private_kernel_pfroute_net_t;
302
303 /**
304 * Private variables and functions of kernel_pfroute class.
305 */
306 struct private_kernel_pfroute_net_t
307 {
308 /**
309 * Public part of the kernel_pfroute_t object.
310 */
311 kernel_pfroute_net_t public;
312
313 /**
314 * lock to access lists and maps
315 */
316 rwlock_t *lock;
317
318 /**
319 * Cached list of interfaces and their addresses (iface_entry_t)
320 */
321 linked_list_t *ifaces;
322
323 /**
324 * Map for IP addresses to iface_entry_t objects (addr_map_entry_t)
325 */
326 hashtable_t *addrs;
327
328 /**
329 * List of tun devices we installed for virtual IPs
330 */
331 linked_list_t *tuns;
332
333 /**
334 * mutex to communicate exclusively with PF_KEY
335 */
336 mutex_t *mutex;
337
338 /**
339 * condvar to signal if PF_KEY query got a response
340 */
341 condvar_t *condvar;
342
343 /**
344 * installed routes
345 */
346 hashtable_t *routes;
347
348 /**
349 * mutex for routes
350 */
351 mutex_t *routes_lock;
352
353 /**
354 * interface changes which may trigger route reinstallation
355 */
356 hashtable_t *net_changes;
357
358 /**
359 * mutex for route reinstallation triggers
360 */
361 mutex_t *net_changes_lock;
362
363 /**
364 * time of last route reinstallation
365 */
366 timeval_t last_route_reinstall;
367
368 /**
369 * pid to send PF_ROUTE messages with
370 */
371 pid_t pid;
372
373 /**
374 * PF_ROUTE socket to communicate with the kernel
375 */
376 int socket;
377
378 /**
379 * sequence number for messages sent to the kernel
380 */
381 int seq;
382
383 /**
384 * Sequence number a query is waiting for
385 */
386 int waiting_seq;
387
388 /**
389 * Allocated reply message from kernel
390 */
391 struct rt_msghdr *reply;
392
393 /**
394 * earliest time of the next roam event
395 */
396 timeval_t next_roam;
397
398 /**
399 * roam event due to address change
400 */
401 bool roam_address;
402
403 /**
404 * lock to check and update roam event time
405 */
406 spinlock_t *roam_lock;
407
408 /**
409 * Time in ms to wait for IP addresses to appear/disappear
410 */
411 int vip_wait;
412
413 /**
414 * whether to actually install virtual IPs
415 */
416 bool install_virtual_ip;
417 };
418
419
420 /**
421 * Forward declaration
422 */
423 static status_t manage_route(private_kernel_pfroute_net_t *this, int op,
424 chunk_t dst_net, uint8_t prefixlen,
425 host_t *gateway, char *if_name);
426
427 /**
428 * Clear the queued network changes.
429 */
430 static void net_changes_clear(private_kernel_pfroute_net_t *this)
431 {
432 enumerator_t *enumerator;
433 net_change_t *change;
434
435 enumerator = this->net_changes->create_enumerator(this->net_changes);
436 while (enumerator->enumerate(enumerator, NULL, (void**)&change))
437 {
438 this->net_changes->remove_at(this->net_changes, enumerator);
439 net_change_destroy(change);
440 }
441 enumerator->destroy(enumerator);
442 }
443
444 /**
445 * Act upon queued network changes.
446 */
447 static job_requeue_t reinstall_routes(private_kernel_pfroute_net_t *this)
448 {
449 enumerator_t *enumerator;
450 route_entry_t *route;
451
452 this->net_changes_lock->lock(this->net_changes_lock);
453 this->routes_lock->lock(this->routes_lock);
454
455 enumerator = this->routes->create_enumerator(this->routes);
456 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
457 {
458 net_change_t *change, lookup = {
459 .if_name = route->if_name,
460 };
461 /* check if a change for the outgoing interface is queued */
462 change = this->net_changes->get(this->net_changes, &lookup);
463 if (change)
464 {
465 manage_route(this, RTM_ADD, route->dst_net, route->prefixlen,
466 route->gateway, route->if_name);
467 }
468 }
469 enumerator->destroy(enumerator);
470 this->routes_lock->unlock(this->routes_lock);
471
472 net_changes_clear(this);
473 this->net_changes_lock->unlock(this->net_changes_lock);
474 return JOB_REQUEUE_NONE;
475 }
476
477 /**
478 * Queue route reinstallation caused by network changes for a given interface.
479 *
480 * The route reinstallation is delayed for a while and only done once for
481 * several calls during this delay, in order to avoid doing it too often.
482 * The interface name is freed.
483 */
484 static void queue_route_reinstall(private_kernel_pfroute_net_t *this,
485 char *if_name)
486 {
487 net_change_t *update, *found;
488 timeval_t now;
489 job_t *job;
490
491 INIT(update,
492 .if_name = if_name
493 );
494
495 this->net_changes_lock->lock(this->net_changes_lock);
496 found = this->net_changes->put(this->net_changes, update, update);
497 if (found)
498 {
499 net_change_destroy(found);
500 }
501 time_monotonic(&now);
502 if (timercmp(&now, &this->last_route_reinstall, >))
503 {
504 timeval_add_ms(&now, ROUTE_DELAY);
505 this->last_route_reinstall = now;
506
507 job = (job_t*)callback_job_create((callback_job_cb_t)reinstall_routes,
508 this, NULL, NULL);
509 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROUTE_DELAY);
510 }
511 this->net_changes_lock->unlock(this->net_changes_lock);
512 }
513
514 /**
515 * Add an address map entry
516 */
517 static void addr_map_entry_add(private_kernel_pfroute_net_t *this,
518 addr_entry_t *addr, iface_entry_t *iface)
519 {
520 addr_map_entry_t *entry;
521
522 INIT(entry,
523 .ip = addr->ip,
524 .addr = addr,
525 .iface = iface,
526 );
527 entry = this->addrs->put(this->addrs, entry, entry);
528 free(entry);
529 }
530
531 /**
532 * Remove an address map entry (the argument order is a bit strange because
533 * it is also used with linked_list_t.invoke_function)
534 */
535 static void addr_map_entry_remove(addr_entry_t *addr, iface_entry_t *iface,
536 private_kernel_pfroute_net_t *this)
537 {
538 addr_map_entry_t *entry, lookup = {
539 .ip = addr->ip,
540 .addr = addr,
541 .iface = iface,
542 };
543
544 entry = this->addrs->remove(this->addrs, &lookup);
545 free(entry);
546 }
547
548 /**
549 * callback function that raises the delayed roam event
550 */
551 static job_requeue_t roam_event(private_kernel_pfroute_net_t *this)
552 {
553 bool address;
554
555 this->roam_lock->lock(this->roam_lock);
556 address = this->roam_address;
557 this->roam_address = FALSE;
558 this->roam_lock->unlock(this->roam_lock);
559 charon->kernel->roam(charon->kernel, address);
560 return JOB_REQUEUE_NONE;
561 }
562
563 /**
564 * fire a roaming event. we delay it for a bit and fire only one event
565 * for multiple calls. otherwise we would create too many events.
566 */
567 static void fire_roam_event(private_kernel_pfroute_net_t *this, bool address)
568 {
569 timeval_t now;
570 job_t *job;
571
572 time_monotonic(&now);
573 this->roam_lock->lock(this->roam_lock);
574 this->roam_address |= address;
575 if (!timercmp(&now, &this->next_roam, >))
576 {
577 this->roam_lock->unlock(this->roam_lock);
578 return;
579 }
580 timeval_add_ms(&now, ROAM_DELAY);
581 this->next_roam = now;
582 this->roam_lock->unlock(this->roam_lock);
583
584 job = (job_t*)callback_job_create((callback_job_cb_t)roam_event,
585 this, NULL, NULL);
586 lib->scheduler->schedule_job_ms(lib->scheduler, job, ROAM_DELAY);
587 }
588
589 /**
590 * Data for enumerator over rtmsg sockaddrs
591 */
592 typedef struct {
593 /** implements enumerator */
594 enumerator_t public;
595 /** copy of attribute bitfield */
596 int types;
597 /** bytes remaining in buffer */
598 int remaining;
599 /** next sockaddr to enumerate */
600 struct sockaddr *addr;
601 } rt_enumerator_t;
602
603 METHOD(enumerator_t, rt_enumerate, bool,
604 rt_enumerator_t *this, va_list args)
605 {
606 struct sockaddr **addr;
607 int i, type, *xtype;
608
609 VA_ARGS_VGET(args, xtype, addr);
610
611 if (this->remaining < sizeof(this->addr->sa_len) ||
612 this->remaining < this->addr->sa_len)
613 {
614 return FALSE;
615 }
616 for (i = 0; i < RTAX_MAX; i++)
617 {
618 type = (1 << i);
619 if (this->types & type)
620 {
621 this->types &= ~type;
622 *addr = this->addr;
623 *xtype = i;
624 this->remaining -= SA_LEN(this->addr->sa_len);
625 this->addr = (struct sockaddr*)((char*)this->addr +
626 SA_LEN(this->addr->sa_len));
627 return TRUE;
628 }
629 }
630 return FALSE;
631 }
632
633 /**
634 * Create an enumerator over sockaddrs in rt/if messages
635 */
636 static enumerator_t *create_rt_enumerator(int types, int remaining,
637 struct sockaddr *addr)
638 {
639 rt_enumerator_t *this;
640
641 INIT(this,
642 .public = {
643 .enumerate = enumerator_enumerate_default,
644 .venumerate = _rt_enumerate,
645 .destroy = (void*)free,
646 },
647 .types = types,
648 .remaining = remaining,
649 .addr = addr,
650 );
651 return &this->public;
652 }
653
654 /**
655 * Create a safe enumerator over sockaddrs in rt_msghdr
656 */
657 static enumerator_t *create_rtmsg_enumerator(struct rt_msghdr *hdr)
658 {
659 return create_rt_enumerator(hdr->rtm_addrs, hdr->rtm_msglen - sizeof(*hdr),
660 (struct sockaddr *)(hdr + 1));
661 }
662
663 /**
664 * Create a safe enumerator over sockaddrs in ifa_msghdr
665 */
666 static enumerator_t *create_ifamsg_enumerator(struct ifa_msghdr *hdr)
667 {
668 return create_rt_enumerator(hdr->ifam_addrs, hdr->ifam_msglen - sizeof(*hdr),
669 (struct sockaddr *)(hdr + 1));
670 }
671
672 /**
673 * Process an RTM_*ADDR message from the kernel
674 */
675 static void process_addr(private_kernel_pfroute_net_t *this,
676 struct ifa_msghdr *ifa)
677 {
678 struct sockaddr *sockaddr;
679 host_t *host = NULL;
680 enumerator_t *ifaces, *addrs;
681 iface_entry_t *iface;
682 addr_entry_t *addr;
683 bool found = FALSE, changed = FALSE, roam = FALSE;
684 enumerator_t *enumerator;
685 char *ifname = NULL;
686 int type;
687
688 enumerator = create_ifamsg_enumerator(ifa);
689 while (enumerator->enumerate(enumerator, &type, &sockaddr))
690 {
691 if (type == RTAX_IFA)
692 {
693 host = host_create_from_sockaddr(sockaddr);
694 break;
695 }
696 }
697 enumerator->destroy(enumerator);
698
699 if (!host || host->is_anyaddr(host))
700 {
701 DESTROY_IF(host);
702 return;
703 }
704
705 this->lock->write_lock(this->lock);
706 ifaces = this->ifaces->create_enumerator(this->ifaces);
707 while (ifaces->enumerate(ifaces, &iface))
708 {
709 if (iface->ifindex == ifa->ifam_index)
710 {
711 addrs = iface->addrs->create_enumerator(iface->addrs);
712 while (addrs->enumerate(addrs, &addr))
713 {
714 if (host->ip_equals(host, addr->ip))
715 {
716 found = TRUE;
717 if (ifa->ifam_type == RTM_DELADDR)
718 {
719 iface->addrs->remove_at(iface->addrs, addrs);
720 if (!addr->virtual && iface->usable)
721 {
722 changed = TRUE;
723 DBG1(DBG_KNL, "%H disappeared from %s",
724 host, iface->ifname);
725 }
726 addr_map_entry_remove(addr, iface, this);
727 addr_entry_destroy(addr);
728 }
729 }
730 }
731 addrs->destroy(addrs);
732
733 if (!found && ifa->ifam_type == RTM_NEWADDR)
734 {
735 INIT(addr,
736 .ip = host->clone(host),
737 );
738 changed = TRUE;
739 ifname = strdup(iface->ifname);
740 iface->addrs->insert_last(iface->addrs, addr);
741 addr_map_entry_add(this, addr, iface);
742 if (iface->usable)
743 {
744 DBG1(DBG_KNL, "%H appeared on %s", host, iface->ifname);
745 }
746 }
747
748 if (changed && iface_entry_up_and_usable(iface))
749 {
750 roam = TRUE;
751 }
752 break;
753 }
754 }
755 ifaces->destroy(ifaces);
756 this->lock->unlock(this->lock);
757 host->destroy(host);
758
759 if (roam && ifname)
760 {
761 queue_route_reinstall(this, ifname);
762 }
763 else
764 {
765 free(ifname);
766 }
767
768 if (roam)
769 {
770 fire_roam_event(this, TRUE);
771 }
772 }
773
774 /**
775 * Re-initialize address list of an interface if it changes state
776 */
777 static void repopulate_iface(private_kernel_pfroute_net_t *this,
778 iface_entry_t *iface)
779 {
780 struct ifaddrs *ifap, *ifa;
781 addr_entry_t *addr;
782
783 while (iface->addrs->remove_last(iface->addrs, (void**)&addr) == SUCCESS)
784 {
785 addr_map_entry_remove(addr, iface, this);
786 addr_entry_destroy(addr);
787 }
788
789 if (getifaddrs(&ifap) == 0)
790 {
791 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next)
792 {
793 if (ifa->ifa_addr && streq(ifa->ifa_name, iface->ifname))
794 {
795 switch (ifa->ifa_addr->sa_family)
796 {
797 case AF_INET:
798 case AF_INET6:
799 INIT(addr,
800 .ip = host_create_from_sockaddr(ifa->ifa_addr),
801 );
802 iface->addrs->insert_last(iface->addrs, addr);
803 addr_map_entry_add(this, addr, iface);
804 break;
805 default:
806 break;
807 }
808 }
809 }
810 freeifaddrs(ifap);
811 }
812 }
813
814 /**
815 * Process an RTM_IFINFO message from the kernel
816 */
817 static void process_link(private_kernel_pfroute_net_t *this,
818 struct if_msghdr *msg)
819 {
820 enumerator_t *enumerator;
821 iface_entry_t *iface;
822 bool roam = FALSE, found = FALSE, update_routes = FALSE;
823
824 this->lock->write_lock(this->lock);
825 enumerator = this->ifaces->create_enumerator(this->ifaces);
826 while (enumerator->enumerate(enumerator, &iface))
827 {
828 if (iface->ifindex == msg->ifm_index)
829 {
830 if (iface->usable)
831 {
832 if (!(iface->flags & IFF_UP) && (msg->ifm_flags & IFF_UP))
833 {
834 roam = update_routes = TRUE;
835 DBG1(DBG_KNL, "interface %s activated", iface->ifname);
836 }
837 else if ((iface->flags & IFF_UP) && !(msg->ifm_flags & IFF_UP))
838 {
839 roam = TRUE;
840 DBG1(DBG_KNL, "interface %s deactivated", iface->ifname);
841 }
842 }
843 #ifdef __APPLE__
844 /* There seems to be a race condition on 10.10, where we get
845 * the RTM_IFINFO, but getifaddrs() does not return the virtual
846 * IP installed on a tun device, but we also don't get a
847 * RTM_NEWADDR. We therefore could miss the new address, letting
848 * virtual IP installation fail. Delaying getifaddrs() helps,
849 * but is obviously not a clean fix. */
850 usleep(50000);
851 #endif
852 iface->flags = msg->ifm_flags;
853 repopulate_iface(this, iface);
854 found = TRUE;
855 break;
856 }
857 }
858 enumerator->destroy(enumerator);
859
860 if (!found)
861 {
862 INIT(iface,
863 .ifindex = msg->ifm_index,
864 .flags = msg->ifm_flags,
865 .addrs = linked_list_create(),
866 );
867 #ifdef __APPLE__
868 /* Similar to the issue described above, on 10.13 we need this delay as
869 * we might otherwise not be able to convert the index to a name yet. */
870 usleep(50000);
871 #endif
872 if (if_indextoname(iface->ifindex, iface->ifname))
873 {
874 DBG1(DBG_KNL, "interface %s appeared", iface->ifname);
875 iface->usable = charon->kernel->is_interface_usable(charon->kernel,
876 iface->ifname);
877 repopulate_iface(this, iface);
878 this->ifaces->insert_last(this->ifaces, iface);
879 if (iface->usable)
880 {
881 roam = update_routes = TRUE;
882 }
883 }
884 else
885 {
886 free(iface);
887 }
888 }
889 this->lock->unlock(this->lock);
890
891 if (update_routes)
892 {
893 queue_route_reinstall(this, strdup(iface->ifname));
894 }
895
896 if (roam)
897 {
898 fire_roam_event(this, TRUE);
899 }
900 }
901
#ifdef HAVE_RTM_IFANNOUNCE

/**
 * Process an RTM_IFANNOUNCE message from the kernel: remove the cached entry
 * of an interface that departed
 */
static void process_announce(private_kernel_pfroute_net_t *this,
							 struct if_announcemsghdr *msg)
{
	enumerator_t *iface_enum;
	iface_entry_t *entry;

	if (msg->ifan_what != IFAN_DEPARTURE)
	{	/* we handle new interfaces in process_link() */
		return;
	}

	this->lock->write_lock(this->lock);
	iface_enum = this->ifaces->create_enumerator(this->ifaces);
	while (iface_enum->enumerate(iface_enum, &entry))
	{
		if (entry->ifindex != msg->ifan_index)
		{
			continue;
		}
		DBG1(DBG_KNL, "interface %s disappeared", entry->ifname);
		this->ifaces->remove_at(this->ifaces, iface_enum);
		iface_entry_destroy(entry);
		break;
	}
	iface_enum->destroy(iface_enum);
	this->lock->unlock(this->lock);
}

#endif /* HAVE_RTM_IFANNOUNCE */
936
937 /**
938 * Process an RTM_*ROUTE message from the kernel
939 */
940 static void process_route(private_kernel_pfroute_net_t *this,
941 struct rt_msghdr *msg)
942 {
943
944 }
945
946 /**
947 * Receives PF_ROUTE messages from kernel
948 */
949 static bool receive_events(private_kernel_pfroute_net_t *this, int fd,
950 watcher_event_t event)
951 {
952 struct {
953 union {
954 struct rt_msghdr rtm;
955 struct if_msghdr ifm;
956 struct ifa_msghdr ifam;
957 #ifdef HAVE_RTM_IFANNOUNCE
958 struct if_announcemsghdr ifanm;
959 #endif
960 };
961 char buf[sizeof(struct sockaddr_storage) * RTAX_MAX];
962 } msg;
963 int len, hdrlen;
964
965 len = recv(this->socket, &msg, sizeof(msg), MSG_DONTWAIT);
966 if (len < 0)
967 {
968 switch (errno)
969 {
970 case EINTR:
971 case EAGAIN:
972 return TRUE;
973 default:
974 DBG1(DBG_KNL, "unable to receive from PF_ROUTE event socket");
975 sleep(1);
976 return TRUE;
977 }
978 }
979
980 if (len < offsetof(struct rt_msghdr, rtm_flags) || len < msg.rtm.rtm_msglen)
981 {
982 DBG1(DBG_KNL, "received invalid PF_ROUTE message");
983 return TRUE;
984 }
985 if (msg.rtm.rtm_version != RTM_VERSION)
986 {
987 DBG1(DBG_KNL, "received PF_ROUTE message with unsupported version: %d",
988 msg.rtm.rtm_version);
989 return TRUE;
990 }
991 switch (msg.rtm.rtm_type)
992 {
993 case RTM_NEWADDR:
994 case RTM_DELADDR:
995 hdrlen = sizeof(msg.ifam);
996 break;
997 case RTM_IFINFO:
998 hdrlen = sizeof(msg.ifm);
999 break;
1000 #ifdef HAVE_RTM_IFANNOUNCE
1001 case RTM_IFANNOUNCE:
1002 hdrlen = sizeof(msg.ifanm);
1003 break;
1004 #endif /* HAVE_RTM_IFANNOUNCE */
1005 case RTM_ADD:
1006 case RTM_DELETE:
1007 case RTM_GET:
1008 hdrlen = sizeof(msg.rtm);
1009 break;
1010 default:
1011 return TRUE;
1012 }
1013 if (msg.rtm.rtm_msglen < hdrlen)
1014 {
1015 DBG1(DBG_KNL, "ignoring short PF_ROUTE message");
1016 return TRUE;
1017 }
1018 switch (msg.rtm.rtm_type)
1019 {
1020 case RTM_NEWADDR:
1021 case RTM_DELADDR:
1022 process_addr(this, &msg.ifam);
1023 break;
1024 case RTM_IFINFO:
1025 process_link(this, &msg.ifm);
1026 break;
1027 #ifdef HAVE_RTM_IFANNOUNCE
1028 case RTM_IFANNOUNCE:
1029 process_announce(this, &msg.ifanm);
1030 break;
1031 #endif /* HAVE_RTM_IFANNOUNCE */
1032 case RTM_ADD:
1033 case RTM_DELETE:
1034 process_route(this, &msg.rtm);
1035 break;
1036 default:
1037 break;
1038 }
1039
1040 this->mutex->lock(this->mutex);
1041 if (msg.rtm.rtm_pid == this->pid && msg.rtm.rtm_seq == this->waiting_seq)
1042 {
1043 /* seems like the message someone is waiting for, deliver */
1044 this->reply = realloc(this->reply, msg.rtm.rtm_msglen);
1045 memcpy(this->reply, &msg, msg.rtm.rtm_msglen);
1046 }
1047 /* signal on any event, add_ip()/del_ip() might wait for it */
1048 this->condvar->broadcast(this->condvar);
1049 this->mutex->unlock(this->mutex);
1050
1051 return TRUE;
1052 }
1053
1054
1055 /** enumerator over addresses */
1056 typedef struct {
1057 private_kernel_pfroute_net_t* this;
1058 /** which addresses to enumerate */
1059 kernel_address_type_t which;
1060 } address_enumerator_t;
1061
1062 CALLBACK(address_enumerator_destroy, void,
1063 address_enumerator_t *data)
1064 {
1065 data->this->lock->unlock(data->this->lock);
1066 free(data);
1067 }
1068
1069 CALLBACK(filter_addresses, bool,
1070 address_enumerator_t *data, enumerator_t *orig, va_list args)
1071 {
1072 addr_entry_t *addr;
1073 host_t *ip, **out;
1074 struct sockaddr_in6 *sin6;
1075
1076 VA_ARGS_VGET(args, out);
1077
1078 while (orig->enumerate(orig, &addr))
1079 {
1080 if (!(data->which & ADDR_TYPE_VIRTUAL) && addr->virtual)
1081 { /* skip virtual interfaces added by us */
1082 continue;
1083 }
1084 if (!(data->which & ADDR_TYPE_REGULAR) && !addr->virtual)
1085 { /* address is regular, but not requested */
1086 continue;
1087 }
1088 ip = addr->ip;
1089 if (ip->get_family(ip) == AF_INET6)
1090 {
1091 sin6 = (struct sockaddr_in6 *)ip->get_sockaddr(ip);
1092 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
1093 { /* skip addresses with a unusable scope */
1094 continue;
1095 }
1096 }
1097 *out = ip;
1098 return TRUE;
1099 }
1100 return FALSE;
1101 }
1102
1103 /**
1104 * enumerator constructor for interfaces
1105 */
1106 static enumerator_t *create_iface_enumerator(iface_entry_t *iface,
1107 address_enumerator_t *data)
1108 {
1109 return enumerator_create_filter(iface->addrs->create_enumerator(iface->addrs),
1110 filter_addresses, data, NULL);
1111 }
1112
1113 CALLBACK(filter_interfaces, bool,
1114 address_enumerator_t *data, enumerator_t *orig, va_list args)
1115 {
1116 iface_entry_t *iface, **out;
1117
1118 VA_ARGS_VGET(args, out);
1119
1120 while (orig->enumerate(orig, &iface))
1121 {
1122 if (!(data->which & ADDR_TYPE_IGNORED) && !iface->usable)
1123 { /* skip interfaces excluded by config */
1124 continue;
1125 }
1126 if (!(data->which & ADDR_TYPE_LOOPBACK) && (iface->flags & IFF_LOOPBACK))
1127 { /* ignore loopback devices */
1128 continue;
1129 }
1130 if (!(data->which & ADDR_TYPE_DOWN) && !(iface->flags & IFF_UP))
1131 { /* skip interfaces not up */
1132 continue;
1133 }
1134 *out = iface;
1135 return TRUE;
1136 }
1137 return FALSE;
1138 }
1139
1140 METHOD(kernel_net_t, create_address_enumerator, enumerator_t*,
1141 private_kernel_pfroute_net_t *this, kernel_address_type_t which)
1142 {
1143 address_enumerator_t *data;
1144
1145 INIT(data,
1146 .this = this,
1147 .which = which,
1148 );
1149
1150 this->lock->read_lock(this->lock);
1151 return enumerator_create_nested(
1152 enumerator_create_filter(
1153 this->ifaces->create_enumerator(this->ifaces),
1154 filter_interfaces, data, NULL),
1155 (void*)create_iface_enumerator, data,
1156 address_enumerator_destroy);
1157 }
1158
1159 METHOD(kernel_net_t, get_features, kernel_feature_t,
1160 private_kernel_pfroute_net_t *this)
1161 {
1162 return KERNEL_REQUIRE_EXCLUDE_ROUTE;
1163 }
1164
1165 METHOD(kernel_net_t, get_interface_name, bool,
1166 private_kernel_pfroute_net_t *this, host_t* ip, char **name)
1167 {
1168 addr_map_entry_t *entry, lookup = {
1169 .ip = ip,
1170 };
1171
1172 if (ip->is_anyaddr(ip))
1173 {
1174 return FALSE;
1175 }
1176 this->lock->read_lock(this->lock);
1177 /* first try to find it on an up and usable interface */
1178 entry = this->addrs->get_match(this->addrs, &lookup,
1179 (void*)addr_map_entry_match_up_and_usable);
1180 if (entry)
1181 {
1182 if (name)
1183 {
1184 *name = strdup(entry->iface->ifname);
1185 DBG2(DBG_KNL, "%H is on interface %s", ip, *name);
1186 }
1187 this->lock->unlock(this->lock);
1188 return TRUE;
1189 }
1190 /* check if it is a virtual IP */
1191 entry = this->addrs->get_match(this->addrs, &lookup,
1192 (void*)addr_map_entry_match_virtual);
1193 if (entry)
1194 {
1195 if (name)
1196 {
1197 *name = strdup(entry->iface->ifname);
1198 DBG2(DBG_KNL, "virtual IP %H is on interface %s", ip, *name);
1199 }
1200 this->lock->unlock(this->lock);
1201 return TRUE;
1202 }
1203 /* maybe it is installed on an ignored interface */
1204 entry = this->addrs->get_match(this->addrs, &lookup,
1205 (void*)addr_map_entry_match_up);
1206 if (!entry)
1207 { /* the address does not exist, is on a down interface */
1208 DBG2(DBG_KNL, "%H is not a local address or the interface is down", ip);
1209 }
1210 this->lock->unlock(this->lock);
1211 return FALSE;
1212 }
1213
1214 METHOD(kernel_net_t, add_ip, status_t,
1215 private_kernel_pfroute_net_t *this, host_t *vip, int prefix,
1216 char *ifname)
1217 {
1218 enumerator_t *ifaces, *addrs;
1219 iface_entry_t *iface;
1220 addr_entry_t *addr;
1221 tun_device_t *tun;
1222 bool timeout = FALSE;
1223
1224 if (!this->install_virtual_ip)
1225 { /* disabled by config */
1226 return SUCCESS;
1227 }
1228
1229 tun = tun_device_create(NULL);
1230 if (!tun)
1231 {
1232 return FAILED;
1233 }
1234 if (prefix == -1)
1235 {
1236 prefix = vip->get_address(vip).len * 8;
1237 }
1238 if (!tun->up(tun) || !tun->set_address(tun, vip, prefix))
1239 {
1240 tun->destroy(tun);
1241 return FAILED;
1242 }
1243
1244 /* wait until address appears */
1245 this->mutex->lock(this->mutex);
1246 while (!timeout && !get_interface_name(this, vip, NULL))
1247 {
1248 timeout = this->condvar->timed_wait(this->condvar, this->mutex,
1249 this->vip_wait);
1250 }
1251 this->mutex->unlock(this->mutex);
1252 if (timeout)
1253 {
1254 DBG1(DBG_KNL, "virtual IP %H did not appear on %s",
1255 vip, tun->get_name(tun));
1256 tun->destroy(tun);
1257 return FAILED;
1258 }
1259
1260 this->lock->write_lock(this->lock);
1261 this->tuns->insert_last(this->tuns, tun);
1262
1263 ifaces = this->ifaces->create_enumerator(this->ifaces);
1264 while (ifaces->enumerate(ifaces, &iface))
1265 {
1266 if (streq(iface->ifname, tun->get_name(tun)))
1267 {
1268 addrs = iface->addrs->create_enumerator(iface->addrs);
1269 while (addrs->enumerate(addrs, &addr))
1270 {
1271 if (addr->ip->ip_equals(addr->ip, vip))
1272 {
1273 addr->virtual = TRUE;
1274 }
1275 }
1276 addrs->destroy(addrs);
1277 /* during IKEv1 reauthentication, children get moved from
1278 * old the new SA before the virtual IP is available. This
1279 * kills the route for our virtual IP, reinstall. */
1280 queue_route_reinstall(this, strdup(iface->ifname));
1281 break;
1282 }
1283 }
1284 ifaces->destroy(ifaces);
1285 /* lets do this while holding the lock, thus preventing another thread
1286 * from deleting the TUN device concurrently, hopefully listeners are quick
1287 * and cause no deadlocks */
1288 charon->kernel->tun(charon->kernel, tun, TRUE);
1289 this->lock->unlock(this->lock);
1290
1291 return SUCCESS;
1292 }
1293
1294 METHOD(kernel_net_t, del_ip, status_t,
1295 private_kernel_pfroute_net_t *this, host_t *vip, int prefix,
1296 bool wait)
1297 {
1298 enumerator_t *enumerator;
1299 tun_device_t *tun;
1300 host_t *addr;
1301 bool timeout = FALSE, found = FALSE;
1302
1303 if (!this->install_virtual_ip)
1304 { /* disabled by config */
1305 return SUCCESS;
1306 }
1307
1308 this->lock->write_lock(this->lock);
1309 enumerator = this->tuns->create_enumerator(this->tuns);
1310 while (enumerator->enumerate(enumerator, &tun))
1311 {
1312 addr = tun->get_address(tun, NULL);
1313 if (addr && addr->ip_equals(addr, vip))
1314 {
1315 this->tuns->remove_at(this->tuns, enumerator);
1316 charon->kernel->tun(charon->kernel, tun, FALSE);
1317 tun->destroy(tun);
1318 found = TRUE;
1319 break;
1320 }
1321 }
1322 enumerator->destroy(enumerator);
1323 this->lock->unlock(this->lock);
1324
1325 if (!found)
1326 {
1327 return NOT_FOUND;
1328 }
1329 /* wait until address disappears */
1330 if (wait)
1331 {
1332 this->mutex->lock(this->mutex);
1333 while (!timeout && get_interface_name(this, vip, NULL))
1334 {
1335 timeout = this->condvar->timed_wait(this->condvar, this->mutex,
1336 this->vip_wait);
1337 }
1338 this->mutex->unlock(this->mutex);
1339 if (timeout)
1340 {
1341 DBG1(DBG_KNL, "virtual IP %H did not disappear from tun", vip);
1342 return FAILED;
1343 }
1344 }
1345 return SUCCESS;
1346 }
1347
1348 /**
1349 * Append a sockaddr_in/in6 of given type to routing message
1350 */
1351 static void add_rt_addr(struct rt_msghdr *hdr, int type, host_t *addr)
1352 {
1353 if (addr)
1354 {
1355 int len;
1356
1357 len = *addr->get_sockaddr_len(addr);
1358 memcpy((char*)hdr + hdr->rtm_msglen, addr->get_sockaddr(addr), len);
1359 hdr->rtm_msglen += SA_LEN(len);
1360 hdr->rtm_addrs |= type;
1361 }
1362 }
1363
1364 /**
1365 * Append a subnet mask sockaddr using the given prefix to routing message
1366 */
1367 static void add_rt_mask(struct rt_msghdr *hdr, int type, int family, int prefix)
1368 {
1369 host_t *mask;
1370
1371 mask = host_create_netmask(family, prefix);
1372 if (mask)
1373 {
1374 add_rt_addr(hdr, type, mask);
1375 mask->destroy(mask);
1376 }
1377 }
1378
1379 /**
1380 * Append an interface name sockaddr_dl to routing message
1381 */
1382 static void add_rt_ifname(struct rt_msghdr *hdr, int type, char *name)
1383 {
1384 struct sockaddr_dl sdl = {
1385 .sdl_len = sizeof(struct sockaddr_dl),
1386 .sdl_family = AF_LINK,
1387 .sdl_nlen = strlen(name),
1388 };
1389
1390 if (strlen(name) <= sizeof(sdl.sdl_data))
1391 {
1392 memcpy(sdl.sdl_data, name, sdl.sdl_nlen);
1393 memcpy((char*)hdr + hdr->rtm_msglen, &sdl, sdl.sdl_len);
1394 hdr->rtm_msglen += SA_LEN(sdl.sdl_len);
1395 hdr->rtm_addrs |= type;
1396 }
1397 }
1398
1399 /**
1400 * Add or remove a route
1401 */
1402 static status_t manage_route(private_kernel_pfroute_net_t *this, int op,
1403 chunk_t dst_net, uint8_t prefixlen,
1404 host_t *gateway, char *if_name)
1405 {
1406 struct {
1407 struct rt_msghdr hdr;
1408 char buf[sizeof(struct sockaddr_storage) * RTAX_MAX];
1409 } msg = {
1410 .hdr = {
1411 .rtm_version = RTM_VERSION,
1412 .rtm_type = op,
1413 .rtm_flags = RTF_UP | RTF_STATIC,
1414 .rtm_pid = this->pid,
1415 .rtm_seq = ref_get(&this->seq),
1416 },
1417 };
1418 host_t *dst;
1419 int type;
1420
1421 if (prefixlen == 0 && dst_net.len)
1422 {
1423 status_t status;
1424 chunk_t half;
1425
1426 half = chunk_clonea(dst_net);
1427 half.ptr[0] |= 0x80;
1428 prefixlen = 1;
1429 status = manage_route(this, op, half, prefixlen, gateway, if_name);
1430 if (status != SUCCESS)
1431 {
1432 return status;
1433 }
1434 }
1435
1436 dst = host_create_from_chunk(AF_UNSPEC, dst_net, 0);
1437 if (!dst)
1438 {
1439 return FAILED;
1440 }
1441
1442 if ((dst->get_family(dst) == AF_INET && prefixlen == 32) ||
1443 (dst->get_family(dst) == AF_INET6 && prefixlen == 128))
1444 {
1445 msg.hdr.rtm_flags |= RTF_HOST | RTF_GATEWAY;
1446 }
1447
1448 msg.hdr.rtm_msglen = sizeof(struct rt_msghdr);
1449 for (type = 0; type < RTAX_MAX; type++)
1450 {
1451 switch (type)
1452 {
1453 case RTAX_DST:
1454 add_rt_addr(&msg.hdr, RTA_DST, dst);
1455 break;
1456 case RTAX_NETMASK:
1457 if (!(msg.hdr.rtm_flags & RTF_HOST))
1458 {
1459 add_rt_mask(&msg.hdr, RTA_NETMASK,
1460 dst->get_family(dst), prefixlen);
1461 }
1462 break;
1463 case RTAX_IFP:
1464 if (if_name)
1465 {
1466 add_rt_ifname(&msg.hdr, RTA_IFP, if_name);
1467 }
1468 break;
1469 case RTAX_GATEWAY:
1470 if (gateway &&
1471 gateway->get_family(gateway) == dst->get_family(dst))
1472 {
1473 add_rt_addr(&msg.hdr, RTA_GATEWAY, gateway);
1474 }
1475 break;
1476 default:
1477 break;
1478 }
1479 }
1480 dst->destroy(dst);
1481
1482 if (send(this->socket, &msg, msg.hdr.rtm_msglen, 0) != msg.hdr.rtm_msglen)
1483 {
1484 if (errno == EEXIST)
1485 {
1486 return ALREADY_DONE;
1487 }
1488 DBG1(DBG_KNL, "%s PF_ROUTE route failed: %s",
1489 op == RTM_ADD ? "adding" : "deleting", strerror(errno));
1490 return FAILED;
1491 }
1492 return SUCCESS;
1493 }
1494
1495 METHOD(kernel_net_t, add_route, status_t,
1496 private_kernel_pfroute_net_t *this, chunk_t dst_net, uint8_t prefixlen,
1497 host_t *gateway, host_t *src_ip, char *if_name, bool pass)
1498 {
1499 status_t status;
1500 route_entry_t *found, route = {
1501 .dst_net = dst_net,
1502 .prefixlen = prefixlen,
1503 .gateway = gateway,
1504 .if_name = if_name,
1505 };
1506
1507 this->routes_lock->lock(this->routes_lock);
1508 found = this->routes->get(this->routes, &route);
1509 if (found)
1510 {
1511 this->routes_lock->unlock(this->routes_lock);
1512 return ALREADY_DONE;
1513 }
1514 status = manage_route(this, RTM_ADD, dst_net, prefixlen, gateway, if_name);
1515 if (status == SUCCESS)
1516 {
1517 found = route_entry_clone(&route);
1518 this->routes->put(this->routes, found, found);
1519 }
1520 this->routes_lock->unlock(this->routes_lock);
1521 return status;
1522 }
1523
1524 METHOD(kernel_net_t, del_route, status_t,
1525 private_kernel_pfroute_net_t *this, chunk_t dst_net, uint8_t prefixlen,
1526 host_t *gateway, host_t *src_ip, char *if_name, bool pass)
1527 {
1528 status_t status;
1529 route_entry_t *found, route = {
1530 .dst_net = dst_net,
1531 .prefixlen = prefixlen,
1532 .gateway = gateway,
1533 .if_name = if_name,
1534 };
1535
1536 this->routes_lock->lock(this->routes_lock);
1537 found = this->routes->get(this->routes, &route);
1538 if (!found)
1539 {
1540 this->routes_lock->unlock(this->routes_lock);
1541 return NOT_FOUND;
1542 }
1543 this->routes->remove(this->routes, found);
1544 route_entry_destroy(found);
1545 status = manage_route(this, RTM_DELETE, dst_net, prefixlen, gateway,
1546 if_name);
1547 this->routes_lock->unlock(this->routes_lock);
1548 return status;
1549 }
1550
1551 /**
1552 * Do a route lookup for dest and return either the nexthop or the source
1553 * address.
1554 */
1555 static host_t *get_route(private_kernel_pfroute_net_t *this, bool nexthop,
1556 host_t *dest, host_t *src, char **iface)
1557 {
1558 struct {
1559 struct rt_msghdr hdr;
1560 char buf[sizeof(struct sockaddr_storage) * RTAX_MAX];
1561 } msg = {
1562 .hdr = {
1563 .rtm_version = RTM_VERSION,
1564 .rtm_type = RTM_GET,
1565 .rtm_pid = this->pid,
1566 .rtm_seq = ref_get(&this->seq),
1567 },
1568 };
1569 host_t *host = NULL;
1570 enumerator_t *enumerator;
1571 struct sockaddr *addr;
1572 bool failed = FALSE;
1573 int type;
1574
1575 retry:
1576 msg.hdr.rtm_msglen = sizeof(struct rt_msghdr);
1577 for (type = 0; type < RTAX_MAX; type++)
1578 {
1579 switch (type)
1580 {
1581 case RTAX_DST:
1582 add_rt_addr(&msg.hdr, RTA_DST, dest);
1583 break;
1584 case RTAX_IFA:
1585 add_rt_addr(&msg.hdr, RTA_IFA, src);
1586 break;
1587 case RTAX_IFP:
1588 if (!nexthop)
1589 { /* add an empty IFP to ensure we get a source address */
1590 add_rt_ifname(&msg.hdr, RTA_IFP, "");
1591 }
1592 break;
1593 default:
1594 break;
1595 }
1596 }
1597 this->mutex->lock(this->mutex);
1598
1599 while (this->waiting_seq)
1600 {
1601 this->condvar->wait(this->condvar, this->mutex);
1602 }
1603 this->waiting_seq = msg.hdr.rtm_seq;
1604 if (send(this->socket, &msg, msg.hdr.rtm_msglen, 0) == msg.hdr.rtm_msglen)
1605 {
1606 while (TRUE)
1607 {
1608 if (this->condvar->timed_wait(this->condvar, this->mutex, 1000))
1609 { /* timed out? */
1610 break;
1611 }
1612 if (!this->reply)
1613 {
1614 continue;
1615 }
1616 enumerator = create_rtmsg_enumerator(this->reply);
1617 while (enumerator->enumerate(enumerator, &type, &addr))
1618 {
1619 if (nexthop)
1620 {
1621 if (type == RTAX_DST && this->reply->rtm_flags & RTF_HOST)
1622 { /* probably a cloned/cached direct route, only use that
1623 * as fallback if no gateway is found */
1624 host = host ?: host_create_from_sockaddr(addr);
1625 }
1626 if (type == RTAX_GATEWAY)
1627 { /* could actually be a MAC address */
1628 host_t *gtw = host_create_from_sockaddr(addr);
1629 if (gtw)
1630 {
1631 DESTROY_IF(host);
1632 host = gtw;
1633 }
1634 }
1635 if (type == RTAX_IFP && addr->sa_family == AF_LINK)
1636 {
1637 struct sockaddr_dl *sdl = (struct sockaddr_dl*)addr;
1638 if (iface)
1639 {
1640 free(*iface);
1641 *iface = strndup(sdl->sdl_data, sdl->sdl_nlen);
1642 }
1643 }
1644 }
1645 else
1646 {
1647 if (type == RTAX_IFA)
1648 {
1649 host = host_create_from_sockaddr(addr);
1650 }
1651 }
1652 }
1653 enumerator->destroy(enumerator);
1654 break;
1655 }
1656 }
1657 else
1658 {
1659 failed = TRUE;
1660 }
1661 free(this->reply);
1662 this->reply = NULL;
1663 /* signal completion of query to a waiting thread */
1664 this->waiting_seq = 0;
1665 this->condvar->signal(this->condvar);
1666 this->mutex->unlock(this->mutex);
1667
1668 if (failed)
1669 {
1670 if (src)
1671 { /* the given source address might be gone, try again without */
1672 src = NULL;
1673 msg.hdr.rtm_seq = ref_get(&this->seq);
1674 msg.hdr.rtm_addrs = 0;
1675 memset(msg.buf, 0, sizeof(msg.buf));
1676 goto retry;
1677 }
1678 DBG1(DBG_KNL, "PF_ROUTE lookup failed: %s", strerror(errno));
1679 }
1680 if (nexthop)
1681 {
1682 host = host ?: dest->clone(dest);
1683 }
1684 else
1685 { /* make sure the source address is not virtual and usable */
1686 addr_entry_t *entry, lookup = {
1687 .ip = host,
1688 };
1689
1690 if (!host)
1691 {
1692 return NULL;
1693 }
1694 this->lock->read_lock(this->lock);
1695 entry = this->addrs->get_match(this->addrs, &lookup,
1696 (void*)addr_map_entry_match_up_and_usable);
1697 this->lock->unlock(this->lock);
1698 if (!entry)
1699 {
1700 host->destroy(host);
1701 return NULL;
1702 }
1703 }
1704 DBG2(DBG_KNL, "using %H as %s to reach %H", host,
1705 nexthop ? "nexthop" : "address", dest);
1706 return host;
1707 }
1708
1709 METHOD(kernel_net_t, get_source_addr, host_t*,
1710 private_kernel_pfroute_net_t *this, host_t *dest, host_t *src)
1711 {
1712 return get_route(this, FALSE, dest, src, NULL);
1713 }
1714
1715 METHOD(kernel_net_t, get_nexthop, host_t*,
1716 private_kernel_pfroute_net_t *this, host_t *dest, int prefix, host_t *src,
1717 char **iface)
1718 {
1719 if (iface)
1720 {
1721 *iface = NULL;
1722 }
1723 return get_route(this, TRUE, dest, src, iface);
1724 }
1725
1726 /**
1727 * Get the number of set bits in the given netmask
1728 */
1729 static uint8_t sockaddr_to_netmask(sockaddr_t *sockaddr, host_t *dst)
1730 {
1731 uint8_t len = 0, i, byte, mask = 0;
1732 struct sockaddr_storage ss;
1733 char *addr;
1734
1735 /* at least some older FreeBSD versions send us shorter sockaddrs
1736 * with the family set to -1 (255) */
1737 if (sockaddr->sa_family == 255)
1738 {
1739 memset(&ss, 0, sizeof(ss));
1740 memcpy(&ss, sockaddr, sockaddr->sa_len);
1741 /* use the address family and length of the destination as hint */
1742 ss.ss_len = *dst->get_sockaddr_len(dst);
1743 ss.ss_family = dst->get_family(dst);
1744 sockaddr = (sockaddr_t*)&ss;
1745 }
1746
1747 switch (sockaddr->sa_family)
1748 {
1749 case AF_INET:
1750 len = 4;
1751 addr = (char*)&((struct sockaddr_in*)sockaddr)->sin_addr;
1752 break;
1753 case AF_INET6:
1754 len = 16;
1755 addr = (char*)&((struct sockaddr_in6*)sockaddr)->sin6_addr;
1756 break;
1757 default:
1758 break;
1759 }
1760
1761 for (i = 0; i < len; i++)
1762 {
1763 byte = addr[i];
1764
1765 if (byte == 0x00)
1766 {
1767 break;
1768 }
1769 if (byte == 0xff)
1770 {
1771 mask += 8;
1772 }
1773 else
1774 {
1775 while (byte & 0x80)
1776 {
1777 mask++;
1778 byte <<= 1;
1779 }
1780 }
1781 }
1782 return mask;
1783 }
1784
1785 /** enumerator over subnets */
1786 typedef struct {
1787 enumerator_t public;
1788 /** sysctl result */
1789 char *buf;
1790 /** length of the complete result */
1791 size_t len;
1792 /** start of the current route entry */
1793 char *current;
1794 /** last subnet enumerated */
1795 host_t *net;
1796 /** interface of current net */
1797 char *ifname;
1798 } subnet_enumerator_t;
1799
1800 METHOD(enumerator_t, destroy_subnet_enumerator, void,
1801 subnet_enumerator_t *this)
1802 {
1803 DESTROY_IF(this->net);
1804 free(this->ifname);
1805 free(this->buf);
1806 free(this);
1807 }
1808
1809 METHOD(enumerator_t, enumerate_subnets, bool,
1810 subnet_enumerator_t *this, va_list args)
1811 {
1812 enumerator_t *enumerator;
1813 host_t **net;
1814 struct rt_msghdr *rtm;
1815 struct sockaddr *addr;
1816 uint8_t *mask;
1817 char **ifname;
1818 int type;
1819
1820 VA_ARGS_VGET(args, net, mask, ifname);
1821
1822 if (!this->current)
1823 {
1824 this->current = this->buf;
1825 }
1826 else
1827 {
1828 rtm = (struct rt_msghdr*)this->current;
1829 this->current += rtm->rtm_msglen;
1830 DESTROY_IF(this->net);
1831 this->net = NULL;
1832 free(this->ifname);
1833 this->ifname = NULL;
1834 }
1835
1836 for (; this->current < this->buf + this->len;
1837 this->current += rtm->rtm_msglen)
1838 {
1839 struct sockaddr *netmask = NULL;
1840 uint8_t netbits = 0;
1841
1842 rtm = (struct rt_msghdr*)this->current;
1843
1844 if (rtm->rtm_version != RTM_VERSION)
1845 {
1846 continue;
1847 }
1848 if (rtm->rtm_flags & RTF_GATEWAY ||
1849 rtm->rtm_flags & RTF_HOST ||
1850 rtm->rtm_flags & RTF_REJECT)
1851 {
1852 continue;
1853 }
1854 enumerator = create_rtmsg_enumerator(rtm);
1855 while (enumerator->enumerate(enumerator, &type, &addr))
1856 {
1857 if (type == RTAX_DST)
1858 {
1859 this->net = this->net ?: host_create_from_sockaddr(addr);
1860 }
1861 if (type == RTAX_NETMASK)
1862 {
1863 netmask = addr;
1864 }
1865 if (type == RTAX_IFP && addr->sa_family == AF_LINK)
1866 {
1867 struct sockaddr_dl *sdl = (struct sockaddr_dl*)addr;
1868 free(this->ifname);
1869 this->ifname = strndup(sdl->sdl_data, sdl->sdl_nlen);
1870 }
1871 }
1872 if (this->net && netmask)
1873 {
1874 netbits = sockaddr_to_netmask(netmask, this->net);
1875 }
1876 enumerator->destroy(enumerator);
1877
1878 if (this->net && this->ifname)
1879 {
1880 *net = this->net;
1881 *mask = netbits ?: this->net->get_address(this->net).len * 8;
1882 *ifname = this->ifname;
1883 return TRUE;
1884 }
1885 }
1886 return FALSE;
1887 }
1888
1889 METHOD(kernel_net_t, create_local_subnet_enumerator, enumerator_t*,
1890 private_kernel_pfroute_net_t *this)
1891 {
1892 subnet_enumerator_t *enumerator;
1893 char *buf;
1894 size_t len;
1895 int mib[7] = {
1896 CTL_NET, PF_ROUTE, 0, AF_UNSPEC, NET_RT_DUMP, 0, 0
1897 };
1898
1899 if (sysctl(mib, countof(mib), NULL, &len, NULL, 0) < 0)
1900 {
1901 DBG2(DBG_KNL, "enumerating local subnets failed");
1902 return enumerator_create_empty();
1903 }
1904 buf = malloc(len);
1905 if (sysctl(mib, countof(mib), buf, &len, NULL, 0) < 0)
1906 {
1907 DBG2(DBG_KNL, "enumerating local subnets failed");
1908 free(buf);
1909 return enumerator_create_empty();
1910 }
1911
1912 INIT(enumerator,
1913 .public = {
1914 .enumerate = enumerator_enumerate_default,
1915 .venumerate = _enumerate_subnets,
1916 .destroy = _destroy_subnet_enumerator,
1917 },
1918 .buf = buf,
1919 .len = len,
1920 );
1921 return &enumerator->public;
1922 }
1923
1924 /**
1925 * Initialize a list of local addresses.
1926 */
1927 static status_t init_address_list(private_kernel_pfroute_net_t *this)
1928 {
1929 struct ifaddrs *ifap, *ifa;
1930 iface_entry_t *iface, *current;
1931 addr_entry_t *addr;
1932 enumerator_t *ifaces, *addrs;
1933
1934 DBG2(DBG_KNL, "known interfaces and IP addresses:");
1935
1936 if (getifaddrs(&ifap) < 0)
1937 {
1938 DBG1(DBG_KNL, " failed to get interfaces!");
1939 return FAILED;
1940 }
1941
1942 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next)
1943 {
1944 if (ifa->ifa_addr == NULL)
1945 {
1946 continue;
1947 }
1948 switch(ifa->ifa_addr->sa_family)
1949 {
1950 case AF_LINK:
1951 case AF_INET:
1952 case AF_INET6:
1953 {
1954 iface = NULL;
1955 ifaces = this->ifaces->create_enumerator(this->ifaces);
1956 while (ifaces->enumerate(ifaces, &current))
1957 {
1958 if (streq(current->ifname, ifa->ifa_name))
1959 {
1960 iface = current;
1961 break;
1962 }
1963 }
1964 ifaces->destroy(ifaces);
1965
1966 if (!iface)
1967 {
1968 INIT(iface,
1969 .ifindex = if_nametoindex(ifa->ifa_name),
1970 .flags = ifa->ifa_flags,
1971 .addrs = linked_list_create(),
1972 .usable = charon->kernel->is_interface_usable(
1973 charon->kernel, ifa->ifa_name),
1974 );
1975 memcpy(iface->ifname, ifa->ifa_name, IFNAMSIZ);
1976 this->ifaces->insert_last(this->ifaces, iface);
1977 }
1978
1979 if (ifa->ifa_addr->sa_family != AF_LINK)
1980 {
1981 INIT(addr,
1982 .ip = host_create_from_sockaddr(ifa->ifa_addr),
1983 );
1984 iface->addrs->insert_last(iface->addrs, addr);
1985 addr_map_entry_add(this, addr, iface);
1986 }
1987 }
1988 }
1989 }
1990 freeifaddrs(ifap);
1991
1992 ifaces = this->ifaces->create_enumerator(this->ifaces);
1993 while (ifaces->enumerate(ifaces, &iface))
1994 {
1995 if (iface->usable && iface->flags & IFF_UP)
1996 {
1997 DBG2(DBG_KNL, " %s", iface->ifname);
1998 addrs = iface->addrs->create_enumerator(iface->addrs);
1999 while (addrs->enumerate(addrs, (void**)&addr))
2000 {
2001 DBG2(DBG_KNL, " %H", addr->ip);
2002 }
2003 addrs->destroy(addrs);
2004 }
2005 }
2006 ifaces->destroy(ifaces);
2007
2008 return SUCCESS;
2009 }
2010
2011 METHOD(kernel_net_t, destroy, void,
2012 private_kernel_pfroute_net_t *this)
2013 {
2014 enumerator_t *enumerator;
2015 route_entry_t *route;
2016 addr_entry_t *addr;
2017
2018 enumerator = this->routes->create_enumerator(this->routes);
2019 while (enumerator->enumerate(enumerator, NULL, (void**)&route))
2020 {
2021 manage_route(this, RTM_DELETE, route->dst_net, route->prefixlen,
2022 route->gateway, route->if_name);
2023 route_entry_destroy(route);
2024 }
2025 enumerator->destroy(enumerator);
2026 this->routes->destroy(this->routes);
2027 this->routes_lock->destroy(this->routes_lock);
2028
2029 if (this->socket != -1)
2030 {
2031 lib->watcher->remove(lib->watcher, this->socket);
2032 close(this->socket);
2033 }
2034
2035 net_changes_clear(this);
2036 this->net_changes->destroy(this->net_changes);
2037 this->net_changes_lock->destroy(this->net_changes_lock);
2038
2039 enumerator = this->addrs->create_enumerator(this->addrs);
2040 while (enumerator->enumerate(enumerator, NULL, (void**)&addr))
2041 {
2042 free(addr);
2043 }
2044 enumerator->destroy(enumerator);
2045 this->addrs->destroy(this->addrs);
2046 this->ifaces->destroy_function(this->ifaces, (void*)iface_entry_destroy);
2047 this->tuns->destroy(this->tuns);
2048 this->lock->destroy(this->lock);
2049 this->mutex->destroy(this->mutex);
2050 this->condvar->destroy(this->condvar);
2051 this->roam_lock->destroy(this->roam_lock);
2052 free(this->reply);
2053 free(this);
2054 }
2055
2056 /*
2057 * Described in header.
2058 */
2059 kernel_pfroute_net_t *kernel_pfroute_net_create()
2060 {
2061 private_kernel_pfroute_net_t *this;
2062
2063 INIT(this,
2064 .public = {
2065 .interface = {
2066 .get_features = _get_features,
2067 .get_interface = _get_interface_name,
2068 .create_address_enumerator = _create_address_enumerator,
2069 .create_local_subnet_enumerator = _create_local_subnet_enumerator,
2070 .get_source_addr = _get_source_addr,
2071 .get_nexthop = _get_nexthop,
2072 .add_ip = _add_ip,
2073 .del_ip = _del_ip,
2074 .add_route = _add_route,
2075 .del_route = _del_route,
2076 .destroy = _destroy,
2077 },
2078 },
2079 .pid = getpid(),
2080 .ifaces = linked_list_create(),
2081 .addrs = hashtable_create(
2082 (hashtable_hash_t)addr_map_entry_hash,
2083 (hashtable_equals_t)addr_map_entry_equals, 16),
2084 .routes = hashtable_create((hashtable_hash_t)route_entry_hash,
2085 (hashtable_equals_t)route_entry_equals, 16),
2086 .net_changes = hashtable_create(
2087 (hashtable_hash_t)net_change_hash,
2088 (hashtable_equals_t)net_change_equals, 16),
2089 .tuns = linked_list_create(),
2090 .lock = rwlock_create(RWLOCK_TYPE_DEFAULT),
2091 .mutex = mutex_create(MUTEX_TYPE_DEFAULT),
2092 .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
2093 .routes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2094 .net_changes_lock = mutex_create(MUTEX_TYPE_DEFAULT),
2095 .roam_lock = spinlock_create(),
2096 .vip_wait = lib->settings->get_int(lib->settings,
2097 "%s.plugins.kernel-pfroute.vip_wait", 1000, lib->ns),
2098 .install_virtual_ip = lib->settings->get_bool(lib->settings,
2099 "%s.install_virtual_ip", TRUE, lib->ns),
2100 );
2101 timerclear(&this->last_route_reinstall);
2102 timerclear(&this->next_roam);
2103
2104 /* create a PF_ROUTE socket to communicate with the kernel */
2105 this->socket = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
2106 if (this->socket == -1)
2107 {
2108 DBG1(DBG_KNL, "unable to create PF_ROUTE socket");
2109 destroy(this);
2110 return NULL;
2111 }
2112
2113 if (streq(lib->ns, "starter"))
2114 {
2115 /* starter has no threads, so we do not register for kernel events */
2116 if (shutdown(this->socket, SHUT_RD) != 0)
2117 {
2118 DBG1(DBG_KNL, "closing read end of PF_ROUTE socket failed: %s",
2119 strerror(errno));
2120 }
2121 }
2122 else
2123 {
2124 lib->watcher->add(lib->watcher, this->socket, WATCHER_READ,
2125 (watcher_cb_t)receive_events, this);
2126 }
2127 if (init_address_list(this) != SUCCESS)
2128 {
2129 DBG1(DBG_KNL, "unable to get interface list");
2130 destroy(this);
2131 return NULL;
2132 }
2133
2134 return &this->public;
2135 }