]> git.ipfire.org Git - thirdparty/strongswan.git/blob - src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c
kernel-netlink: Don't install routes for CHILD_SAs with interface ID
[thirdparty/strongswan.git] / src / libcharon / plugins / kernel_netlink / kernel_netlink_ipsec.c
1 /*
2 * Copyright (C) 2006-2019 Tobias Brunner
3 * Copyright (C) 2005-2009 Martin Willi
4 * Copyright (C) 2008-2016 Andreas Steffen
5 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
6 * Copyright (C) 2006 Daniel Roethlisberger
7 * Copyright (C) 2005 Jan Hutter
8 * HSR Hochschule fuer Technik Rapperswil
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 */
20 /*
21 * Copyright (C) 2018 Mellanox Technologies.
22 *
23 * Permission is hereby granted, free of charge, to any person obtaining a copy
24 * of this software and associated documentation files (the "Software"), to deal
25 * in the Software without restriction, including without limitation the rights
26 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27 * copies of the Software, and to permit persons to whom the Software is
28 * furnished to do so, subject to the following conditions:
29 *
30 * The above copyright notice and this permission notice shall be included in
31 * all copies or substantial portions of the Software.
32 *
33 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39 * THE SOFTWARE.
40 */
41
42 #define _GNU_SOURCE
43 #include <sys/types.h>
44 #include <sys/socket.h>
45 #include <sys/ioctl.h>
46 #include <stdint.h>
47 #include <linux/ipsec.h>
48 #include <linux/netlink.h>
49 #include <linux/rtnetlink.h>
50 #include <linux/xfrm.h>
51 #include <linux/udp.h>
52 #include <linux/ethtool.h>
53 #include <linux/sockios.h>
54 #include <net/if.h>
55 #include <unistd.h>
56 #include <time.h>
57 #include <errno.h>
58 #include <string.h>
59 #include <fcntl.h>
60 #include <dlfcn.h>
61
62 #include "kernel_netlink_ipsec.h"
63 #include "kernel_netlink_shared.h"
64
65 #include <daemon.h>
66 #include <utils/debug.h>
67 #include <threading/mutex.h>
68 #include <threading/condvar.h>
69 #include <collections/array.h>
70 #include <collections/hashtable.h>
71 #include <collections/linked_list.h>
72
/* XFRM_STATE_AF_UNSPEC is only shipped with Linux 2.6.26 and newer headers */
#ifndef XFRM_STATE_AF_UNSPEC
#define XFRM_STATE_AF_UNSPEC 32
#endif

/* normally provided by linux/in.h */
#ifndef IP_XFRM_POLICY
#define IP_XFRM_POLICY 17
#endif

/* not available when building against uclibc */
#ifndef IPV6_XFRM_POLICY
#define IPV6_XFRM_POLICY 34
#endif /*IPV6_XFRM_POLICY*/

/* normally provided by linux/udp.h */
#ifndef UDP_ENCAP
#define UDP_ENCAP 100
#endif

#ifndef UDP_ENCAP_ESPINUDP
#define UDP_ENCAP_ESPINUDP 2
#endif

/* some platforms lack SOL_UDP, fall back to the protocol number */
#ifndef SOL_UDP
#define SOL_UDP IPPROTO_UDP
#endif

/** Priority value all installed policies are derived from */
#define PRIO_BASE 200000

/**
 * A byte/packet limit of 0 is mapped to XFRM_INF, any other value is used
 * unchanged
 */
#define XFRM_LIMIT(x) ((x) == 0 ? XFRM_INF : (x))

/**
 * Bit for the XFRM netlink group x (XFRMNLGRP_x), may be ORed with the bits
 * of other groups
 */
#define XFRMNLGRP(x) (1<<(XFRMNLGRP_##x-1))

/**
 * Pointer to the first rtattr of netlink message nlh, i.e. the attribute
 * located after the aligned fixed-size payload of type x (such as
 * 'struct xfrm_usersa_info')
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(NLMSG_DATA(nlh) + \
										   NLMSG_ALIGN(sizeof(x))))

/**
 * Number of bytes of rtattr data that follow the fixed-size payload of type x
 * (such as 'struct xfrm_usersa_info') in netlink message nlh
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
126
typedef struct kernel_algorithm_t kernel_algorithm_t;

/**
 * Associates an IKEv2 algorithm identifier with the name the same algorithm
 * has in the Linux crypto API
 */
struct kernel_algorithm_t {

	/** Algorithm identifier as specified in IKEv2 */
	int ikev2;

	/** Algorithm name as used by the Linux crypto API */
	const char *name;
};
143
144 ENUM(xfrm_msg_names, XFRM_MSG_NEWSA, XFRM_MSG_MAPPING,
145 "XFRM_MSG_NEWSA",
146 "XFRM_MSG_DELSA",
147 "XFRM_MSG_GETSA",
148 "XFRM_MSG_NEWPOLICY",
149 "XFRM_MSG_DELPOLICY",
150 "XFRM_MSG_GETPOLICY",
151 "XFRM_MSG_ALLOCSPI",
152 "XFRM_MSG_ACQUIRE",
153 "XFRM_MSG_EXPIRE",
154 "XFRM_MSG_UPDPOLICY",
155 "XFRM_MSG_UPDSA",
156 "XFRM_MSG_POLEXPIRE",
157 "XFRM_MSG_FLUSHSA",
158 "XFRM_MSG_FLUSHPOLICY",
159 "XFRM_MSG_NEWAE",
160 "XFRM_MSG_GETAE",
161 "XFRM_MSG_REPORT",
162 "XFRM_MSG_MIGRATE",
163 "XFRM_MSG_NEWSADINFO",
164 "XFRM_MSG_GETSADINFO",
165 "XFRM_MSG_NEWSPDINFO",
166 "XFRM_MSG_GETSPDINFO",
167 "XFRM_MSG_MAPPING"
168 );
169
170 ENUM(xfrm_attr_type_names, XFRMA_UNSPEC, XFRMA_OFFLOAD_DEV,
171 "XFRMA_UNSPEC",
172 "XFRMA_ALG_AUTH",
173 "XFRMA_ALG_CRYPT",
174 "XFRMA_ALG_COMP",
175 "XFRMA_ENCAP",
176 "XFRMA_TMPL",
177 "XFRMA_SA",
178 "XFRMA_POLICY",
179 "XFRMA_SEC_CTX",
180 "XFRMA_LTIME_VAL",
181 "XFRMA_REPLAY_VAL",
182 "XFRMA_REPLAY_THRESH",
183 "XFRMA_ETIMER_THRESH",
184 "XFRMA_SRCADDR",
185 "XFRMA_COADDR",
186 "XFRMA_LASTUSED",
187 "XFRMA_POLICY_TYPE",
188 "XFRMA_MIGRATE",
189 "XFRMA_ALG_AEAD",
190 "XFRMA_KMADDRESS",
191 "XFRMA_ALG_AUTH_TRUNC",
192 "XFRMA_MARK",
193 "XFRMA_TFCPAD",
194 "XFRMA_REPLAY_ESN_VAL",
195 "XFRMA_SA_EXTRA_FLAGS",
196 "XFRMA_PROTO",
197 "XFRMA_ADDRESS_FILTER",
198 "XFRMA_PAD",
199 "XFRMA_OFFLOAD_DEV",
200 );
201
202 /**
203 * Algorithms for encryption
204 */
205 static kernel_algorithm_t encryption_algs[] = {
206 /* {ENCR_DES_IV64, "***" }, */
207 {ENCR_DES, "des" },
208 {ENCR_3DES, "des3_ede" },
209 /* {ENCR_RC5, "***" }, */
210 /* {ENCR_IDEA, "***" }, */
211 {ENCR_CAST, "cast5" },
212 {ENCR_BLOWFISH, "blowfish" },
213 /* {ENCR_3IDEA, "***" }, */
214 /* {ENCR_DES_IV32, "***" }, */
215 {ENCR_NULL, "cipher_null" },
216 {ENCR_AES_CBC, "aes" },
217 {ENCR_AES_CTR, "rfc3686(ctr(aes))" },
218 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))" },
219 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))" },
220 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))" },
221 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))" },
222 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))" },
223 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))" },
224 {ENCR_NULL_AUTH_AES_GMAC, "rfc4543(gcm(aes))" },
225 {ENCR_CAMELLIA_CBC, "cbc(camellia)" },
226 /* {ENCR_CAMELLIA_CTR, "***" }, */
227 /* {ENCR_CAMELLIA_CCM_ICV8, "***" }, */
228 /* {ENCR_CAMELLIA_CCM_ICV12, "***" }, */
229 /* {ENCR_CAMELLIA_CCM_ICV16, "***" }, */
230 {ENCR_SERPENT_CBC, "serpent" },
231 {ENCR_TWOFISH_CBC, "twofish" },
232 {ENCR_CHACHA20_POLY1305, "rfc7539esp(chacha20,poly1305)"},
233 };
234
235 /**
236 * Algorithms for integrity protection
237 */
238 static kernel_algorithm_t integrity_algs[] = {
239 {AUTH_HMAC_MD5_96, "md5" },
240 {AUTH_HMAC_MD5_128, "hmac(md5)" },
241 {AUTH_HMAC_SHA1_96, "sha1" },
242 {AUTH_HMAC_SHA1_160, "hmac(sha1)" },
243 {AUTH_HMAC_SHA2_256_96, "sha256" },
244 {AUTH_HMAC_SHA2_256_128, "hmac(sha256)" },
245 {AUTH_HMAC_SHA2_384_192, "hmac(sha384)" },
246 {AUTH_HMAC_SHA2_512_256, "hmac(sha512)" },
247 /* {AUTH_DES_MAC, "***" }, */
248 /* {AUTH_KPDK_MD5, "***" }, */
249 {AUTH_AES_XCBC_96, "xcbc(aes)" },
250 {AUTH_AES_CMAC_96, "cmac(aes)" },
251 };
252
253 /**
254 * Algorithms for IPComp
255 */
256 static kernel_algorithm_t compression_algs[] = {
257 /* {IPCOMP_OUI, "***" }, */
258 {IPCOMP_DEFLATE, "deflate" },
259 {IPCOMP_LZS, "lzs" },
260 {IPCOMP_LZJH, "lzjh" },
261 };
262
263 /**
264 * Look up a kernel algorithm name and its key size
265 */
266 static const char* lookup_algorithm(transform_type_t type, int ikev2)
267 {
268 kernel_algorithm_t *list;
269 int i, count;
270 char *name;
271
272 switch (type)
273 {
274 case ENCRYPTION_ALGORITHM:
275 list = encryption_algs;
276 count = countof(encryption_algs);
277 break;
278 case INTEGRITY_ALGORITHM:
279 list = integrity_algs;
280 count = countof(integrity_algs);
281 break;
282 case COMPRESSION_ALGORITHM:
283 list = compression_algs;
284 count = countof(compression_algs);
285 break;
286 default:
287 return NULL;
288 }
289 for (i = 0; i < count; i++)
290 {
291 if (list[i].ikev2 == ikev2)
292 {
293 return list[i].name;
294 }
295 }
296 if (charon->kernel->lookup_algorithm(charon->kernel, ikev2, type, NULL,
297 &name))
298 {
299 return name;
300 }
301 return NULL;
302 }
303
304 typedef struct private_kernel_netlink_ipsec_t private_kernel_netlink_ipsec_t;
305
306 /**
307 * Private variables and functions of kernel_netlink class.
308 */
309 struct private_kernel_netlink_ipsec_t {
310 /**
311 * Public part of the kernel_netlink_t object
312 */
313 kernel_netlink_ipsec_t public;
314
315 /**
316 * Mutex to lock access to installed policies
317 */
318 mutex_t *mutex;
319
320 /**
321 * Condvar to synchronize access to individual policies
322 */
323 condvar_t *condvar;
324
325 /**
326 * Hash table of installed policies (policy_entry_t)
327 */
328 hashtable_t *policies;
329
330 /**
331 * Hash table of IPsec SAs using policies (ipsec_sa_t)
332 */
333 hashtable_t *sas;
334
335 /**
336 * Netlink xfrm socket (IPsec)
337 */
338 netlink_socket_t *socket_xfrm;
339
340 /**
341 * Netlink xfrm socket to receive acquire and expire events
342 */
343 int socket_xfrm_events;
344
345 /**
346 * Whether to install routes along policies
347 */
348 bool install_routes;
349
350 /**
351 * Whether to set protocol and ports on selector installed with transport
352 * mode IPsec SAs
353 */
354 bool proto_port_transport;
355
356 /**
357 * Whether to always use UPDATE to install policies
358 */
359 bool policy_update;
360
361 /**
362 * Installed port based IKE bypass policies, as bypass_t
363 */
364 array_t *bypass;
365
366 /**
367 * Custom priority calculation function
368 */
369 uint32_t (*get_priority)(kernel_ipsec_policy_id_t *id,
370 kernel_ipsec_manage_policy_t *data);
371 };
372
373 typedef struct route_entry_t route_entry_t;
374
375 /**
376 * Installed routing entry
377 */
378 struct route_entry_t {
379 /** Name of the interface the route is bound to */
380 char *if_name;
381
382 /** Source ip of the route */
383 host_t *src_ip;
384
385 /** Gateway for this route */
386 host_t *gateway;
387
388 /** Destination net */
389 chunk_t dst_net;
390
391 /** Destination net prefixlen */
392 uint8_t prefixlen;
393 };
394
395 /**
396 * Destroy a route_entry_t object
397 */
398 static void route_entry_destroy(route_entry_t *this)
399 {
400 free(this->if_name);
401 this->src_ip->destroy(this->src_ip);
402 DESTROY_IF(this->gateway);
403 chunk_free(&this->dst_net);
404 free(this);
405 }
406
407 /**
408 * Compare two route_entry_t objects
409 */
410 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
411 {
412 return a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
413 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
414 a->gateway->ip_equals(a->gateway, b->gateway) &&
415 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen;
416 }
417
418 typedef struct ipsec_sa_t ipsec_sa_t;
419
420 /**
421 * IPsec SA assigned to a policy.
422 */
423 struct ipsec_sa_t {
424 /** Source address of this SA */
425 host_t *src;
426
427 /** Destination address of this SA */
428 host_t *dst;
429
430 /** Optional mark */
431 mark_t mark;
432
433 /** Optional mark */
434 uint32_t if_id;
435
436 /** Description of this SA */
437 ipsec_sa_cfg_t cfg;
438
439 /** Reference count for this SA */
440 refcount_t refcount;
441 };
442
443 /**
444 * Hash function for ipsec_sa_t objects
445 */
446 static u_int ipsec_sa_hash(ipsec_sa_t *sa)
447 {
448 return chunk_hash_inc(sa->src->get_address(sa->src),
449 chunk_hash_inc(sa->dst->get_address(sa->dst),
450 chunk_hash_inc(chunk_from_thing(sa->mark),
451 chunk_hash_inc(chunk_from_thing(sa->if_id),
452 chunk_hash(chunk_from_thing(sa->cfg))))));
453 }
454
455 /**
456 * Equality function for ipsec_sa_t objects
457 */
458 static bool ipsec_sa_equals(ipsec_sa_t *sa, ipsec_sa_t *other_sa)
459 {
460 return sa->src->ip_equals(sa->src, other_sa->src) &&
461 sa->dst->ip_equals(sa->dst, other_sa->dst) &&
462 sa->mark.value == other_sa->mark.value &&
463 sa->mark.mask == other_sa->mark.mask &&
464 sa->if_id == other_sa->if_id &&
465 ipsec_sa_cfg_equals(&sa->cfg, &other_sa->cfg);
466 }
467
468 /**
469 * Allocate or reference an IPsec SA object
470 */
471 static ipsec_sa_t *ipsec_sa_create(private_kernel_netlink_ipsec_t *this,
472 host_t *src, host_t *dst, mark_t mark,
473 uint32_t if_id, ipsec_sa_cfg_t *cfg)
474 {
475 ipsec_sa_t *sa, *found;
476 INIT(sa,
477 .src = src,
478 .dst = dst,
479 .mark = mark,
480 .if_id = if_id,
481 .cfg = *cfg,
482 );
483 found = this->sas->get(this->sas, sa);
484 if (!found)
485 {
486 sa->src = src->clone(src);
487 sa->dst = dst->clone(dst);
488 this->sas->put(this->sas, sa, sa);
489 }
490 else
491 {
492 free(sa);
493 sa = found;
494 }
495 ref_get(&sa->refcount);
496 return sa;
497 }
498
499 /**
500 * Release and destroy an IPsec SA object
501 */
502 static void ipsec_sa_destroy(private_kernel_netlink_ipsec_t *this,
503 ipsec_sa_t *sa)
504 {
505 if (ref_put(&sa->refcount))
506 {
507 this->sas->remove(this->sas, sa);
508 DESTROY_IF(sa->src);
509 DESTROY_IF(sa->dst);
510 free(sa);
511 }
512 }
513
514 typedef struct policy_sa_t policy_sa_t;
515 typedef struct policy_sa_out_t policy_sa_out_t;
516
517 /**
518 * Mapping between a policy and an IPsec SA.
519 */
520 struct policy_sa_t {
521 /** Priority assigned to the policy when installed with this SA */
522 uint32_t priority;
523
524 /** Automatic priority assigned to the policy when installed with this SA */
525 uint32_t auto_priority;
526
527 /** Type of the policy */
528 policy_type_t type;
529
530 /** Assigned SA */
531 ipsec_sa_t *sa;
532 };
533
534 /**
535 * For outbound policies we also cache the traffic selectors in order to install
536 * the route.
537 */
538 struct policy_sa_out_t {
539 /** Generic interface */
540 policy_sa_t generic;
541
542 /** Source traffic selector of this policy */
543 traffic_selector_t *src_ts;
544
545 /** Destination traffic selector of this policy */
546 traffic_selector_t *dst_ts;
547 };
548
549 /**
550 * Create a policy_sa(_in)_t object
551 */
552 static policy_sa_t *policy_sa_create(private_kernel_netlink_ipsec_t *this,
553 policy_dir_t dir, policy_type_t type, host_t *src, host_t *dst,
554 traffic_selector_t *src_ts, traffic_selector_t *dst_ts, mark_t mark,
555 uint32_t if_id, ipsec_sa_cfg_t *cfg)
556 {
557 policy_sa_t *policy;
558
559 if (dir == POLICY_OUT)
560 {
561 policy_sa_out_t *out;
562 INIT(out,
563 .src_ts = src_ts->clone(src_ts),
564 .dst_ts = dst_ts->clone(dst_ts),
565 );
566 policy = &out->generic;
567 }
568 else
569 {
570 INIT(policy, .priority = 0);
571 }
572 policy->type = type;
573 policy->sa = ipsec_sa_create(this, src, dst, mark, if_id, cfg);
574 return policy;
575 }
576
577 /**
578 * Destroy a policy_sa(_in)_t object
579 */
580 static void policy_sa_destroy(policy_sa_t *policy, policy_dir_t dir,
581 private_kernel_netlink_ipsec_t *this)
582 {
583 if (dir == POLICY_OUT)
584 {
585 policy_sa_out_t *out = (policy_sa_out_t*)policy;
586 out->src_ts->destroy(out->src_ts);
587 out->dst_ts->destroy(out->dst_ts);
588 }
589 ipsec_sa_destroy(this, policy->sa);
590 free(policy);
591 }
592
593 CALLBACK(policy_sa_destroy_cb, void,
594 policy_sa_t *policy, va_list args)
595 {
596 private_kernel_netlink_ipsec_t *this;
597 policy_dir_t dir;
598
599 VA_ARGS_VGET(args, dir, this);
600 policy_sa_destroy(policy, dir, this);
601 }
602
603 typedef struct policy_entry_t policy_entry_t;
604
605 /**
606 * Installed kernel policy.
607 */
608 struct policy_entry_t {
609
610 /** Direction of this policy: in, out, forward */
611 uint8_t direction;
612
613 /** Parameters of installed policy */
614 struct xfrm_selector sel;
615
616 /** Optional mark */
617 uint32_t mark;
618
619 /** Optional interface ID */
620 uint32_t if_id;
621
622 /** Associated route installed for this policy */
623 route_entry_t *route;
624
625 /** List of SAs this policy is used by, ordered by priority */
626 linked_list_t *used_by;
627
628 /** reqid for this policy */
629 uint32_t reqid;
630
631 /** Number of threads waiting to work on this policy */
632 int waiting;
633
634 /** TRUE if a thread is working on this policy */
635 bool working;
636 };
637
638 /**
639 * Destroy a policy_entry_t object
640 */
641 static void policy_entry_destroy(private_kernel_netlink_ipsec_t *this,
642 policy_entry_t *policy)
643 {
644 if (policy->route)
645 {
646 route_entry_destroy(policy->route);
647 }
648 if (policy->used_by)
649 {
650 policy->used_by->invoke_function(policy->used_by, policy_sa_destroy_cb,
651 policy->direction, this);
652 policy->used_by->destroy(policy->used_by);
653 }
654 free(policy);
655 }
656
657 /**
658 * Hash function for policy_entry_t objects
659 */
660 static u_int policy_hash(policy_entry_t *key)
661 {
662 chunk_t chunk = chunk_from_thing(key->sel);
663 return chunk_hash_inc(chunk, chunk_hash_inc(chunk_from_thing(key->mark),
664 chunk_hash(chunk_from_thing(key->if_id))));
665 }
666
667 /**
668 * Equality function for policy_entry_t objects
669 */
670 static bool policy_equals(policy_entry_t *key, policy_entry_t *other_key)
671 {
672 return memeq(&key->sel, &other_key->sel, sizeof(struct xfrm_selector)) &&
673 key->mark == other_key->mark &&
674 key->if_id == other_key->if_id &&
675 key->direction == other_key->direction;
676 }
677
/**
 * Count the consecutive bits that are set in a 16 bit port mask, starting at
 * the most significant bit
 *
 * @param port_mask	port mask in network byte order
 * @return			number of leading set bits (0..16)
 */
static inline uint32_t port_mask_bits(uint16_t port_mask)
{
	uint16_t host_mask = ntohs(port_mask);
	uint32_t count = 0;

	/* stop at the first cleared bit, counting from the top */
	while (count < 16 && (host_mask & (0x8000 >> count)))
	{
		count++;
	}
	return count;
}
698
699 /**
700 * Calculate the priority of a policy
701 *
702 * bits 0-0: separate trap and regular policies (0..1) 1 bit
703 * bits 1-1: restriction to network interface (0..1) 1 bit
704 * bits 2-7: src + dst port mask bits (2 * 0..16) 6 bits
705 * bits 8-8: restriction to protocol (0..1) 1 bit
706 * bits 9-17: src + dst network mask bits (2 * 0..128) 9 bits
707 * 18 bits
708 *
709 * smallest value: 000000000 0 000000 0 0: 0, lowest priority = 200'000
710 * largest value : 100000000 1 100000 1 1: 131'459, highst priority = 68'541
711 */
712 static uint32_t get_priority(policy_entry_t *policy, policy_priority_t prio,
713 char *interface)
714 {
715 uint32_t priority = PRIO_BASE, sport_mask_bits, dport_mask_bits;
716
717 switch (prio)
718 {
719 case POLICY_PRIORITY_FALLBACK:
720 priority += PRIO_BASE;
721 /* fall-through to next case */
722 case POLICY_PRIORITY_ROUTED:
723 case POLICY_PRIORITY_DEFAULT:
724 priority += PRIO_BASE;
725 /* fall-through to next case */
726 case POLICY_PRIORITY_PASS:
727 break;
728 }
729 sport_mask_bits = port_mask_bits(policy->sel.sport_mask);
730 dport_mask_bits = port_mask_bits(policy->sel.dport_mask);
731
732 /* calculate priority */
733 priority -= (policy->sel.prefixlen_s + policy->sel.prefixlen_d) * 512;
734 priority -= policy->sel.proto ? 256 : 0;
735 priority -= (sport_mask_bits + dport_mask_bits) * 4;
736 priority -= (interface != NULL) * 2;
737 priority -= (prio != POLICY_PRIORITY_ROUTED);
738
739 return priority;
740 }
741
742 /**
743 * Convert the general ipsec mode to the one defined in xfrm.h
744 */
745 static uint8_t mode2kernel(ipsec_mode_t mode)
746 {
747 switch (mode)
748 {
749 case MODE_TRANSPORT:
750 return XFRM_MODE_TRANSPORT;
751 case MODE_TUNNEL:
752 return XFRM_MODE_TUNNEL;
753 case MODE_BEET:
754 return XFRM_MODE_BEET;
755 default:
756 return mode;
757 }
758 }
759
760 /**
761 * Convert a host_t to a struct xfrm_address
762 */
763 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
764 {
765 chunk_t chunk = host->get_address(host);
766 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
767 }
768
769 /**
770 * Convert a struct xfrm_address to a host_t
771 */
772 static host_t* xfrm2host(int family, xfrm_address_t *xfrm, uint16_t port)
773 {
774 chunk_t chunk;
775
776 switch (family)
777 {
778 case AF_INET:
779 chunk = chunk_create((u_char*)&xfrm->a4, sizeof(xfrm->a4));
780 break;
781 case AF_INET6:
782 chunk = chunk_create((u_char*)&xfrm->a6, sizeof(xfrm->a6));
783 break;
784 default:
785 return NULL;
786 }
787 return host_create_from_chunk(family, chunk, ntohs(port));
788 }
789
790 /**
791 * Convert a traffic selector address range to subnet and its mask.
792 */
793 static void ts2subnet(traffic_selector_t* ts,
794 xfrm_address_t *net, uint8_t *mask)
795 {
796 host_t *net_host;
797 chunk_t net_chunk;
798
799 ts->to_subnet(ts, &net_host, mask);
800 net_chunk = net_host->get_address(net_host);
801 memcpy(net, net_chunk.ptr, net_chunk.len);
802 net_host->destroy(net_host);
803 }
804
805 /**
806 * Convert a traffic selector port range to port/portmask
807 */
808 static void ts2ports(traffic_selector_t* ts,
809 uint16_t *port, uint16_t *mask)
810 {
811 uint16_t from, to, bitmask;
812 int bit;
813
814 from = ts->get_from_port(ts);
815 to = ts->get_to_port(ts);
816
817 /* Quick check for a single port */
818 if (from == to)
819 {
820 *port = htons(from);
821 *mask = ~0;
822 }
823 else
824 {
825 /* Compute the port mask for port ranges */
826 *mask = 0;
827
828 for (bit = 15; bit >= 0; bit--)
829 {
830 bitmask = 1 << bit;
831
832 if ((bitmask & from) != (bitmask & to))
833 {
834 *port = htons(from & *mask);
835 *mask = htons(*mask);
836 return;
837 }
838 *mask |= bitmask;
839 }
840 }
841 return;
842 }
843
844 /**
845 * Convert a pair of traffic_selectors to an xfrm_selector
846 */
847 static struct xfrm_selector ts2selector(traffic_selector_t *src,
848 traffic_selector_t *dst,
849 char *interface)
850 {
851 struct xfrm_selector sel;
852 uint16_t port;
853
854 memset(&sel, 0, sizeof(sel));
855 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
856 /* src or dest proto may be "any" (0), use more restrictive one */
857 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
858 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
859 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
860 ts2ports(dst, &sel.dport, &sel.dport_mask);
861 ts2ports(src, &sel.sport, &sel.sport_mask);
862 if ((sel.proto == IPPROTO_ICMP || sel.proto == IPPROTO_ICMPV6) &&
863 (sel.dport || sel.sport))
864 {
865 /* the kernel expects the ICMP type and code in the source and
866 * destination port fields, respectively. */
867 port = ntohs(max(sel.dport, sel.sport));
868 sel.sport = htons(traffic_selector_icmp_type(port));
869 sel.sport_mask = sel.sport ? ~0 : 0;
870 sel.dport = htons(traffic_selector_icmp_code(port));
871 sel.dport_mask = sel.dport ? ~0 : 0;
872 }
873 sel.ifindex = interface ? if_nametoindex(interface) : 0;
874 sel.user = 0;
875
876 return sel;
877 }
878
879 /**
880 * Convert an xfrm_selector to a src|dst traffic_selector
881 */
882 static traffic_selector_t* selector2ts(struct xfrm_selector *sel, bool src)
883 {
884 u_char *addr;
885 uint8_t prefixlen;
886 uint16_t port = 0;
887 host_t *host = NULL;
888
889 if (src)
890 {
891 addr = (u_char*)&sel->saddr;
892 prefixlen = sel->prefixlen_s;
893 if (sel->sport_mask)
894 {
895 port = ntohs(sel->sport);
896 }
897 }
898 else
899 {
900 addr = (u_char*)&sel->daddr;
901 prefixlen = sel->prefixlen_d;
902 if (sel->dport_mask)
903 {
904 port = ntohs(sel->dport);
905 }
906 }
907 if (sel->proto == IPPROTO_ICMP || sel->proto == IPPROTO_ICMPV6)
908 { /* convert ICMP[v6] message type and code as supplied by the kernel in
909 * source and destination ports (both in network order) */
910 port = (sel->sport >> 8) | (sel->dport & 0xff00);
911 port = ntohs(port);
912 }
913 /* The Linux 2.6 kernel does not set the selector's family field,
914 * so as a kludge we additionally test the prefix length.
915 */
916 if (sel->family == AF_INET || sel->prefixlen_s == 32)
917 {
918 host = host_create_from_chunk(AF_INET, chunk_create(addr, 4), 0);
919 }
920 else if (sel->family == AF_INET6 || sel->prefixlen_s == 128)
921 {
922 host = host_create_from_chunk(AF_INET6, chunk_create(addr, 16), 0);
923 }
924
925 if (host)
926 {
927 return traffic_selector_create_from_subnet(host, prefixlen,
928 sel->proto, port, port ?: 65535);
929 }
930 return NULL;
931 }
932
933 /**
934 * Process a XFRM_MSG_ACQUIRE from kernel
935 */
936 static void process_acquire(private_kernel_netlink_ipsec_t *this,
937 struct nlmsghdr *hdr)
938 {
939 struct xfrm_user_acquire *acquire;
940 struct rtattr *rta;
941 size_t rtasize;
942 traffic_selector_t *src_ts, *dst_ts;
943 uint32_t reqid = 0;
944 int proto = 0;
945
946 acquire = NLMSG_DATA(hdr);
947 rta = XFRM_RTA(hdr, struct xfrm_user_acquire);
948 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_user_acquire);
949
950 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
951
952 while (RTA_OK(rta, rtasize))
953 {
954 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
955
956 if (rta->rta_type == XFRMA_TMPL)
957 {
958 struct xfrm_user_tmpl* tmpl;
959 tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rta);
960 reqid = tmpl->reqid;
961 proto = tmpl->id.proto;
962 }
963 rta = RTA_NEXT(rta, rtasize);
964 }
965 switch (proto)
966 {
967 case 0:
968 case IPPROTO_ESP:
969 case IPPROTO_AH:
970 break;
971 default:
972 /* acquire for AH/ESP only, not for IPCOMP */
973 return;
974 }
975 src_ts = selector2ts(&acquire->sel, TRUE);
976 dst_ts = selector2ts(&acquire->sel, FALSE);
977
978 charon->kernel->acquire(charon->kernel, reqid, src_ts, dst_ts);
979 }
980
981 /**
982 * Process a XFRM_MSG_EXPIRE from kernel
983 */
984 static void process_expire(private_kernel_netlink_ipsec_t *this,
985 struct nlmsghdr *hdr)
986 {
987 struct xfrm_user_expire *expire;
988 uint32_t spi;
989 uint8_t protocol;
990 host_t *dst;
991
992 expire = NLMSG_DATA(hdr);
993 protocol = expire->state.id.proto;
994 spi = expire->state.id.spi;
995
996 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
997
998 if (protocol == IPPROTO_ESP || protocol == IPPROTO_AH)
999 {
1000 dst = xfrm2host(expire->state.family, &expire->state.id.daddr, 0);
1001 if (dst)
1002 {
1003 charon->kernel->expire(charon->kernel, protocol, spi, dst,
1004 expire->hard != 0);
1005 dst->destroy(dst);
1006 }
1007 }
1008 }
1009
1010 /**
1011 * Process a XFRM_MSG_MIGRATE from kernel
1012 */
1013 static void process_migrate(private_kernel_netlink_ipsec_t *this,
1014 struct nlmsghdr *hdr)
1015 {
1016 struct xfrm_userpolicy_id *policy_id;
1017 struct rtattr *rta;
1018 size_t rtasize;
1019 traffic_selector_t *src_ts, *dst_ts;
1020 host_t *local = NULL, *remote = NULL;
1021 host_t *old_src = NULL, *old_dst = NULL;
1022 host_t *new_src = NULL, *new_dst = NULL;
1023 uint32_t reqid = 0;
1024 policy_dir_t dir;
1025
1026 policy_id = NLMSG_DATA(hdr);
1027 rta = XFRM_RTA(hdr, struct xfrm_userpolicy_id);
1028 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_userpolicy_id);
1029
1030 DBG2(DBG_KNL, "received a XFRM_MSG_MIGRATE");
1031
1032 src_ts = selector2ts(&policy_id->sel, TRUE);
1033 dst_ts = selector2ts(&policy_id->sel, FALSE);
1034 dir = (policy_dir_t)policy_id->dir;
1035
1036 DBG2(DBG_KNL, " policy: %R === %R %N", src_ts, dst_ts, policy_dir_names);
1037
1038 while (RTA_OK(rta, rtasize))
1039 {
1040 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
1041 if (rta->rta_type == XFRMA_KMADDRESS)
1042 {
1043 struct xfrm_user_kmaddress *kmaddress;
1044
1045 kmaddress = (struct xfrm_user_kmaddress*)RTA_DATA(rta);
1046 local = xfrm2host(kmaddress->family, &kmaddress->local, 0);
1047 remote = xfrm2host(kmaddress->family, &kmaddress->remote, 0);
1048 DBG2(DBG_KNL, " kmaddress: %H...%H", local, remote);
1049 }
1050 else if (rta->rta_type == XFRMA_MIGRATE)
1051 {
1052 struct xfrm_user_migrate *migrate;
1053
1054 migrate = (struct xfrm_user_migrate*)RTA_DATA(rta);
1055 old_src = xfrm2host(migrate->old_family, &migrate->old_saddr, 0);
1056 old_dst = xfrm2host(migrate->old_family, &migrate->old_daddr, 0);
1057 new_src = xfrm2host(migrate->new_family, &migrate->new_saddr, 0);
1058 new_dst = xfrm2host(migrate->new_family, &migrate->new_daddr, 0);
1059 reqid = migrate->reqid;
1060 DBG2(DBG_KNL, " migrate %H...%H to %H...%H, reqid {%u}",
1061 old_src, old_dst, new_src, new_dst, reqid);
1062 DESTROY_IF(old_src);
1063 DESTROY_IF(old_dst);
1064 DESTROY_IF(new_src);
1065 DESTROY_IF(new_dst);
1066 }
1067 rta = RTA_NEXT(rta, rtasize);
1068 }
1069
1070 if (src_ts && dst_ts && local && remote)
1071 {
1072 charon->kernel->migrate(charon->kernel, reqid, src_ts, dst_ts, dir,
1073 local, remote);
1074 }
1075 else
1076 {
1077 DESTROY_IF(src_ts);
1078 DESTROY_IF(dst_ts);
1079 DESTROY_IF(local);
1080 DESTROY_IF(remote);
1081 }
1082 }
1083
1084 /**
1085 * Process a XFRM_MSG_MAPPING from kernel
1086 */
1087 static void process_mapping(private_kernel_netlink_ipsec_t *this,
1088 struct nlmsghdr *hdr)
1089 {
1090 struct xfrm_user_mapping *mapping;
1091 uint32_t spi;
1092
1093 mapping = NLMSG_DATA(hdr);
1094 spi = mapping->id.spi;
1095
1096 DBG2(DBG_KNL, "received a XFRM_MSG_MAPPING");
1097
1098 if (mapping->id.proto == IPPROTO_ESP)
1099 {
1100 host_t *dst, *new;
1101
1102 dst = xfrm2host(mapping->id.family, &mapping->id.daddr, 0);
1103 if (dst)
1104 {
1105 new = xfrm2host(mapping->id.family, &mapping->new_saddr,
1106 mapping->new_sport);
1107 if (new)
1108 {
1109 charon->kernel->mapping(charon->kernel, IPPROTO_ESP, spi, dst,
1110 new);
1111 new->destroy(new);
1112 }
1113 dst->destroy(dst);
1114 }
1115 }
1116 }
1117
1118 /**
1119 * Receives events from kernel
1120 */
1121 static bool receive_events(private_kernel_netlink_ipsec_t *this, int fd,
1122 watcher_event_t event)
1123 {
1124 char response[netlink_get_buflen()];
1125 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1126 struct sockaddr_nl addr;
1127 socklen_t addr_len = sizeof(addr);
1128 int len;
1129
1130 len = recvfrom(this->socket_xfrm_events, response, sizeof(response),
1131 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
1132 if (len < 0)
1133 {
1134 switch (errno)
1135 {
1136 case EINTR:
1137 /* interrupted, try again */
1138 return TRUE;
1139 case EAGAIN:
1140 /* no data ready, select again */
1141 return TRUE;
1142 default:
1143 DBG1(DBG_KNL, "unable to receive from XFRM event socket: %s "
1144 "(%d)", strerror(errno), errno);
1145 sleep(1);
1146 return TRUE;
1147 }
1148 }
1149
1150 if (addr.nl_pid != 0)
1151 { /* not from kernel. not interested, try another one */
1152 return TRUE;
1153 }
1154
1155 while (NLMSG_OK(hdr, len))
1156 {
1157 switch (hdr->nlmsg_type)
1158 {
1159 case XFRM_MSG_ACQUIRE:
1160 process_acquire(this, hdr);
1161 break;
1162 case XFRM_MSG_EXPIRE:
1163 process_expire(this, hdr);
1164 break;
1165 case XFRM_MSG_MIGRATE:
1166 process_migrate(this, hdr);
1167 break;
1168 case XFRM_MSG_MAPPING:
1169 process_mapping(this, hdr);
1170 break;
1171 default:
1172 DBG1(DBG_KNL, "received unknown event from XFRM event "
1173 "socket: %d", hdr->nlmsg_type);
1174 break;
1175 }
1176 hdr = NLMSG_NEXT(hdr, len);
1177 }
1178 return TRUE;
1179 }
1180
1181 METHOD(kernel_ipsec_t, get_features, kernel_feature_t,
1182 private_kernel_netlink_ipsec_t *this)
1183 {
1184 return KERNEL_ESP_V3_TFC | KERNEL_POLICY_SPI;
1185 }
1186
1187 /**
1188 * Get an SPI for a specific protocol from the kernel.
1189 */
1190 static status_t get_spi_internal(private_kernel_netlink_ipsec_t *this,
1191 host_t *src, host_t *dst, uint8_t proto, uint32_t min, uint32_t max,
1192 uint32_t *spi)
1193 {
1194 netlink_buf_t request;
1195 struct nlmsghdr *hdr, *out;
1196 struct xfrm_userspi_info *userspi;
1197 uint32_t received_spi = 0;
1198 size_t len;
1199
1200 memset(&request, 0, sizeof(request));
1201
1202 hdr = &request.hdr;
1203 hdr->nlmsg_flags = NLM_F_REQUEST;
1204 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1205 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1206
1207 userspi = NLMSG_DATA(hdr);
1208 host2xfrm(src, &userspi->info.saddr);
1209 host2xfrm(dst, &userspi->info.id.daddr);
1210 userspi->info.id.proto = proto;
1211 userspi->info.mode = XFRM_MODE_TUNNEL;
1212 userspi->info.family = src->get_family(src);
1213 userspi->min = min;
1214 userspi->max = max;
1215
1216 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1217 {
1218 hdr = out;
1219 while (NLMSG_OK(hdr, len))
1220 {
1221 switch (hdr->nlmsg_type)
1222 {
1223 case XFRM_MSG_NEWSA:
1224 {
1225 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1226 received_spi = usersa->id.spi;
1227 break;
1228 }
1229 case NLMSG_ERROR:
1230 {
1231 struct nlmsgerr *err = NLMSG_DATA(hdr);
1232 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1233 strerror(-err->error), -err->error);
1234 break;
1235 }
1236 default:
1237 hdr = NLMSG_NEXT(hdr, len);
1238 continue;
1239 case NLMSG_DONE:
1240 break;
1241 }
1242 break;
1243 }
1244 free(out);
1245 }
1246
1247 if (received_spi == 0)
1248 {
1249 return FAILED;
1250 }
1251
1252 *spi = received_spi;
1253 return SUCCESS;
1254 }
1255
1256 METHOD(kernel_ipsec_t, get_spi, status_t,
1257 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1258 uint8_t protocol, uint32_t *spi)
1259 {
1260 uint32_t spi_min, spi_max;
1261
1262 spi_min = lib->settings->get_int(lib->settings, "%s.spi_min",
1263 KERNEL_SPI_MIN, lib->ns);
1264 spi_max = lib->settings->get_int(lib->settings, "%s.spi_max",
1265 KERNEL_SPI_MAX, lib->ns);
1266
1267 if (get_spi_internal(this, src, dst, protocol, min(spi_min, spi_max),
1268 max(spi_min, spi_max), spi) != SUCCESS)
1269 {
1270 DBG1(DBG_KNL, "unable to get SPI");
1271 return FAILED;
1272 }
1273
1274 DBG2(DBG_KNL, "got SPI %.8x", ntohl(*spi));
1275 return SUCCESS;
1276 }
1277
1278 METHOD(kernel_ipsec_t, get_cpi, status_t,
1279 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1280 uint16_t *cpi)
1281 {
1282 uint32_t received_spi = 0;
1283
1284 if (get_spi_internal(this, src, dst, IPPROTO_COMP,
1285 0x100, 0xEFFF, &received_spi) != SUCCESS)
1286 {
1287 DBG1(DBG_KNL, "unable to get CPI");
1288 return FAILED;
1289 }
1290
1291 *cpi = htons((uint16_t)ntohl(received_spi));
1292
1293 DBG2(DBG_KNL, "got CPI %.4x", ntohs(*cpi));
1294 return SUCCESS;
1295 }
1296
1297 /**
1298 * Format the mark for debug messages
1299 */
1300 static void format_mark(char *buf, int buflen, mark_t mark)
1301 {
1302 if (mark.value | mark.mask)
1303 {
1304 snprintf(buf, buflen, " (mark %u/0x%08x)", mark.value, mark.mask);
1305 }
1306 }
1307
1308 /**
1309 * Add a XFRM mark to message if required
1310 */
1311 static bool add_mark(struct nlmsghdr *hdr, int buflen, mark_t mark)
1312 {
1313 if (mark.value | mark.mask)
1314 {
1315 struct xfrm_mark *xmrk;
1316
1317 xmrk = netlink_reserve(hdr, buflen, XFRMA_MARK, sizeof(*xmrk));
1318 if (!xmrk)
1319 {
1320 return FALSE;
1321 }
1322 xmrk->v = mark.value;
1323 xmrk->m = mark.mask;
1324 }
1325 return TRUE;
1326 }
1327
1328 /**
1329 * Add a uint32 attribute to message
1330 */
1331 static bool add_uint32(struct nlmsghdr *hdr, int buflen,
1332 enum xfrm_attr_type_t type, uint32_t value)
1333 {
1334 uint32_t *xvalue;
1335
1336 xvalue = netlink_reserve(hdr, buflen, type, sizeof(*xvalue));
1337 if (!xvalue)
1338 {
1339 return FALSE;
1340 }
1341 *xvalue = value;
1342 return TRUE;
1343 }
1344
1345 /* ETHTOOL_GSSET_INFO is available since 2.6.34 and ETH_SS_FEATURES (enum) and
1346 * ETHTOOL_GFEATURES since 2.6.39, so check for the latter */
1347 #ifdef ETHTOOL_GFEATURES
1348
/**
 * IPsec HW offload state in kernel
 */
typedef enum {
	/** not determined yet (this is the zero value) */
	NL_OFFLOAD_UNKNOWN = 0,
	/** offload feature was not found */
	NL_OFFLOAD_UNSUPPORTED = 1,
	/** offload feature was found */
	NL_OFFLOAD_SUPPORTED = 2
} nl_offload_state_t;
1357
1358 /**
1359 * Global metadata used for IPsec HW offload
1360 */
1361 static struct {
1362 /** bit in feature set */
1363 u_int bit;
1364 /** total number of device feature blocks */
1365 u_int total_blocks;
1366 /** determined HW offload state */
1367 nl_offload_state_t state;
1368 } netlink_hw_offload;
1369
1370 /**
1371 * Check if kernel supports HW offload
1372 */
1373 static void netlink_find_offload_feature(const char *ifname, int query_socket)
1374 {
1375 struct ethtool_sset_info *sset_info;
1376 struct ethtool_gstrings *cmd = NULL;
1377 struct ifreq ifr;
1378 uint32_t sset_len, i;
1379 char *str;
1380 int err;
1381
1382 netlink_hw_offload.state = NL_OFFLOAD_UNSUPPORTED;
1383
1384 /* determine number of device features */
1385 INIT_EXTRA(sset_info, sizeof(uint32_t),
1386 .cmd = ETHTOOL_GSSET_INFO,
1387 .sset_mask = 1ULL << ETH_SS_FEATURES,
1388 );
1389 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1390 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1391 ifr.ifr_data = (void*)sset_info;
1392
1393 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1394 if (err || sset_info->sset_mask != 1ULL << ETH_SS_FEATURES)
1395 {
1396 goto out;
1397 }
1398 sset_len = sset_info->data[0];
1399
1400 /* retrieve names of device features */
1401 INIT_EXTRA(cmd, ETH_GSTRING_LEN * sset_len,
1402 .cmd = ETHTOOL_GSTRINGS,
1403 .string_set = ETH_SS_FEATURES,
1404 );
1405 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1406 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1407 ifr.ifr_data = (void*)cmd;
1408
1409 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1410 if (err)
1411 {
1412 goto out;
1413 }
1414
1415 /* look for the ESP_HW feature bit */
1416 str = (char*)cmd->data;
1417 for (i = 0; i < cmd->len; i++)
1418 {
1419 if (strneq(str, "esp-hw-offload", ETH_GSTRING_LEN))
1420 {
1421 netlink_hw_offload.bit = i;
1422 netlink_hw_offload.total_blocks = (sset_len + 31) / 32;
1423 netlink_hw_offload.state = NL_OFFLOAD_SUPPORTED;
1424 break;
1425 }
1426 str += ETH_GSTRING_LEN;
1427 }
1428
1429 out:
1430 free(sset_info);
1431 free(cmd);
1432 }
1433
1434 /**
1435 * Check if interface supported HW offload
1436 */
1437 static bool netlink_detect_offload(const char *ifname)
1438 {
1439 struct ethtool_gfeatures *cmd;
1440 uint32_t feature_bit;
1441 struct ifreq ifr;
1442 int query_socket;
1443 int block;
1444 bool ret = FALSE;
1445
1446 query_socket = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_XFRM);
1447 if (query_socket < 0)
1448 {
1449 return FALSE;
1450 }
1451
1452 /* kernel requires a real interface in order to query the kernel-wide
1453 * capability, so we do it here on first invocation.
1454 */
1455 if (netlink_hw_offload.state == NL_OFFLOAD_UNKNOWN)
1456 {
1457 netlink_find_offload_feature(ifname, query_socket);
1458 }
1459 if (netlink_hw_offload.state == NL_OFFLOAD_UNSUPPORTED)
1460 {
1461 DBG1(DBG_KNL, "HW offload is not supported by kernel");
1462 goto out;
1463 }
1464
1465 /* feature is supported by kernel, query device features */
1466 INIT_EXTRA(cmd, sizeof(cmd->features[0]) * netlink_hw_offload.total_blocks,
1467 .cmd = ETHTOOL_GFEATURES,
1468 .size = netlink_hw_offload.total_blocks,
1469 );
1470 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1471 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1472 ifr.ifr_data = (void*)cmd;
1473
1474 if (ioctl(query_socket, SIOCETHTOOL, &ifr))
1475 {
1476 goto out_free;
1477 }
1478
1479 block = netlink_hw_offload.bit / 32;
1480 feature_bit = 1U << (netlink_hw_offload.bit % 32);
1481 if (cmd->features[block].active & feature_bit)
1482 {
1483 ret = TRUE;
1484 }
1485
1486 out_free:
1487 free(cmd);
1488 if (!ret)
1489 {
1490 DBG1(DBG_KNL, "HW offload is not supported by device");
1491 }
1492 out:
1493 close(query_socket);
1494 return ret;
1495 }
1496
1497 #else
1498
1499 static bool netlink_detect_offload(const char *ifname)
1500 {
1501 return FALSE;
1502 }
1503
1504 #endif
1505
1506 /**
1507 * There are 3 HW offload configuration values:
1508 * 1. HW_OFFLOAD_NO : Do not configure HW offload.
1509 * 2. HW_OFFLOAD_YES : Configure HW offload.
1510 * Fail SA addition if offload is not supported.
1511 * 3. HW_OFFLOAD_AUTO : Configure HW offload if supported by the kernel
1512 * and device.
1513 * Do not fail SA addition otherwise.
1514 */
1515 static bool config_hw_offload(kernel_ipsec_sa_id_t *id,
1516 kernel_ipsec_add_sa_t *data, struct nlmsghdr *hdr,
1517 int buflen)
1518 {
1519 host_t *local = data->inbound ? id->dst : id->src;
1520 struct xfrm_user_offload *offload;
1521 bool hw_offload_yes, ret = FALSE;
1522 char *ifname;
1523
1524 /* do Ipsec configuration without offload */
1525 if (data->hw_offload == HW_OFFLOAD_NO)
1526 {
1527 return TRUE;
1528 }
1529
1530 hw_offload_yes = (data->hw_offload == HW_OFFLOAD_YES);
1531
1532 if (!charon->kernel->get_interface(charon->kernel, local, &ifname))
1533 {
1534 return !hw_offload_yes;
1535 }
1536
1537 /* check if interface supports hw_offload */
1538 if (!netlink_detect_offload(ifname))
1539 {
1540 ret = !hw_offload_yes;
1541 goto out;
1542 }
1543
1544 /* activate HW offload */
1545 offload = netlink_reserve(hdr, buflen,
1546 XFRMA_OFFLOAD_DEV, sizeof(*offload));
1547 if (!offload)
1548 {
1549 ret = !hw_offload_yes;
1550 goto out;
1551 }
1552 offload->ifindex = if_nametoindex(ifname);
1553 if (local->get_family(local) == AF_INET6)
1554 {
1555 offload->flags |= XFRM_OFFLOAD_IPV6;
1556 }
1557 offload->flags |= data->inbound ? XFRM_OFFLOAD_INBOUND : 0;
1558
1559 ret = TRUE;
1560
1561 out:
1562 free(ifname);
1563 return ret;
1564 }
1565
/**
 * Installs the SA identified by id with the properties given in data.
 *
 * The request is sent as XFRM_MSG_UPDSA if data->update is set, otherwise as
 * XFRM_MSG_NEWSA; if the update reports NOT_FOUND, the same request is sent
 * again as XFRM_MSG_NEWSA. If IPComp is requested with a non-zero CPI, a
 * separate IPComp SA is installed first via a recursive call and this SA is
 * then installed in transport mode without compression.
 *
 * Returns SUCCESS if the kernel acknowledged the SA, FAILED otherwise. The
 * request buffer is wiped before returning on all paths after it was built.
 */
1566 METHOD(kernel_ipsec_t, add_sa, status_t,
1567 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
1568 kernel_ipsec_add_sa_t *data)
1569 {
1570 netlink_buf_t request;
1571 const char *alg_name;
1572 char markstr[32] = "";
1573 struct nlmsghdr *hdr;
1574 struct xfrm_usersa_info *sa;
/* icv_size starts at 64 bits; the AEAD cases below add 32 bits per
 * fall-through step, i.e. 64/96/128 bits for ICV8/ICV12/ICV16 */
1575 uint16_t icv_size = 64, ipcomp = data->ipcomp;
1576 ipsec_mode_t mode = data->mode, original_mode = data->mode;
1577 traffic_selector_t *first_src_ts, *first_dst_ts;
1578 status_t status = FAILED;
1579
1580 /* if IPComp is used, we install an additional IPComp SA. if the cpi is 0
1581 * we are in the recursive call below */
1582 if (ipcomp != IPCOMP_NONE && data->cpi != 0)
1583 {
1584 lifetime_cfg_t lft = {{0,0,0},{0,0,0},{0,0,0}};
1585 kernel_ipsec_sa_id_t ipcomp_id = {
1586 .src = id->src,
1587 .dst = id->dst,
1588 .spi = htonl(ntohs(data->cpi)),
1589 .proto = IPPROTO_COMP,
1590 .mark = id->mark,
1591 .if_id = id->if_id,
1592 };
1593 kernel_ipsec_add_sa_t ipcomp_sa = {
1594 .reqid = data->reqid,
1595 .mode = data->mode,
1596 .src_ts = data->src_ts,
1597 .dst_ts = data->dst_ts,
1598 .lifetime = &lft,
1599 .enc_alg = ENCR_UNDEFINED,
1600 .int_alg = AUTH_UNDEFINED,
1601 .tfc = data->tfc,
1602 .ipcomp = data->ipcomp,
1603 .initiator = data->initiator,
1604 .inbound = data->inbound,
1605 .update = data->update,
1606 };
/* NOTE: the result of the recursive call is not checked */
1607 add_sa(this, &ipcomp_id, &ipcomp_sa);
1608 ipcomp = IPCOMP_NONE;
1609 /* use transport mode ESP SA, IPComp uses tunnel mode */
1610 mode = MODE_TRANSPORT;
1611 }
1612
1613 memset(&request, 0, sizeof(request));
1614 format_mark(markstr, sizeof(markstr), id->mark);
1615
1616 DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%u}%s",
1617 ntohl(id->spi), data->reqid, markstr);
1618
1619 hdr = &request.hdr;
1620 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1621 hdr->nlmsg_type = data->update ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1622 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1623
1624 sa = NLMSG_DATA(hdr);
1625 host2xfrm(id->src, &sa->saddr);
1626 host2xfrm(id->dst, &sa->id.daddr);
1627 sa->id.spi = id->spi;
1628 sa->id.proto = id->proto;
1629 sa->family = id->src->get_family(id->src);
1630 sa->mode = mode2kernel(mode);
1631
1632 if (!data->copy_df)
1633 {
1634 sa->flags |= XFRM_STATE_NOPMTUDISC;
1635 }
1636
1637 if (!data->copy_ecn)
1638 {
1639 sa->flags |= XFRM_STATE_NOECN;
1640 }
1641
/* DSCP handling differs per direction: a flag on inbound SAs, an extra
 * flags attribute on outbound SAs */
1642 if (data->inbound)
1643 {
1644 switch (data->copy_dscp)
1645 {
1646 case DSCP_COPY_YES:
1647 case DSCP_COPY_IN_ONLY:
1648 sa->flags |= XFRM_STATE_DECAP_DSCP;
1649 break;
1650 default:
1651 break;
1652 }
1653 }
1654 else
1655 {
1656 switch (data->copy_dscp)
1657 {
1658 case DSCP_COPY_IN_ONLY:
1659 case DSCP_COPY_NO:
1660 {
1661 /* currently the only extra flag */
1662 if (!add_uint32(hdr, sizeof(request), XFRMA_SA_EXTRA_FLAGS,
1663 XFRM_SA_XFLAG_DONT_ENCAP_DSCP))
1664 {
1665 goto failed;
1666 }
1667 break;
1668 }
1669 default:
1670 break;
1671 }
1672 }
1673
1674 switch (mode)
1675 {
1676 case MODE_TUNNEL:
1677 sa->flags |= XFRM_STATE_AF_UNSPEC;
1678 break;
1679 case MODE_BEET:
1680 case MODE_TRANSPORT:
1681 if (original_mode == MODE_TUNNEL)
1682 { /* don't install selectors for switched SAs. because only one
1683 * selector can be installed other traffic would get dropped */
1684 break;
1685 }
1686 if (data->src_ts->get_first(data->src_ts,
1687 (void**)&first_src_ts) == SUCCESS &&
1688 data->dst_ts->get_first(data->dst_ts,
1689 (void**)&first_dst_ts) == SUCCESS)
1690 {
1691 sa->sel = ts2selector(first_src_ts, first_dst_ts,
1692 data->interface);
1693 if (!this->proto_port_transport)
1694 {
1695 /* don't install proto/port on SA. This would break
1696 * potential secondary SAs for the same address using a
1697 * different prot/port. */
1698 sa->sel.proto = 0;
1699 sa->sel.dport = sa->sel.dport_mask = 0;
1700 sa->sel.sport = sa->sel.sport_mask = 0;
1701 }
1702 }
1703 break;
1704 default:
1705 break;
1706 }
1707 if (id->proto == IPPROTO_AH && sa->family == AF_INET)
1708 { /* use alignment to 4 bytes for IPv4 instead of the incorrect 8 byte
1709 * alignment that's used by default but is only valid for IPv6 */
1710 sa->flags |= XFRM_STATE_ALIGN4;
1711 }
1712
1713 sa->reqid = data->reqid;
1714 sa->lft.soft_byte_limit = XFRM_LIMIT(data->lifetime->bytes.rekey);
1715 sa->lft.hard_byte_limit = XFRM_LIMIT(data->lifetime->bytes.life);
1716 sa->lft.soft_packet_limit = XFRM_LIMIT(data->lifetime->packets.rekey);
1717 sa->lft.hard_packet_limit = XFRM_LIMIT(data->lifetime->packets.life);
1718 /* we use lifetimes since added, not since used */
1719 sa->lft.soft_add_expires_seconds = data->lifetime->time.rekey;
1720 sa->lft.hard_add_expires_seconds = data->lifetime->time.life;
1721 sa->lft.soft_use_expires_seconds = 0;
1722 sa->lft.hard_use_expires_seconds = 0;
1723
/* AEAD algorithms are passed as XFRMA_ALG_AEAD with an ICV length, all
 * other encryption algorithms as XFRMA_ALG_CRYPT */
1724 switch (data->enc_alg)
1725 {
1726 case ENCR_UNDEFINED:
1727 /* no encryption */
1728 break;
1729 case ENCR_AES_CCM_ICV16:
1730 case ENCR_AES_GCM_ICV16:
1731 case ENCR_NULL_AUTH_AES_GMAC:
1732 case ENCR_CAMELLIA_CCM_ICV16:
1733 case ENCR_CHACHA20_POLY1305:
1734 icv_size += 32;
1735 /* FALL */
1736 case ENCR_AES_CCM_ICV12:
1737 case ENCR_AES_GCM_ICV12:
1738 case ENCR_CAMELLIA_CCM_ICV12:
1739 icv_size += 32;
1740 /* FALL */
1741 case ENCR_AES_CCM_ICV8:
1742 case ENCR_AES_GCM_ICV8:
1743 case ENCR_CAMELLIA_CCM_ICV8:
1744 {
1745 struct xfrm_algo_aead *algo;
1746
1747 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1748 if (alg_name == NULL)
1749 {
1750 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1751 encryption_algorithm_names, data->enc_alg);
1752 goto failed;
1753 }
1754 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1755 encryption_algorithm_names, data->enc_alg,
1756 data->enc_key.len * 8);
1757
1758 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AEAD,
1759 sizeof(*algo) + data->enc_key.len);
1760 if (!algo)
1761 {
1762 goto failed;
1763 }
1764 algo->alg_key_len = data->enc_key.len * 8;
1765 algo->alg_icv_len = icv_size;
1766 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1767 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1768 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1769 break;
1770 }
1771 default:
1772 {
1773 struct xfrm_algo *algo;
1774
1775 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1776 if (alg_name == NULL)
1777 {
1778 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1779 encryption_algorithm_names, data->enc_alg);
1780 goto failed;
1781 }
1782 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1783 encryption_algorithm_names, data->enc_alg,
1784 data->enc_key.len * 8);
1785
1786 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_CRYPT,
1787 sizeof(*algo) + data->enc_key.len);
1788 if (!algo)
1789 {
1790 goto failed;
1791 }
1792 algo->alg_key_len = data->enc_key.len * 8;
1793 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1794 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1795 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1796 }
1797 }
1798
1799 if (data->int_alg != AUTH_UNDEFINED)
1800 {
/* a non-zero trunc_len selects XFRMA_ALG_AUTH_TRUNC below */
1801 u_int trunc_len = 0;
1802
1803 alg_name = lookup_algorithm(INTEGRITY_ALGORITHM, data->int_alg);
1804 if (alg_name == NULL)
1805 {
1806 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1807 integrity_algorithm_names, data->int_alg);
1808 goto failed;
1809 }
1810 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1811 integrity_algorithm_names, data->int_alg, data->int_key.len * 8);
1812
1813 switch (data->int_alg)
1814 {
1815 case AUTH_HMAC_MD5_128:
1816 case AUTH_HMAC_SHA2_256_128:
1817 trunc_len = 128;
1818 break;
1819 case AUTH_HMAC_SHA1_160:
1820 trunc_len = 160;
1821 break;
1822 default:
1823 break;
1824 }
1825
1826 if (trunc_len)
1827 {
1828 struct xfrm_algo_auth* algo;
1829
1830 /* the kernel uses SHA256 with 96 bit truncation by default,
1831 * use specified truncation size supported by newer kernels.
1832 * also use this for untruncated MD5 and SHA1. */
1833 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH_TRUNC,
1834 sizeof(*algo) + data->int_key.len);
1835 if (!algo)
1836 {
1837 goto failed;
1838 }
1839 algo->alg_key_len = data->int_key.len * 8;
1840 algo->alg_trunc_len = trunc_len;
1841 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1842 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1843 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
1844 }
1845 else
1846 {
1847 struct xfrm_algo* algo;
1848
1849 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH,
1850 sizeof(*algo) + data->int_key.len);
1851 if (!algo)
1852 {
1853 goto failed;
1854 }
1855 algo->alg_key_len = data->int_key.len * 8;
1856 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1857 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1858 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
1859 }
1860 }
1861
/* only reached with compression set for the IPComp SA itself, the local
 * ipcomp is reset to IPCOMP_NONE above when a separate SA was installed */
1862 if (ipcomp != IPCOMP_NONE)
1863 {
1864 struct xfrm_algo* algo;
1865
1866 alg_name = lookup_algorithm(COMPRESSION_ALGORITHM, ipcomp);
1867 if (alg_name == NULL)
1868 {
1869 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1870 ipcomp_transform_names, ipcomp);
1871 goto failed;
1872 }
1873 DBG2(DBG_KNL, " using compression algorithm %N",
1874 ipcomp_transform_names, ipcomp);
1875
1876 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_COMP,
1877 sizeof(*algo));
1878 if (!algo)
1879 {
1880 goto failed;
1881 }
1882 algo->alg_key_len = 0;
1883 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1884 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1885 }
1886
1887 if (data->encap)
1888 {
1889 struct xfrm_encap_tmpl *tmpl;
1890
1891 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP, sizeof(*tmpl));
1892 if (!tmpl)
1893 {
1894 goto failed;
1895 }
1896 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1897 tmpl->encap_sport = htons(id->src->get_port(id->src));
1898 tmpl->encap_dport = htons(id->dst->get_port(id->dst));
1899 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1900 /* encap_oa could probably be derived from the
1901 * traffic selectors [rfc4306, p39]. In the netlink kernel
1902 * implementation pluto does the same as we do here but it uses
1903 * encap_oa in the pfkey implementation.
1904 * BUT as /usr/src/linux/net/key/af_key.c indicates the kernel ignores
1905 * it anyway
1906 * -> does that mean that NAT-T encap doesn't work in transport mode?
1907 * No. The reason the kernel ignores NAT-OA is that it recomputes
1908 * (or, rather, just ignores) the checksum. If packets pass the IPsec
1909 * checks it marks them "checksum ok" so OA isn't needed. */
1910 }
1911
1912 if (!add_mark(hdr, sizeof(request), id->mark))
1913 {
1914 goto failed;
1915 }
1916
1917 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
1918 {
1919 goto failed;
1920 }
1921
/* the mark to set (data->mark, as opposed to id->mark added above) is
 * only added if this SA does not carry compression */
1922 if (ipcomp == IPCOMP_NONE && (data->mark.value | data->mark.mask))
1923 {
1924 if (!add_uint32(hdr, sizeof(request), XFRMA_SET_MARK,
1925 data->mark.value) ||
1926 !add_uint32(hdr, sizeof(request), XFRMA_SET_MARK_MASK,
1927 data->mark.mask))
1928 {
1929 goto failed;
1930 }
1931 }
1932
1933 if (data->tfc && id->proto == IPPROTO_ESP && mode == MODE_TUNNEL)
1934 { /* the kernel supports TFC padding only for tunnel mode ESP SAs */
1935 if (!add_uint32(hdr, sizeof(request), XFRMA_TFCPAD, data->tfc))
1936 {
1937 goto failed;
1938 }
1939 }
1940
1941 if (id->proto != IPPROTO_COMP)
1942 {
1943 /* generally, we don't need a replay window for outbound SAs, however,
1944 * when using ESN the kernel rejects the attribute if it is 0 */
/* NOTE: this writes the adjusted window back into the caller's data */
1945 if (!data->inbound && data->replay_window)
1946 {
1947 data->replay_window = data->esn ? 1 : 0;
1948 }
1949 if (data->replay_window != 0 && (data->esn || data->replay_window > 32))
1950 {
1951 /* for ESN or larger replay windows we need the new
1952 * XFRMA_REPLAY_ESN_VAL attribute to configure a bitmap */
1953 struct xfrm_replay_state_esn *replay;
1954 uint32_t bmp_size;
1955
/* bitmap size in bytes, window rounded up to a multiple of 32 bits */
1956 bmp_size = round_up(data->replay_window, sizeof(uint32_t) * 8) / 8;
1957 replay = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
1958 sizeof(*replay) + bmp_size);
1959 if (!replay)
1960 {
1961 goto failed;
1962 }
1963 /* bmp_len contains number of __u32's */
1964 replay->bmp_len = bmp_size / sizeof(uint32_t);
1965 replay->replay_window = data->replay_window;
1966 DBG2(DBG_KNL, " using replay window of %u packets",
1967 data->replay_window);
1968
1969 if (data->esn)
1970 {
1971 DBG2(DBG_KNL, " using extended sequence numbers (ESN)");
1972 sa->flags |= XFRM_STATE_ESN;
1973 }
1974 }
1975 else
1976 {
1977 DBG2(DBG_KNL, " using replay window of %u packets",
1978 data->replay_window);
1979 sa->replay_window = data->replay_window;
1980 }
1981
1982 DBG2(DBG_KNL, " HW offload: %N", hw_offload_names, data->hw_offload);
1983 if (!config_hw_offload(id, data, hdr, sizeof(request)))
1984 {
1985 DBG1(DBG_KNL, "failed to configure HW offload");
1986 goto failed;
1987 }
1988 }
1989
1990 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
1991 if (status == NOT_FOUND && data->update)
1992 {
1993 DBG1(DBG_KNL, "allocated SPI not found anymore, try to add SAD entry");
1994 hdr->nlmsg_type = XFRM_MSG_NEWSA;
1995 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
1996 }
1997
1998 if (status != SUCCESS)
1999 {
2000 DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x%s (%N)", ntohl(id->spi),
2001 markstr, status_names, status);
2002 status = FAILED;
2003 goto failed;
2004 }
2005
2006 status = SUCCESS;
2007
2008 failed:
/* the request contains the key material copied above, so wipe it */
2009 memwipe(&request, sizeof(request));
2010 return status;
2011 }
2012
2013 /**
2014 * Get the ESN replay state (i.e. sequence numbers) of an SA.
2015 *
2016 * Allocates into one the replay state structure we get from the kernel.
2017 */
2018 static void get_replay_state(private_kernel_netlink_ipsec_t *this,
2019 kernel_ipsec_sa_id_t *sa,
2020 struct xfrm_replay_state_esn **replay_esn,
2021 uint32_t *replay_esn_len,
2022 struct xfrm_replay_state **replay,
2023 struct xfrm_lifetime_cur **lifetime)
2024 {
2025 netlink_buf_t request;
2026 struct nlmsghdr *hdr, *out = NULL;
2027 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2028 size_t len;
2029 struct rtattr *rta;
2030 size_t rtasize;
2031
2032 memset(&request, 0, sizeof(request));
2033
2034 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI %.8x",
2035 ntohl(sa->spi));
2036
2037 hdr = &request.hdr;
2038 hdr->nlmsg_flags = NLM_F_REQUEST;
2039 hdr->nlmsg_type = XFRM_MSG_GETAE;
2040 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2041
2042 aevent_id = NLMSG_DATA(hdr);
2043 aevent_id->flags = XFRM_AE_RVAL;
2044
2045 host2xfrm(sa->dst, &aevent_id->sa_id.daddr);
2046 aevent_id->sa_id.spi = sa->spi;
2047 aevent_id->sa_id.proto = sa->proto;
2048 aevent_id->sa_id.family = sa->dst->get_family(sa->dst);
2049
2050 if (!add_mark(hdr, sizeof(request), sa->mark))
2051 {
2052 return;
2053 }
2054 if (sa->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, sa->if_id))
2055 {
2056 return;
2057 }
2058
2059 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2060 {
2061 hdr = out;
2062 while (NLMSG_OK(hdr, len))
2063 {
2064 switch (hdr->nlmsg_type)
2065 {
2066 case XFRM_MSG_NEWAE:
2067 {
2068 out_aevent = NLMSG_DATA(hdr);
2069 break;
2070 }
2071 case NLMSG_ERROR:
2072 {
2073 struct nlmsgerr *err = NLMSG_DATA(hdr);
2074 DBG1(DBG_KNL, "querying replay state from SAD entry "
2075 "failed: %s (%d)", strerror(-err->error), -err->error);
2076 break;
2077 }
2078 default:
2079 hdr = NLMSG_NEXT(hdr, len);
2080 continue;
2081 case NLMSG_DONE:
2082 break;
2083 }
2084 break;
2085 }
2086 }
2087
2088 if (out_aevent)
2089 {
2090 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2091 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2092 while (RTA_OK(rta, rtasize))
2093 {
2094 if (rta->rta_type == XFRMA_LTIME_VAL &&
2095 RTA_PAYLOAD(rta) == sizeof(**lifetime))
2096 {
2097 free(*lifetime);
2098 *lifetime = malloc(RTA_PAYLOAD(rta));
2099 memcpy(*lifetime, RTA_DATA(rta), RTA_PAYLOAD(rta));
2100 }
2101 if (rta->rta_type == XFRMA_REPLAY_VAL &&
2102 RTA_PAYLOAD(rta) == sizeof(**replay))
2103 {
2104 free(*replay);
2105 *replay = malloc(RTA_PAYLOAD(rta));
2106 memcpy(*replay, RTA_DATA(rta), RTA_PAYLOAD(rta));
2107 }
2108 if (rta->rta_type == XFRMA_REPLAY_ESN_VAL &&
2109 RTA_PAYLOAD(rta) >= sizeof(**replay_esn))
2110 {
2111 free(*replay_esn);
2112 *replay_esn = malloc(RTA_PAYLOAD(rta));
2113 *replay_esn_len = RTA_PAYLOAD(rta);
2114 memcpy(*replay_esn, RTA_DATA(rta), RTA_PAYLOAD(rta));
2115 }
2116 rta = RTA_NEXT(rta, rtasize);
2117 }
2118 }
2119 free(out);
2120 }
2121
2122 METHOD(kernel_ipsec_t, query_sa, status_t,
2123 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2124 kernel_ipsec_query_sa_t *data, uint64_t *bytes, uint64_t *packets,
2125 time_t *time)
2126 {
2127 netlink_buf_t request;
2128 struct nlmsghdr *out = NULL, *hdr;
2129 struct xfrm_usersa_id *sa_id;
2130 struct xfrm_usersa_info *sa = NULL;
2131 status_t status = FAILED;
2132 size_t len;
2133 char markstr[32] = "";
2134
2135 memset(&request, 0, sizeof(request));
2136 format_mark(markstr, sizeof(markstr), id->mark);
2137
2138 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x%s", ntohl(id->spi),
2139 markstr);
2140
2141 hdr = &request.hdr;
2142 hdr->nlmsg_flags = NLM_F_REQUEST;
2143 hdr->nlmsg_type = XFRM_MSG_GETSA;
2144 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2145
2146 sa_id = NLMSG_DATA(hdr);
2147 host2xfrm(id->dst, &sa_id->daddr);
2148 sa_id->spi = id->spi;
2149 sa_id->proto = id->proto;
2150 sa_id->family = id->dst->get_family(id->dst);
2151
2152 if (!add_mark(hdr, sizeof(request), id->mark))
2153 {
2154 return FAILED;
2155 }
2156 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2157 {
2158 return FAILED;
2159 }
2160
2161 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2162 {
2163 hdr = out;
2164 while (NLMSG_OK(hdr, len))
2165 {
2166 switch (hdr->nlmsg_type)
2167 {
2168 case XFRM_MSG_NEWSA:
2169 {
2170 sa = NLMSG_DATA(hdr);
2171 break;
2172 }
2173 case NLMSG_ERROR:
2174 {
2175 struct nlmsgerr *err = NLMSG_DATA(hdr);
2176
2177 DBG1(DBG_KNL, "querying SAD entry with SPI %.8x%s failed: "
2178 "%s (%d)", ntohl(id->spi), markstr,
2179 strerror(-err->error), -err->error);
2180 break;
2181 }
2182 default:
2183 hdr = NLMSG_NEXT(hdr, len);
2184 continue;
2185 case NLMSG_DONE:
2186 break;
2187 }
2188 break;
2189 }
2190 }
2191
2192 if (sa == NULL)
2193 {
2194 DBG2(DBG_KNL, "unable to query SAD entry with SPI %.8x%s",
2195 ntohl(id->spi), markstr);
2196 }
2197 else
2198 {
2199 if (bytes)
2200 {
2201 *bytes = sa->curlft.bytes;
2202 }
2203 if (packets)
2204 {
2205 *packets = sa->curlft.packets;
2206 }
2207 if (time)
2208 { /* curlft contains an "use" time, but that contains a timestamp
2209 * of the first use, not the last. Last use time must be queried
2210 * on the policy on Linux */
2211 *time = 0;
2212 }
2213 status = SUCCESS;
2214 }
2215 memwipe(out, len);
2216 free(out);
2217 return status;
2218 }
2219
2220 METHOD(kernel_ipsec_t, del_sa, status_t,
2221 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2222 kernel_ipsec_del_sa_t *data)
2223 {
2224 netlink_buf_t request;
2225 struct nlmsghdr *hdr;
2226 struct xfrm_usersa_id *sa_id;
2227 char markstr[32] = "";
2228
2229 /* if IPComp was used, we first delete the additional IPComp SA */
2230 if (data->cpi)
2231 {
2232 kernel_ipsec_sa_id_t ipcomp_id = {
2233 .src = id->src,
2234 .dst = id->dst,
2235 .spi = htonl(ntohs(data->cpi)),
2236 .proto = IPPROTO_COMP,
2237 .mark = id->mark,
2238 };
2239 kernel_ipsec_del_sa_t ipcomp = {};
2240 del_sa(this, &ipcomp_id, &ipcomp);
2241 }
2242
2243 memset(&request, 0, sizeof(request));
2244 format_mark(markstr, sizeof(markstr), id->mark);
2245
2246 DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x%s", ntohl(id->spi),
2247 markstr);
2248
2249 hdr = &request.hdr;
2250 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2251 hdr->nlmsg_type = XFRM_MSG_DELSA;
2252 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2253
2254 sa_id = NLMSG_DATA(hdr);
2255 host2xfrm(id->dst, &sa_id->daddr);
2256 sa_id->spi = id->spi;
2257 sa_id->proto = id->proto;
2258 sa_id->family = id->dst->get_family(id->dst);
2259
2260 if (!add_mark(hdr, sizeof(request), id->mark))
2261 {
2262 return FAILED;
2263 }
2264 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2265 {
2266 return FAILED;
2267 }
2268
2269 switch (this->socket_xfrm->send_ack(this->socket_xfrm, hdr))
2270 {
2271 case SUCCESS:
2272 DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x%s",
2273 ntohl(id->spi), markstr);
2274 return SUCCESS;
2275 case NOT_FOUND:
2276 return NOT_FOUND;
2277 default:
2278 DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x%s",
2279 ntohl(id->spi), markstr);
2280 return FAILED;
2281 }
2282 }
2283
2284 METHOD(kernel_ipsec_t, update_sa, status_t,
2285 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2286 kernel_ipsec_update_sa_t *data)
2287 {
2288 netlink_buf_t request;
2289 struct nlmsghdr *hdr, *out_hdr = NULL, *out = NULL;
2290 struct xfrm_usersa_id *sa_id;
2291 struct xfrm_usersa_info *sa;
2292 size_t len;
2293 struct rtattr *rta;
2294 size_t rtasize;
2295 struct xfrm_encap_tmpl* encap = NULL;
2296 struct xfrm_replay_state *replay = NULL;
2297 struct xfrm_replay_state_esn *replay_esn = NULL;
2298 struct xfrm_lifetime_cur *lifetime = NULL;
2299 uint32_t replay_esn_len = 0;
2300 kernel_ipsec_del_sa_t del = { 0 };
2301 status_t status = FAILED;
2302 traffic_selector_t *ts;
2303 char markstr[32] = "";
2304
2305 /* if IPComp is used, we first update the IPComp SA */
2306 if (data->cpi)
2307 {
2308 kernel_ipsec_sa_id_t ipcomp_id = {
2309 .src = id->src,
2310 .dst = id->dst,
2311 .spi = htonl(ntohs(data->cpi)),
2312 .proto = IPPROTO_COMP,
2313 .mark = id->mark,
2314 .if_id = id->if_id,
2315 };
2316 kernel_ipsec_update_sa_t ipcomp = {
2317 .new_src = data->new_src,
2318 .new_dst = data->new_dst,
2319 };
2320 update_sa(this, &ipcomp_id, &ipcomp);
2321 }
2322
2323 memset(&request, 0, sizeof(request));
2324 format_mark(markstr, sizeof(markstr), id->mark);
2325
2326 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x%s for update",
2327 ntohl(id->spi), markstr);
2328
2329 /* query the existing SA first */
2330 hdr = &request.hdr;
2331 hdr->nlmsg_flags = NLM_F_REQUEST;
2332 hdr->nlmsg_type = XFRM_MSG_GETSA;
2333 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2334
2335 sa_id = NLMSG_DATA(hdr);
2336 host2xfrm(id->dst, &sa_id->daddr);
2337 sa_id->spi = id->spi;
2338 sa_id->proto = id->proto;
2339 sa_id->family = id->dst->get_family(id->dst);
2340
2341 if (!add_mark(hdr, sizeof(request), id->mark))
2342 {
2343 return FAILED;
2344 }
2345 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2346 {
2347 return FAILED;
2348 }
2349
2350 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2351 {
2352 hdr = out;
2353 while (NLMSG_OK(hdr, len))
2354 {
2355 switch (hdr->nlmsg_type)
2356 {
2357 case XFRM_MSG_NEWSA:
2358 {
2359 out_hdr = hdr;
2360 break;
2361 }
2362 case NLMSG_ERROR:
2363 {
2364 struct nlmsgerr *err = NLMSG_DATA(hdr);
2365 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2366 strerror(-err->error), -err->error);
2367 break;
2368 }
2369 default:
2370 hdr = NLMSG_NEXT(hdr, len);
2371 continue;
2372 case NLMSG_DONE:
2373 break;
2374 }
2375 break;
2376 }
2377 }
2378 if (!out_hdr)
2379 {
2380 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2381 ntohl(id->spi), markstr);
2382 goto failed;
2383 }
2384
2385 get_replay_state(this, id, &replay_esn, &replay_esn_len, &replay,
2386 &lifetime);
2387
2388 /* delete the old SA (without affecting the IPComp SA) */
2389 if (del_sa(this, id, &del) != SUCCESS)
2390 {
2391 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x%s",
2392 ntohl(id->spi), markstr);
2393 goto failed;
2394 }
2395
2396 DBG2(DBG_KNL, "updating SAD entry with SPI %.8x%s from %#H..%#H to "
2397 "%#H..%#H", ntohl(id->spi), markstr, id->src, id->dst, data->new_src,
2398 data->new_dst);
2399 /* copy over the SA from out to request */
2400 hdr = &request.hdr;
2401 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2402 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2403 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2404 sa = NLMSG_DATA(hdr);
2405 memcpy(sa, NLMSG_DATA(out_hdr), sizeof(struct xfrm_usersa_info));
2406 sa->family = data->new_dst->get_family(data->new_dst);
2407
2408 if (!id->src->ip_equals(id->src, data->new_src))
2409 {
2410 host2xfrm(data->new_src, &sa->saddr);
2411
2412 ts = selector2ts(&sa->sel, TRUE);
2413 if (ts && ts->is_host(ts, id->src))
2414 {
2415 ts->set_address(ts, data->new_src);
2416 ts2subnet(ts, &sa->sel.saddr, &sa->sel.prefixlen_s);
2417 }
2418 DESTROY_IF(ts);
2419 }
2420 if (!id->dst->ip_equals(id->dst, data->new_dst))
2421 {
2422 host2xfrm(data->new_dst, &sa->id.daddr);
2423
2424 ts = selector2ts(&sa->sel, FALSE);
2425 if (ts && ts->is_host(ts, id->dst))
2426 {
2427 ts->set_address(ts, data->new_dst);
2428 ts2subnet(ts, &sa->sel.daddr, &sa->sel.prefixlen_d);
2429 }
2430 DESTROY_IF(ts);
2431 }
2432
2433 rta = XFRM_RTA(out_hdr, struct xfrm_usersa_info);
2434 rtasize = XFRM_PAYLOAD(out_hdr, struct xfrm_usersa_info);
2435 while (RTA_OK(rta, rtasize))
2436 {
2437 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2438 if (rta->rta_type != XFRMA_ENCAP || data->new_encap)
2439 {
2440 if (rta->rta_type == XFRMA_ENCAP)
2441 { /* update encap tmpl */
2442 encap = RTA_DATA(rta);
2443 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2444 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2445 }
2446 if (rta->rta_type == XFRMA_OFFLOAD_DEV)
2447 { /* update offload device */
2448 struct xfrm_user_offload *offload;
2449 host_t *local;
2450 char *ifname;
2451
2452 offload = RTA_DATA(rta);
2453 local = offload->flags & XFRM_OFFLOAD_INBOUND ? data->new_dst
2454 : data->new_src;
2455
2456 if (charon->kernel->get_interface(charon->kernel, local,
2457 &ifname))
2458 {
2459 offload->ifindex = if_nametoindex(ifname);
2460 if (local->get_family(local) == AF_INET6)
2461 {
2462 offload->flags |= XFRM_OFFLOAD_IPV6;
2463 }
2464 else
2465 {
2466 offload->flags &= ~XFRM_OFFLOAD_IPV6;
2467 }
2468 free(ifname);
2469 }
2470 }
2471 netlink_add_attribute(hdr, rta->rta_type,
2472 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)),
2473 sizeof(request));
2474 }
2475 rta = RTA_NEXT(rta, rtasize);
2476 }
2477
2478 if (encap == NULL && data->new_encap)
2479 { /* add tmpl if we are enabling it */
2480 encap = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP,
2481 sizeof(*encap));
2482 if (!encap)
2483 {
2484 goto failed;
2485 }
2486 encap->encap_type = UDP_ENCAP_ESPINUDP;
2487 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2488 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2489 memset(&encap->encap_oa, 0, sizeof (xfrm_address_t));
2490 }
2491
2492 if (replay_esn)
2493 {
2494 struct xfrm_replay_state_esn *state;
2495
2496 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
2497 replay_esn_len);
2498 if (!state)
2499 {
2500 goto failed;
2501 }
2502 memcpy(state, replay_esn, replay_esn_len);
2503 }
2504 else if (replay)
2505 {
2506 struct xfrm_replay_state *state;
2507
2508 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_VAL,
2509 sizeof(*state));
2510 if (!state)
2511 {
2512 goto failed;
2513 }
2514 memcpy(state, replay, sizeof(*state));
2515 }
2516 else
2517 {
2518 DBG1(DBG_KNL, "unable to copy replay state from old SAD entry with "
2519 "SPI %.8x%s", ntohl(id->spi), markstr);
2520 }
2521 if (lifetime)
2522 {
2523 struct xfrm_lifetime_cur *state;
2524
2525 state = netlink_reserve(hdr, sizeof(request), XFRMA_LTIME_VAL,
2526 sizeof(*state));
2527 if (!state)
2528 {
2529 goto failed;
2530 }
2531 memcpy(state, lifetime, sizeof(*state));
2532 }
2533 else
2534 {
2535 DBG1(DBG_KNL, "unable to copy usage stats from old SAD entry with "
2536 "SPI %.8x%s", ntohl(id->spi), markstr);
2537 }
2538
2539 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2540 {
2541 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2542 ntohl(id->spi), markstr);
2543 goto failed;
2544 }
2545
2546 status = SUCCESS;
2547 failed:
2548 free(replay);
2549 free(replay_esn);
2550 free(lifetime);
2551 memwipe(out, len);
2552 memwipe(&request, sizeof(request));
2553 free(out);
2554
2555 return status;
2556 }
2557
2558 METHOD(kernel_ipsec_t, flush_sas, status_t,
2559 private_kernel_netlink_ipsec_t *this)
2560 {
2561 netlink_buf_t request;
2562 struct nlmsghdr *hdr;
2563 struct xfrm_usersa_flush *flush;
2564 struct {
2565 uint8_t proto;
2566 char *name;
2567 } protos[] = {
2568 { IPPROTO_AH, "AH" },
2569 { IPPROTO_ESP, "ESP" },
2570 { IPPROTO_COMP, "IPComp" },
2571 };
2572 int i;
2573
2574 memset(&request, 0, sizeof(request));
2575
2576 hdr = &request.hdr;
2577 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2578 hdr->nlmsg_type = XFRM_MSG_FLUSHSA;
2579 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
2580
2581 flush = NLMSG_DATA(hdr);
2582
2583 for (i = 0; i < countof(protos); i++)
2584 {
2585 DBG2(DBG_KNL, "flushing all %s SAD entries", protos[i].name);
2586
2587 flush->proto = protos[i].proto;
2588
2589 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2590 {
2591 DBG1(DBG_KNL, "unable to flush %s SAD entries", protos[i].name);
2592 return FAILED;
2593 }
2594 }
2595 return SUCCESS;
2596 }
2597
2598 /**
2599 * Unlock the mutex and signal waiting threads
2600 */
2601 static void policy_change_done(private_kernel_netlink_ipsec_t *this,
2602 policy_entry_t *policy)
2603 {
2604 policy->working = FALSE;
2605 if (policy->waiting)
2606 { /* don't need to wake threads waiting for other policies */
2607 this->condvar->broadcast(this->condvar);
2608 }
2609 this->mutex->unlock(this->mutex);
2610 }
2611
2612 /**
2613 * Install a route for the given policy if enabled and required
2614 */
2615 static void install_route(private_kernel_netlink_ipsec_t *this,
2616 policy_entry_t *policy, policy_sa_t *mapping, ipsec_sa_t *ipsec)
2617 {
2618 policy_sa_out_t *out = (policy_sa_out_t*)mapping;
2619 route_entry_t *route;
2620 host_t *iface;
2621
2622 INIT(route,
2623 .prefixlen = policy->sel.prefixlen_d,
2624 );
2625
2626 if (charon->kernel->get_address_by_ts(charon->kernel, out->src_ts,
2627 &route->src_ip, NULL) == SUCCESS)
2628 {
2629 if (!ipsec->dst->is_anyaddr(ipsec->dst))
2630 {
2631 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2632 ipsec->dst, -1, ipsec->src,
2633 &route->if_name);
2634 }
2635 else
2636 { /* for shunt policies */
2637 iface = xfrm2host(policy->sel.family, &policy->sel.daddr, 0);
2638 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2639 iface, policy->sel.prefixlen_d,
2640 route->src_ip, &route->if_name);
2641 iface->destroy(iface);
2642 }
2643 route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2644 memcpy(route->dst_net.ptr, &policy->sel.daddr, route->dst_net.len);
2645
2646 /* get the interface to install the route for, if we haven't one yet.
2647 * If we have a local address, use it. Otherwise (for shunt policies)
2648 * use the route's source address. */
2649 if (!route->if_name)
2650 {
2651 iface = ipsec->src;
2652 if (iface->is_anyaddr(iface))
2653 {
2654 iface = route->src_ip;
2655 }
2656 if (!charon->kernel->get_interface(charon->kernel, iface,
2657 &route->if_name))
2658 {
2659 route_entry_destroy(route);
2660 return;
2661 }
2662 }
2663 if (policy->route)
2664 {
2665 route_entry_t *old = policy->route;
2666 if (route_entry_equals(old, route))
2667 {
2668 route_entry_destroy(route);
2669 return;
2670 }
2671 /* uninstall previously installed route */
2672 if (charon->kernel->del_route(charon->kernel, old->dst_net,
2673 old->prefixlen, old->gateway,
2674 old->src_ip, old->if_name) != SUCCESS)
2675 {
2676 DBG1(DBG_KNL, "error uninstalling route installed with policy "
2677 "%R === %R %N", out->src_ts, out->dst_ts, policy_dir_names,
2678 policy->direction);
2679 }
2680 route_entry_destroy(old);
2681 policy->route = NULL;
2682 }
2683
2684 DBG2(DBG_KNL, "installing route: %R via %H src %H dev %s", out->dst_ts,
2685 route->gateway, route->src_ip, route->if_name);
2686 switch (charon->kernel->add_route(charon->kernel, route->dst_net,
2687 route->prefixlen, route->gateway,
2688 route->src_ip, route->if_name))
2689 {
2690 default:
2691 DBG1(DBG_KNL, "unable to install source route for %H",
2692 route->src_ip);
2693 /* FALL */
2694 case ALREADY_DONE:
2695 /* route exists, do not uninstall */
2696 route_entry_destroy(route);
2697 break;
2698 case SUCCESS:
2699 /* cache the installed route */
2700 policy->route = route;
2701 break;
2702 }
2703 }
2704 else
2705 {
2706 free(route);
2707 }
2708 }
2709
2710 /**
2711 * Add or update a policy in the kernel.
2712 *
2713 * Note: The mutex has to be locked when entering this function
2714 * and is unlocked here in any case.
2715 */
2716 static status_t add_policy_internal(private_kernel_netlink_ipsec_t *this,
2717 policy_entry_t *policy, policy_sa_t *mapping, bool update)
2718 {
2719 netlink_buf_t request;
2720 policy_entry_t clone;
2721 ipsec_sa_t *ipsec = mapping->sa;
2722 struct xfrm_userpolicy_info *policy_info;
2723 struct nlmsghdr *hdr;
2724 status_t status;
2725 int i;
2726
2727 /* clone the policy so we are able to check it out again later */
2728 memcpy(&clone, policy, sizeof(policy_entry_t));
2729
2730 memset(&request, 0, sizeof(request));
2731 hdr = &request.hdr;
2732 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2733 hdr->nlmsg_type = update ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
2734 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2735
2736 policy_info = NLMSG_DATA(hdr);
2737 policy_info->sel = policy->sel;
2738 policy_info->dir = policy->direction;
2739
2740 /* calculate priority based on selector size, small size = high prio */
2741 policy_info->priority = mapping->priority;
2742 policy_info->action = mapping->type != POLICY_DROP ? XFRM_POLICY_ALLOW
2743 : XFRM_POLICY_BLOCK;
2744 policy_info->share = XFRM_SHARE_ANY;
2745
2746 /* policies don't expire */
2747 policy_info->lft.soft_byte_limit = XFRM_INF;
2748 policy_info->lft.soft_packet_limit = XFRM_INF;
2749 policy_info->lft.hard_byte_limit = XFRM_INF;
2750 policy_info->lft.hard_packet_limit = XFRM_INF;
2751 policy_info->lft.soft_add_expires_seconds = 0;
2752 policy_info->lft.hard_add_expires_seconds = 0;
2753 policy_info->lft.soft_use_expires_seconds = 0;
2754 policy_info->lft.hard_use_expires_seconds = 0;
2755
2756 if (mapping->type == POLICY_IPSEC && ipsec->cfg.reqid)
2757 {
2758 struct xfrm_user_tmpl *tmpl;
2759 struct {
2760 uint8_t proto;
2761 uint32_t spi;
2762 bool use;
2763 } protos[] = {
2764 { IPPROTO_COMP, htonl(ntohs(ipsec->cfg.ipcomp.cpi)),
2765 ipsec->cfg.ipcomp.transform != IPCOMP_NONE },
2766 { IPPROTO_ESP, ipsec->cfg.esp.spi, ipsec->cfg.esp.use },
2767 { IPPROTO_AH, ipsec->cfg.ah.spi, ipsec->cfg.ah.use },
2768 };
2769 ipsec_mode_t proto_mode = ipsec->cfg.mode;
2770 int count = 0;
2771
2772 for (i = 0; i < countof(protos); i++)
2773 {
2774 if (protos[i].use)
2775 {
2776 count++;
2777 }
2778 }
2779 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_TMPL,
2780 count * sizeof(*tmpl));
2781 if (!tmpl)
2782 {
2783 policy_change_done(this, policy);
2784 return FAILED;
2785 }
2786
2787 for (i = 0; i < countof(protos); i++)
2788 {
2789 if (!protos[i].use)
2790 {
2791 continue;
2792 }
2793 tmpl->reqid = ipsec->cfg.reqid;
2794 tmpl->id.proto = protos[i].proto;
2795 if (policy->direction == POLICY_OUT)
2796 {
2797 tmpl->id.spi = protos[i].spi;
2798 }
2799 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2800 tmpl->mode = mode2kernel(proto_mode);
2801 tmpl->optional = protos[i].proto == IPPROTO_COMP &&
2802 policy->direction != POLICY_OUT;
2803 tmpl->family = ipsec->src->get_family(ipsec->src);
2804
2805 if (proto_mode == MODE_TUNNEL || proto_mode == MODE_BEET)
2806 { /* only for tunnel mode */
2807 host2xfrm(ipsec->src, &tmpl->saddr);
2808 host2xfrm(ipsec->dst, &tmpl->id.daddr);
2809 }
2810
2811 tmpl++;
2812
2813 /* use transport mode for other SAs */
2814 proto_mode = MODE_TRANSPORT;
2815 }
2816 }
2817
2818 if (!add_mark(hdr, sizeof(request), ipsec->mark))
2819 {
2820 policy_change_done(this, policy);
2821 return FAILED;
2822 }
2823 if (ipsec->if_id &&
2824 !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, ipsec->if_id))
2825 {
2826 policy_change_done(this, policy);
2827 return FAILED;
2828 }
2829 this->mutex->unlock(this->mutex);
2830
2831 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2832 if (status == ALREADY_DONE && !update)
2833 {
2834 DBG1(DBG_KNL, "policy already exists, try to update it");
2835 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2836 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2837 }
2838
2839 this->mutex->lock(this->mutex);
2840 if (status != SUCCESS)
2841 {
2842 policy_change_done(this, policy);
2843 return FAILED;
2844 }
2845 /* install a route, if:
2846 * - this is an outbound policy (to just get one for each child)
2847 * - routing is not disabled via strongswan.conf
2848 * - the selector is not for a specific protocol/port
2849 * - no XFRM interface ID is configured
2850 * - we are in tunnel/BEET mode or install a bypass policy
2851 */
2852 if (policy->direction == POLICY_OUT && this->install_routes &&
2853 !policy->sel.proto && !policy->sel.dport && !policy->sel.sport &&
2854 !policy->if_id)
2855 {
2856 if (mapping->type == POLICY_PASS ||
2857 (mapping->type == POLICY_IPSEC && ipsec->cfg.mode != MODE_TRANSPORT))
2858 {
2859 install_route(this, policy, mapping, ipsec);
2860 }
2861 }
2862 policy_change_done(this, policy);
2863 return SUCCESS;
2864 }
2865
2866 METHOD(kernel_ipsec_t, add_policy, status_t,
2867 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
2868 kernel_ipsec_manage_policy_t *data)
2869 {
2870 policy_entry_t *policy, *current;
2871 policy_sa_t *assigned_sa, *current_sa;
2872 enumerator_t *enumerator;
2873 bool found = FALSE, update = TRUE;
2874 char markstr[32] = "";
2875 uint32_t cur_priority = 0;
2876 int use_count;
2877
2878 /* create a policy */
2879 INIT(policy,
2880 .sel = ts2selector(id->src_ts, id->dst_ts, id->interface),
2881 .mark = id->mark.value & id->mark.mask,
2882 .if_id = id->if_id,
2883 .direction = id->dir,
2884 .reqid = data->sa->reqid,
2885 );
2886 format_mark(markstr, sizeof(markstr), id->mark);
2887
2888 /* find the policy, which matches EXACTLY */
2889 this->mutex->lock(this->mutex);
2890 current = this->policies->get(this->policies, policy);
2891 if (current)
2892 {
2893 if (current->reqid && data->sa->reqid &&
2894 current->reqid != data->sa->reqid)
2895 {
2896 DBG1(DBG_CFG, "unable to install policy %R === %R %N%s for reqid "
2897 "%u, the same policy for reqid %u exists",
2898 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr,
2899 data->sa->reqid, current->reqid);
2900 policy_entry_destroy(this, policy);
2901 this->mutex->unlock(this->mutex);
2902 return INVALID_STATE;
2903 }
2904 /* use existing policy */
2905 DBG2(DBG_KNL, "policy %R === %R %N%s already exists, increasing "
2906 "refcount", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
2907 markstr);
2908 policy_entry_destroy(this, policy);
2909 policy = current;
2910 found = TRUE;
2911
2912 policy->waiting++;
2913 while (policy->working)
2914 {
2915 this->condvar->wait(this->condvar, this->mutex);
2916 }
2917 policy->waiting--;
2918 policy->working = TRUE;
2919 }
2920 else
2921 { /* use the new one, if we have no such policy */
2922 policy->used_by = linked_list_create();
2923 this->policies->put(this->policies, policy, policy);
2924 }
2925
2926 /* cache the assigned IPsec SA */
2927 assigned_sa = policy_sa_create(this, id->dir, data->type, data->src,
2928 data->dst, id->src_ts, id->dst_ts, id->mark,
2929 id->if_id, data->sa);
2930 assigned_sa->auto_priority = get_priority(policy, data->prio, id->interface);
2931 assigned_sa->priority = this->get_priority ? this->get_priority(id, data)
2932 : data->manual_prio;
2933 assigned_sa->priority = assigned_sa->priority ?: assigned_sa->auto_priority;
2934
2935 /* insert the SA according to its priority */
2936 enumerator = policy->used_by->create_enumerator(policy->used_by);
2937 while (enumerator->enumerate(enumerator, (void**)&current_sa))
2938 {
2939 if (current_sa->priority > assigned_sa->priority)
2940 {
2941 break;
2942 }
2943 if (current_sa->priority == assigned_sa->priority)
2944 {
2945 /* in case of equal manual prios order SAs by automatic priority */
2946 if (current_sa->auto_priority > assigned_sa->auto_priority)
2947 {
2948 break;
2949 }
2950 /* prefer SAs with a reqid over those without */
2951 if (current_sa->auto_priority == assigned_sa->auto_priority &&
2952 (!current_sa->sa->cfg.reqid || assigned_sa->sa->cfg.reqid))
2953 {
2954 break;
2955 }
2956 }
2957 if (update)
2958 {
2959 cur_priority = current_sa->priority;
2960 update = FALSE;
2961 }
2962 }
2963 policy->used_by->insert_before(policy->used_by, enumerator, assigned_sa);
2964 enumerator->destroy(enumerator);
2965
2966 use_count = policy->used_by->get_count(policy->used_by);
2967 if (!update)
2968 { /* we don't update the policy if the priority is lower than that of
2969 * the currently installed one */
2970 policy_change_done(this, policy);
2971 DBG2(DBG_KNL, "not updating policy %R === %R %N%s [priority %u, "
2972 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
2973 id->dir, markstr, cur_priority, use_count);
2974 return SUCCESS;
2975 }
2976 policy->reqid = assigned_sa->sa->cfg.reqid;
2977
2978 if (this->policy_update)
2979 {
2980 found = TRUE;
2981 }
2982
2983 DBG2(DBG_KNL, "%s policy %R === %R %N%s [priority %u, refcount %d]",
2984 found ? "updating" : "adding", id->src_ts, id->dst_ts,
2985 policy_dir_names, id->dir, markstr, assigned_sa->priority, use_count);
2986
2987 if (add_policy_internal(this, policy, assigned_sa, found) != SUCCESS)
2988 {
2989 DBG1(DBG_KNL, "unable to %s policy %R === %R %N%s",
2990 found ? "update" : "add", id->src_ts, id->dst_ts,
2991 policy_dir_names, id->dir, markstr);
2992 return FAILED;
2993 }
2994 return SUCCESS;
2995 }
2996
2997 METHOD(kernel_ipsec_t, query_policy, status_t,
2998 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
2999 kernel_ipsec_query_policy_t *data, time_t *use_time)
3000 {
3001 netlink_buf_t request;
3002 struct nlmsghdr *out = NULL, *hdr;
3003 struct xfrm_userpolicy_id *policy_id;
3004 struct xfrm_userpolicy_info *policy = NULL;
3005 size_t len;
3006 char markstr[32] = "";
3007
3008 memset(&request, 0, sizeof(request));
3009 format_mark(markstr, sizeof(markstr), id->mark);
3010
3011 DBG2(DBG_KNL, "querying policy %R === %R %N%s", id->src_ts, id->dst_ts,
3012 policy_dir_names, id->dir, markstr);
3013
3014 hdr = &request.hdr;
3015 hdr->nlmsg_flags = NLM_F_REQUEST;
3016 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
3017 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3018
3019 policy_id = NLMSG_DATA(hdr);
3020 policy_id->sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3021 policy_id->dir = id->dir;
3022
3023 if (!add_mark(hdr, sizeof(request), id->mark))
3024 {
3025 return FAILED;
3026 }
3027 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
3028 {
3029 return FAILED;
3030 }
3031
3032 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
3033 {
3034 hdr = out;
3035 while (NLMSG_OK(hdr, len))
3036 {
3037 switch (hdr->nlmsg_type)
3038 {
3039 case XFRM_MSG_NEWPOLICY:
3040 {
3041 policy = NLMSG_DATA(hdr);
3042 break;
3043 }
3044 case NLMSG_ERROR:
3045 {
3046 struct nlmsgerr *err = NLMSG_DATA(hdr);
3047 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
3048 strerror(-err->error), -err->error);
3049 break;
3050 }
3051 default:
3052 hdr = NLMSG_NEXT(hdr, len);
3053 continue;
3054 case NLMSG_DONE:
3055 break;
3056 }
3057 break;
3058 }
3059 }
3060
3061 if (policy == NULL)
3062 {
3063 DBG2(DBG_KNL, "unable to query policy %R === %R %N%s", id->src_ts,
3064 id->dst_ts, policy_dir_names, id->dir, markstr);
3065 free(out);
3066 return FAILED;
3067 }
3068
3069 if (policy->curlft.use_time)
3070 {
3071 /* we need the monotonic time, but the kernel returns system time. */
3072 *use_time = time_monotonic(NULL) - (time(NULL) - policy->curlft.use_time);
3073 }
3074 else
3075 {
3076 *use_time = 0;
3077 }
3078
3079 free(out);
3080 return SUCCESS;
3081 }
3082
3083 METHOD(kernel_ipsec_t, del_policy, status_t,
3084 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
3085 kernel_ipsec_manage_policy_t *data)
3086 {
3087 policy_entry_t *current, policy;
3088 enumerator_t *enumerator;
3089 policy_sa_t *mapping;
3090 netlink_buf_t request;
3091 struct nlmsghdr *hdr;
3092 struct xfrm_userpolicy_id *policy_id;
3093 bool is_installed = TRUE;
3094 uint32_t priority, auto_priority, cur_priority;
3095 ipsec_sa_t assigned_sa = {
3096 .src = data->src,
3097 .dst = data->dst,
3098 .mark = id->mark,
3099 .if_id = id->if_id,
3100 .cfg = *data->sa,
3101 };
3102 char markstr[32] = "";
3103 int use_count;
3104 status_t status = SUCCESS;
3105
3106 format_mark(markstr, sizeof(markstr), id->mark);
3107
3108 DBG2(DBG_KNL, "deleting policy %R === %R %N%s", id->src_ts, id->dst_ts,
3109 policy_dir_names, id->dir, markstr);
3110
3111 /* create a policy */
3112 memset(&policy, 0, sizeof(policy_entry_t));
3113 policy.sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3114 policy.mark = id->mark.value & id->mark.mask;
3115 policy.if_id = id->if_id;
3116 policy.direction = id->dir;
3117
3118 /* find the policy */
3119 this->mutex->lock(this->mutex);
3120 current = this->policies->get(this->policies, &policy);
3121 if (!current)
3122 {
3123 DBG1(DBG_KNL, "deleting policy %R === %R %N%s failed, not found",
3124 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr);
3125 this->mutex->unlock(this->mutex);
3126 return NOT_FOUND;
3127 }
3128 current->waiting++;
3129 while (current->working)
3130 {
3131 this->condvar->wait(this->condvar, this->mutex);
3132 }
3133 current->working = TRUE;
3134 current->waiting--;
3135
3136 /* remove mapping to SA by reqid and priority */
3137 auto_priority = get_priority(current, data->prio,id->interface);
3138 priority = this->get_priority ? this->get_priority(id, data)
3139 : data->manual_prio;
3140 priority = priority ?: auto_priority;
3141
3142 enumerator = current->used_by->create_enumerator(current->used_by);
3143 while (enumerator->enumerate(enumerator, (void**)&mapping))
3144 {
3145 if (priority == mapping->priority &&
3146 auto_priority == mapping->auto_priority &&
3147 data->type == mapping->type &&
3148 ipsec_sa_equals(mapping->sa, &assigned_sa))
3149 {
3150 current->used_by->remove_at(current->used_by, enumerator);
3151 policy_sa_destroy(mapping, id->dir, this);
3152 break;
3153 }
3154 if (is_installed)
3155 {
3156 cur_priority = mapping->priority;
3157 is_installed = FALSE;
3158 }
3159 }
3160 enumerator->destroy(enumerator);
3161
3162 use_count = current->used_by->get_count(current->used_by);
3163 if (use_count > 0)
3164 { /* policy is used by more SAs, keep in kernel */
3165 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
3166 if (!is_installed)
3167 { /* no need to update as the policy was not installed for this SA */
3168 policy_change_done(this, current);
3169 DBG2(DBG_KNL, "not updating policy %R === %R %N%s [priority %u, "
3170 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
3171 id->dir, markstr, cur_priority, use_count);
3172 return SUCCESS;
3173 }
3174 current->used_by->get_first(current->used_by, (void**)&mapping);
3175 current->reqid = mapping->sa->cfg.reqid;
3176
3177 DBG2(DBG_KNL, "updating policy %R === %R %N%s [priority %u, "
3178 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3179 markstr, mapping->priority, use_count);
3180
3181 if (add_policy_internal(this, current, mapping, TRUE) != SUCCESS)
3182 {
3183 DBG1(DBG_KNL, "unable to update policy %R === %R %N%s",
3184 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr);
3185 return FAILED;
3186 }
3187 return SUCCESS;
3188 }
3189
3190 memset(&request, 0, sizeof(request));
3191
3192 hdr = &request.hdr;
3193 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3194 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
3195 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3196
3197 policy_id = NLMSG_DATA(hdr);
3198 policy_id->sel = current->sel;
3199 policy_id->dir = id->dir;
3200
3201 if (!add_mark(hdr, sizeof(request), id->mark))
3202 {
3203 policy_change_done(this, current);
3204 return FAILED;
3205 }
3206 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
3207 {
3208 policy_change_done(this, current);
3209 return FAILED;
3210 }
3211
3212 if (current->route)
3213 {
3214 route_entry_t *route = current->route;
3215 if (charon->kernel->del_route(charon->kernel, route->dst_net,
3216 route->prefixlen, route->gateway,
3217 route->src_ip, route->if_name) != SUCCESS)
3218 {
3219 DBG1(DBG_KNL, "error uninstalling route installed with policy "
3220 "%R === %R %N%s", id->src_ts, id->dst_ts, policy_dir_names,
3221 id->dir, markstr);
3222 }
3223 }
3224 this->mutex->unlock(this->mutex);
3225
3226 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3227 {
3228 DBG1(DBG_KNL, "unable to delete policy %R === %R %N%s", id->src_ts,
3229 id->dst_ts, policy_dir_names, id->dir, markstr);
3230 status = FAILED;
3231 }
3232
3233 this->mutex->lock(this->mutex);
3234 if (!current->waiting)
3235 { /* only if no other thread still needs the policy */
3236 this->policies->remove(this->policies, current);
3237 policy_entry_destroy(this, current);
3238 this->mutex->unlock(this->mutex);
3239 }
3240 else
3241 {
3242 policy_change_done(this, current);
3243 }
3244 return status;
3245 }
3246
3247 METHOD(kernel_ipsec_t, flush_policies, status_t,
3248 private_kernel_netlink_ipsec_t *this)
3249 {
3250 netlink_buf_t request;
3251 struct nlmsghdr *hdr;
3252
3253 memset(&request, 0, sizeof(request));
3254
3255 DBG2(DBG_KNL, "flushing all policies from SPD");
3256
3257 hdr = &request.hdr;
3258 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3259 hdr->nlmsg_type = XFRM_MSG_FLUSHPOLICY;
3260 hdr->nlmsg_len = NLMSG_LENGTH(0); /* no data associated */
3261
3262 /* by adding an rtattr of type XFRMA_POLICY_TYPE we could restrict this
3263 * to main or sub policies (default is main) */
3264
3265 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3266 {
3267 DBG1(DBG_KNL, "unable to flush SPD entries");
3268 return FAILED;
3269 }
3270 return SUCCESS;
3271 }
3272
3273 /**
3274 * Bypass socket using a per-socket policy
3275 */
3276 static bool add_socket_bypass(private_kernel_netlink_ipsec_t *this,
3277 int fd, int family)
3278 {
3279 struct xfrm_userpolicy_info policy;
3280 u_int sol, ipsec_policy;
3281
3282 switch (family)
3283 {
3284 case AF_INET:
3285 sol = SOL_IP;
3286 ipsec_policy = IP_XFRM_POLICY;
3287 break;
3288 case AF_INET6:
3289 sol = SOL_IPV6;
3290 ipsec_policy = IPV6_XFRM_POLICY;
3291 break;
3292 default:
3293 return FALSE;
3294 }
3295
3296 memset(&policy, 0, sizeof(policy));
3297 policy.action = XFRM_POLICY_ALLOW;
3298 policy.sel.family = family;
3299
3300 policy.dir = XFRM_POLICY_OUT;
3301 if (setsockopt(fd, sol, ipsec_policy, &policy, sizeof(policy)) < 0)
3302 {
3303 DBG1(DBG_KNL, "unable to set IPSEC_POLICY on socket: %s (%d)",
3304 strerror(errno), errno);
3305 return FALSE;
3306 }
3307 policy.dir = XFRM_POLICY_IN;
3308 if (setsockopt(fd, sol, ipsec_policy, &policy, sizeof(policy)) < 0)
3309 {
3310 DBG1(DBG_KNL, "unable to set IPSEC_POLICY on socket: %s (%d)",
3311 strerror(errno), errno);
3312 return FALSE;
3313 }
3314 return TRUE;
3315 }
3316
typedef struct bypass_t bypass_t;

/**
 * Description of a port based IKE bypass policy
 */
struct bypass_t {
	/** address family the policy applies to */
	int family;
	/** layer 4 protocol of the bypassed traffic */
	int proto;
	/** bypassed port, in network byte order */
	uint16_t port;
};
3328
3329 /**
3330 * Add or remove a bypass policy from/to kernel
3331 */
3332 static bool manage_bypass(private_kernel_netlink_ipsec_t *this,
3333 int type, policy_dir_t dir, bypass_t *bypass)
3334 {
3335 netlink_buf_t request;
3336 struct xfrm_selector *sel;
3337 struct nlmsghdr *hdr;
3338
3339 memset(&request, 0, sizeof(request));
3340 hdr = &request.hdr;
3341 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3342 hdr->nlmsg_type = type;
3343
3344 if (type == XFRM_MSG_NEWPOLICY)
3345 {
3346 struct xfrm_userpolicy_info *policy;
3347
3348 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
3349
3350 policy = NLMSG_DATA(hdr);
3351 policy->dir = dir;
3352 policy->priority = 32;
3353 policy->action = XFRM_POLICY_ALLOW;
3354 policy->share = XFRM_SHARE_ANY;
3355
3356 policy->lft.soft_byte_limit = XFRM_INF;
3357 policy->lft.soft_packet_limit = XFRM_INF;
3358 policy->lft.hard_byte_limit = XFRM_INF;
3359 policy->lft.hard_packet_limit = XFRM_INF;
3360
3361 sel = &policy->sel;
3362 }
3363 else /* XFRM_MSG_DELPOLICY */
3364 {
3365 struct xfrm_userpolicy_id *policy;
3366
3367 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3368
3369 policy = NLMSG_DATA(hdr);
3370 policy->dir = dir;
3371
3372 sel = &policy->sel;
3373 }
3374
3375 sel->family = bypass->family;
3376 sel->proto = bypass->proto;
3377 if (dir == POLICY_IN)
3378 {
3379 sel->dport = bypass->port;
3380 sel->dport_mask = 0xffff;
3381 }
3382 else
3383 {
3384 sel->sport = bypass->port;
3385 sel->sport_mask = 0xffff;
3386 }
3387 return this->socket_xfrm->send_ack(this->socket_xfrm, hdr) == SUCCESS;
3388 }
3389
3390 /**
3391 * Bypass socket using a port-based bypass policy
3392 */
3393 static bool add_port_bypass(private_kernel_netlink_ipsec_t *this,
3394 int fd, int family)
3395 {
3396 union {
3397 struct sockaddr sa;
3398 struct sockaddr_in in;
3399 struct sockaddr_in6 in6;
3400 } saddr;
3401 socklen_t len;
3402 bypass_t bypass = {
3403 .family = family,
3404 };
3405
3406 len = sizeof(saddr);
3407 if (getsockname(fd, &saddr.sa, &len) != 0)
3408 {
3409 return FALSE;
3410 }
3411 #ifdef SO_PROTOCOL /* since 2.6.32 */
3412 len = sizeof(bypass.proto);
3413 if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &bypass.proto, &len) != 0)
3414 #endif
3415 { /* assume UDP if SO_PROTOCOL not supported */
3416 bypass.proto = IPPROTO_UDP;
3417 }
3418 switch (family)
3419 {
3420 case AF_INET:
3421 bypass.port = saddr.in.sin_port;
3422 break;
3423 case AF_INET6:
3424 bypass.port = saddr.in6.sin6_port;
3425 break;
3426 default:
3427 return FALSE;
3428 }
3429
3430 if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_IN, &bypass))
3431 {
3432 return FALSE;
3433 }
3434 if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_OUT, &bypass))
3435 {
3436 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, &bypass);
3437 return FALSE;
3438 }
3439 array_insert(this->bypass, ARRAY_TAIL, &bypass);
3440
3441 return TRUE;
3442 }
3443
3444 /**
3445 * Remove installed port based bypass policy
3446 */
3447 static void remove_port_bypass(bypass_t *bypass, int idx,
3448 private_kernel_netlink_ipsec_t *this)
3449 {
3450 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_OUT, bypass);
3451 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, bypass);
3452 }
3453
3454 METHOD(kernel_ipsec_t, bypass_socket, bool,
3455 private_kernel_netlink_ipsec_t *this, int fd, int family)
3456 {
3457 if (lib->settings->get_bool(lib->settings,
3458 "%s.plugins.kernel-netlink.port_bypass", FALSE, lib->ns))
3459 {
3460 return add_port_bypass(this, fd, family);
3461 }
3462 return add_socket_bypass(this, fd, family);
3463 }
3464
3465 METHOD(kernel_ipsec_t, enable_udp_decap, bool,
3466 private_kernel_netlink_ipsec_t *this, int fd, int family, uint16_t port)
3467 {
3468 int type = UDP_ENCAP_ESPINUDP;
3469
3470 if (setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type)) < 0)
3471 {
3472 DBG1(DBG_KNL, "unable to set UDP_ENCAP: %s", strerror(errno));
3473 return FALSE;
3474 }
3475 return TRUE;
3476 }
3477
3478 METHOD(kernel_ipsec_t, destroy, void,
3479 private_kernel_netlink_ipsec_t *this)
3480 {
3481 enumerator_t *enumerator;
3482 policy_entry_t *policy;
3483
3484 array_destroy_function(this->bypass,
3485 (array_callback_t)remove_port_bypass, this);
3486 if (this->socket_xfrm_events > 0)
3487 {
3488 lib->watcher->remove(lib->watcher, this->socket_xfrm_events);
3489 close(this->socket_xfrm_events);
3490 }
3491 DESTROY_IF(this->socket_xfrm);
3492 enumerator = this->policies->create_enumerator(this->policies);
3493 while (enumerator->enumerate(enumerator, &policy, &policy))
3494 {
3495 policy_entry_destroy(this, policy);
3496 }
3497 enumerator->destroy(enumerator);
3498 this->policies->destroy(this->policies);
3499 this->sas->destroy(this->sas);
3500 this->condvar->destroy(this->condvar);
3501 this->mutex->destroy(this->mutex);
3502 free(this);
3503 }
3504
3505 /**
3506 * Get the currently configured SPD hashing thresholds for an address family
3507 */
3508 static bool get_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
3509 int type, uint8_t *lbits, uint8_t *rbits)
3510 {
3511 netlink_buf_t request;
3512 struct nlmsghdr *hdr, *out;
3513 struct xfrmu_spdhthresh *thresh;
3514 struct rtattr *rta;
3515 size_t len, rtasize;
3516 bool success = FALSE;
3517
3518 memset(&request, 0, sizeof(request));
3519
3520 hdr = &request.hdr;
3521 hdr->nlmsg_flags = NLM_F_REQUEST;
3522 hdr->nlmsg_type = XFRM_MSG_GETSPDINFO;
3523 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
3524
3525 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
3526 {
3527 hdr = out;
3528 while (NLMSG_OK(hdr, len))
3529 {
3530 switch (hdr->nlmsg_type)
3531 {
3532 case XFRM_MSG_NEWSPDINFO:
3533 {
3534 rta = XFRM_RTA(hdr, uint32_t);
3535 rtasize = XFRM_PAYLOAD(hdr, uint32_t);
3536 while (RTA_OK(rta, rtasize))
3537 {
3538 if (rta->rta_type == type &&
3539 RTA_PAYLOAD(rta) == sizeof(*thresh))
3540 {
3541 thresh = RTA_DATA(rta);
3542 *lbits = thresh->lbits;
3543 *rbits = thresh->rbits;
3544 success = TRUE;
3545 break;
3546 }
3547 rta = RTA_NEXT(rta, rtasize);
3548 }
3549 break;
3550 }
3551 case NLMSG_ERROR:
3552 {
3553 struct nlmsgerr *err = NLMSG_DATA(hdr);
3554 DBG1(DBG_KNL, "getting SPD hash threshold failed: %s (%d)",
3555 strerror(-err->error), -err->error);
3556 break;
3557 }
3558 default:
3559 hdr = NLMSG_NEXT(hdr, len);
3560 continue;
3561 case NLMSG_DONE:
3562 break;
3563 }
3564 break;
3565 }
3566 free(out);
3567 }
3568 return success;
3569 }
3570
3571 /**
3572 * Configure SPD hashing threshold for an address family
3573 */
3574 static void setup_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
3575 char *key, int type, uint8_t def)
3576 {
3577 struct xfrmu_spdhthresh *thresh;
3578 struct nlmsghdr *hdr;
3579 netlink_buf_t request;
3580 uint8_t lbits, rbits;
3581
3582 if (!get_spd_hash_thresh(this, type, &lbits, &rbits))
3583 {
3584 return;
3585 }
3586 memset(&request, 0, sizeof(request));
3587
3588 hdr = &request.hdr;
3589 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3590 hdr->nlmsg_type = XFRM_MSG_NEWSPDINFO;
3591 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
3592
3593 thresh = netlink_reserve(hdr, sizeof(request), type, sizeof(*thresh));
3594 thresh->lbits = lib->settings->get_int(lib->settings,
3595 "%s.plugins.kernel-netlink.spdh_thresh.%s.lbits",
3596 def, lib->ns, key);
3597 thresh->rbits = lib->settings->get_int(lib->settings,
3598 "%s.plugins.kernel-netlink.spdh_thresh.%s.rbits",
3599 def, lib->ns, key);
3600 if (thresh->lbits != lbits || thresh->rbits != rbits)
3601 {
3602 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3603 {
3604 DBG1(DBG_KNL, "setting SPD hash threshold failed");
3605 }
3606 }
3607 }
3608
3609 /*
3610 * Described in header.
3611 */
3612 kernel_netlink_ipsec_t *kernel_netlink_ipsec_create()
3613 {
3614 private_kernel_netlink_ipsec_t *this;
3615 bool register_for_events = TRUE;
3616
3617 INIT(this,
3618 .public = {
3619 .interface = {
3620 .get_features = _get_features,
3621 .get_spi = _get_spi,
3622 .get_cpi = _get_cpi,
3623 .add_sa = _add_sa,
3624 .update_sa = _update_sa,
3625 .query_sa = _query_sa,
3626 .del_sa = _del_sa,
3627 .flush_sas = _flush_sas,
3628 .add_policy = _add_policy,
3629 .query_policy = _query_policy,
3630 .del_policy = _del_policy,
3631 .flush_policies = _flush_policies,
3632 .bypass_socket = _bypass_socket,
3633 .enable_udp_decap = _enable_udp_decap,
3634 .destroy = _destroy,
3635 },
3636 },
3637 .policies = hashtable_create((hashtable_hash_t)policy_hash,
3638 (hashtable_equals_t)policy_equals, 32),
3639 .sas = hashtable_create((hashtable_hash_t)ipsec_sa_hash,
3640 (hashtable_equals_t)ipsec_sa_equals, 32),
3641 .bypass = array_create(sizeof(bypass_t), 0),
3642 .mutex = mutex_create(MUTEX_TYPE_DEFAULT),
3643 .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
3644 .get_priority = dlsym(RTLD_DEFAULT,
3645 "kernel_netlink_get_priority_custom"),
3646 .policy_update = lib->settings->get_bool(lib->settings,
3647 "%s.plugins.kernel-netlink.policy_update", FALSE, lib->ns),
3648 .install_routes = lib->settings->get_bool(lib->settings,
3649 "%s.install_routes", TRUE, lib->ns),
3650 .proto_port_transport = lib->settings->get_bool(lib->settings,
3651 "%s.plugins.kernel-netlink.set_proto_port_transport_sa",
3652 FALSE, lib->ns),
3653 );
3654
3655 if (streq(lib->ns, "starter"))
3656 { /* starter has no threads, so we do not register for kernel events */
3657 register_for_events = FALSE;
3658 }
3659
3660 this->socket_xfrm = netlink_socket_create(NETLINK_XFRM, xfrm_msg_names,
3661 lib->settings->get_bool(lib->settings,
3662 "%s.plugins.kernel-netlink.parallel_xfrm", FALSE, lib->ns));
3663 if (!this->socket_xfrm)
3664 {
3665 destroy(this);
3666 return NULL;
3667 }
3668
3669 setup_spd_hash_thresh(this, "ipv4", XFRMA_SPD_IPV4_HTHRESH, 32);
3670 setup_spd_hash_thresh(this, "ipv6", XFRMA_SPD_IPV6_HTHRESH, 128);
3671
3672 if (register_for_events)
3673 {
3674 struct sockaddr_nl addr;
3675
3676 memset(&addr, 0, sizeof(addr));
3677 addr.nl_family = AF_NETLINK;
3678
3679 /* create and bind XFRM socket for ACQUIRE, EXPIRE, MIGRATE & MAPPING */
3680 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3681 if (this->socket_xfrm_events <= 0)
3682 {
3683 DBG1(DBG_KNL, "unable to create XFRM event socket: %s (%d)",
3684 strerror(errno), errno);
3685 destroy(this);
3686 return NULL;
3687 }
3688 addr.nl_groups = XFRMNLGRP(ACQUIRE) | XFRMNLGRP(EXPIRE) |
3689 XFRMNLGRP(MIGRATE) | XFRMNLGRP(MAPPING);
3690 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
3691 {
3692 DBG1(DBG_KNL, "unable to bind XFRM event socket: %s (%d)",
3693 strerror(errno), errno);
3694 destroy(this);
3695 return NULL;
3696 }
3697 lib->watcher->add(lib->watcher, this->socket_xfrm_events, WATCHER_READ,
3698 (watcher_cb_t)receive_events, this);
3699 }
3700
3701 return &this->public;
3702 }