]> git.ipfire.org Git - thirdparty/strongswan.git/blob - src/libcharon/plugins/kernel_netlink/kernel_netlink_ipsec.c
kernel: Add option to control DS field behavior
[thirdparty/strongswan.git] / src / libcharon / plugins / kernel_netlink / kernel_netlink_ipsec.c
1 /*
2 * Copyright (C) 2006-2018 Tobias Brunner
3 * Copyright (C) 2005-2009 Martin Willi
4 * Copyright (C) 2008-2016 Andreas Steffen
5 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
6 * Copyright (C) 2006 Daniel Roethlisberger
7 * Copyright (C) 2005 Jan Hutter
8 * HSR Hochschule fuer Technik Rapperswil
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 * for more details.
19 */
20 /*
21 * Copyright (C) 2018 Mellanox Technologies.
22 *
23 * Permission is hereby granted, free of charge, to any person obtaining a copy
24 * of this software and associated documentation files (the "Software"), to deal
25 * in the Software without restriction, including without limitation the rights
26 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27 * copies of the Software, and to permit persons to whom the Software is
28 * furnished to do so, subject to the following conditions:
29 *
30 * The above copyright notice and this permission notice shall be included in
31 * all copies or substantial portions of the Software.
32 *
33 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39 * THE SOFTWARE.
40 */
41
42 #define _GNU_SOURCE
43 #include <sys/types.h>
44 #include <sys/socket.h>
45 #include <sys/ioctl.h>
46 #include <stdint.h>
47 #include <linux/ipsec.h>
48 #include <linux/netlink.h>
49 #include <linux/rtnetlink.h>
50 #include <linux/xfrm.h>
51 #include <linux/udp.h>
52 #include <linux/ethtool.h>
53 #include <linux/sockios.h>
54 #include <net/if.h>
55 #include <unistd.h>
56 #include <time.h>
57 #include <errno.h>
58 #include <string.h>
59 #include <fcntl.h>
60 #include <dlfcn.h>
61
62 #include "kernel_netlink_ipsec.h"
63 #include "kernel_netlink_shared.h"
64
65 #include <daemon.h>
66 #include <utils/debug.h>
67 #include <threading/mutex.h>
68 #include <threading/condvar.h>
69 #include <collections/array.h>
70 #include <collections/hashtable.h>
71 #include <collections/linked_list.h>
72
/** Fallback definition, required for Linux 2.6.26 kernel and later */
#if !defined(XFRM_STATE_AF_UNSPEC)
#define XFRM_STATE_AF_UNSPEC 32
#endif

/** Fallback definition, normally provided by linux/in.h */
#if !defined(IP_XFRM_POLICY)
#define IP_XFRM_POLICY 17
#endif

/** Fallback definition, missing on uclibc */
#if !defined(IPV6_XFRM_POLICY)
#define IPV6_XFRM_POLICY 34
#endif /* IPV6_XFRM_POLICY */

/** Fallback definition, normally provided by linux/udp.h */
#if !defined(UDP_ENCAP)
#define UDP_ENCAP 100
#endif

#if !defined(UDP_ENCAP_ESPINUDP)
#define UDP_ENCAP_ESPINUDP 2
#endif

/** Fallback definition, SOL_UDP is not defined on some platforms */
#if !defined(SOL_UDP)
#define SOL_UDP IPPROTO_UDP
#endif

/** Base priority for installed policies */
#define PRIO_BASE 200000

/**
 * A byte/packet limit of 0 means "no limit" and is mapped to XFRM_INF,
 * any other value is passed through unchanged.
 */
#define XFRM_LIMIT(x) ((x) == 0 ? XFRM_INF : (x))

/**
 * Turn the XFRM netlink group XFRMNLGRP_<x> into an ORable bit
 */
#define XFRMNLGRP(x) (1<<(XFRMNLGRP_##x-1))

/**
 * Pointer to the first rtattr that follows the nlmsghdr *nlh and the fixed
 * netlink payload of type x (e.g. 'struct xfrm_usersa_info')
 */
#define XFRM_RTA(nlh, x) ((struct rtattr*)(NLMSG_DATA(nlh) + \
									NLMSG_ALIGN(sizeof(x))))

/**
 * Total size of the rtattr data attached after the fixed netlink payload of
 * type x (e.g. 'struct xfrm_usersa_info')
 */
#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
126
typedef struct kernel_algorithm_t kernel_algorithm_t;

/**
 * One row of a translation table from an IKEv2 algorithm identifier to the
 * name of that algorithm in the Linux crypto API.
 */
struct kernel_algorithm_t {

	/** Identifier specified in IKEv2 */
	int ikev2;

	/** Name of the algorithm in linux crypto API */
	const char *name;
};
143
144 ENUM(xfrm_msg_names, XFRM_MSG_NEWSA, XFRM_MSG_MAPPING,
145 "XFRM_MSG_NEWSA",
146 "XFRM_MSG_DELSA",
147 "XFRM_MSG_GETSA",
148 "XFRM_MSG_NEWPOLICY",
149 "XFRM_MSG_DELPOLICY",
150 "XFRM_MSG_GETPOLICY",
151 "XFRM_MSG_ALLOCSPI",
152 "XFRM_MSG_ACQUIRE",
153 "XFRM_MSG_EXPIRE",
154 "XFRM_MSG_UPDPOLICY",
155 "XFRM_MSG_UPDSA",
156 "XFRM_MSG_POLEXPIRE",
157 "XFRM_MSG_FLUSHSA",
158 "XFRM_MSG_FLUSHPOLICY",
159 "XFRM_MSG_NEWAE",
160 "XFRM_MSG_GETAE",
161 "XFRM_MSG_REPORT",
162 "XFRM_MSG_MIGRATE",
163 "XFRM_MSG_NEWSADINFO",
164 "XFRM_MSG_GETSADINFO",
165 "XFRM_MSG_NEWSPDINFO",
166 "XFRM_MSG_GETSPDINFO",
167 "XFRM_MSG_MAPPING"
168 );
169
170 ENUM(xfrm_attr_type_names, XFRMA_UNSPEC, XFRMA_OFFLOAD_DEV,
171 "XFRMA_UNSPEC",
172 "XFRMA_ALG_AUTH",
173 "XFRMA_ALG_CRYPT",
174 "XFRMA_ALG_COMP",
175 "XFRMA_ENCAP",
176 "XFRMA_TMPL",
177 "XFRMA_SA",
178 "XFRMA_POLICY",
179 "XFRMA_SEC_CTX",
180 "XFRMA_LTIME_VAL",
181 "XFRMA_REPLAY_VAL",
182 "XFRMA_REPLAY_THRESH",
183 "XFRMA_ETIMER_THRESH",
184 "XFRMA_SRCADDR",
185 "XFRMA_COADDR",
186 "XFRMA_LASTUSED",
187 "XFRMA_POLICY_TYPE",
188 "XFRMA_MIGRATE",
189 "XFRMA_ALG_AEAD",
190 "XFRMA_KMADDRESS",
191 "XFRMA_ALG_AUTH_TRUNC",
192 "XFRMA_MARK",
193 "XFRMA_TFCPAD",
194 "XFRMA_REPLAY_ESN_VAL",
195 "XFRMA_SA_EXTRA_FLAGS",
196 "XFRMA_PROTO",
197 "XFRMA_ADDRESS_FILTER",
198 "XFRMA_PAD",
199 "XFRMA_OFFLOAD_DEV",
200 );
201
202 /**
203 * Algorithms for encryption
204 */
205 static kernel_algorithm_t encryption_algs[] = {
206 /* {ENCR_DES_IV64, "***" }, */
207 {ENCR_DES, "des" },
208 {ENCR_3DES, "des3_ede" },
209 /* {ENCR_RC5, "***" }, */
210 /* {ENCR_IDEA, "***" }, */
211 {ENCR_CAST, "cast5" },
212 {ENCR_BLOWFISH, "blowfish" },
213 /* {ENCR_3IDEA, "***" }, */
214 /* {ENCR_DES_IV32, "***" }, */
215 {ENCR_NULL, "cipher_null" },
216 {ENCR_AES_CBC, "aes" },
217 {ENCR_AES_CTR, "rfc3686(ctr(aes))" },
218 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))" },
219 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))" },
220 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))" },
221 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))" },
222 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))" },
223 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))" },
224 {ENCR_NULL_AUTH_AES_GMAC, "rfc4543(gcm(aes))" },
225 {ENCR_CAMELLIA_CBC, "cbc(camellia)" },
226 /* {ENCR_CAMELLIA_CTR, "***" }, */
227 /* {ENCR_CAMELLIA_CCM_ICV8, "***" }, */
228 /* {ENCR_CAMELLIA_CCM_ICV12, "***" }, */
229 /* {ENCR_CAMELLIA_CCM_ICV16, "***" }, */
230 {ENCR_SERPENT_CBC, "serpent" },
231 {ENCR_TWOFISH_CBC, "twofish" },
232 {ENCR_CHACHA20_POLY1305, "rfc7539esp(chacha20,poly1305)"},
233 };
234
235 /**
236 * Algorithms for integrity protection
237 */
238 static kernel_algorithm_t integrity_algs[] = {
239 {AUTH_HMAC_MD5_96, "md5" },
240 {AUTH_HMAC_MD5_128, "hmac(md5)" },
241 {AUTH_HMAC_SHA1_96, "sha1" },
242 {AUTH_HMAC_SHA1_160, "hmac(sha1)" },
243 {AUTH_HMAC_SHA2_256_96, "sha256" },
244 {AUTH_HMAC_SHA2_256_128, "hmac(sha256)" },
245 {AUTH_HMAC_SHA2_384_192, "hmac(sha384)" },
246 {AUTH_HMAC_SHA2_512_256, "hmac(sha512)" },
247 /* {AUTH_DES_MAC, "***" }, */
248 /* {AUTH_KPDK_MD5, "***" }, */
249 {AUTH_AES_XCBC_96, "xcbc(aes)" },
250 {AUTH_AES_CMAC_96, "cmac(aes)" },
251 };
252
253 /**
254 * Algorithms for IPComp
255 */
256 static kernel_algorithm_t compression_algs[] = {
257 /* {IPCOMP_OUI, "***" }, */
258 {IPCOMP_DEFLATE, "deflate" },
259 {IPCOMP_LZS, "lzs" },
260 {IPCOMP_LZJH, "lzjh" },
261 };
262
/**
 * State of IPsec HW offload support in the kernel.
 *
 * NL_OFFLOAD_UNKNOWN is the first enumerator (value 0), so a zero-initialized
 * state variable starts out as "unknown".
 */
typedef enum {
	NL_OFFLOAD_UNKNOWN,
	NL_OFFLOAD_UNSUPPORTED,
	NL_OFFLOAD_SUPPORTED,
} nl_offload_state_t;
271
272 /**
273 * Global metadata used for IPsec HW offload
274 */
275 static struct {
276 /** bit in feature set */
277 u_int bit;
278 /** total number of device feature blocks */
279 u_int total_blocks;
280 /** determined HW offload state */
281 nl_offload_state_t state;
282 } netlink_hw_offload;
283
284 /**
285 * Look up a kernel algorithm name and its key size
286 */
287 static const char* lookup_algorithm(transform_type_t type, int ikev2)
288 {
289 kernel_algorithm_t *list;
290 int i, count;
291 char *name;
292
293 switch (type)
294 {
295 case ENCRYPTION_ALGORITHM:
296 list = encryption_algs;
297 count = countof(encryption_algs);
298 break;
299 case INTEGRITY_ALGORITHM:
300 list = integrity_algs;
301 count = countof(integrity_algs);
302 break;
303 case COMPRESSION_ALGORITHM:
304 list = compression_algs;
305 count = countof(compression_algs);
306 break;
307 default:
308 return NULL;
309 }
310 for (i = 0; i < count; i++)
311 {
312 if (list[i].ikev2 == ikev2)
313 {
314 return list[i].name;
315 }
316 }
317 if (charon->kernel->lookup_algorithm(charon->kernel, ikev2, type, NULL,
318 &name))
319 {
320 return name;
321 }
322 return NULL;
323 }
324
325 typedef struct private_kernel_netlink_ipsec_t private_kernel_netlink_ipsec_t;
326
327 /**
328 * Private variables and functions of kernel_netlink class.
329 */
330 struct private_kernel_netlink_ipsec_t {
331 /**
332 * Public part of the kernel_netlink_t object
333 */
334 kernel_netlink_ipsec_t public;
335
336 /**
337 * Mutex to lock access to installed policies
338 */
339 mutex_t *mutex;
340
341 /**
342 * Condvar to synchronize access to individual policies
343 */
344 condvar_t *condvar;
345
346 /**
347 * Hash table of installed policies (policy_entry_t)
348 */
349 hashtable_t *policies;
350
351 /**
352 * Hash table of IPsec SAs using policies (ipsec_sa_t)
353 */
354 hashtable_t *sas;
355
356 /**
357 * Netlink xfrm socket (IPsec)
358 */
359 netlink_socket_t *socket_xfrm;
360
361 /**
362 * Netlink xfrm socket to receive acquire and expire events
363 */
364 int socket_xfrm_events;
365
366 /**
367 * Whether to install routes along policies
368 */
369 bool install_routes;
370
371 /**
372 * Whether to set protocol and ports on selector installed with transport
373 * mode IPsec SAs
374 */
375 bool proto_port_transport;
376
377 /**
378 * Whether to always use UPDATE to install policies
379 */
380 bool policy_update;
381
382 /**
383 * Installed port based IKE bypass policies, as bypass_t
384 */
385 array_t *bypass;
386
387 /**
388 * Custom priority calculation function
389 */
390 uint32_t (*get_priority)(kernel_ipsec_policy_id_t *id,
391 kernel_ipsec_manage_policy_t *data);
392 };
393
394 typedef struct route_entry_t route_entry_t;
395
396 /**
397 * Installed routing entry
398 */
399 struct route_entry_t {
400 /** Name of the interface the route is bound to */
401 char *if_name;
402
403 /** Source ip of the route */
404 host_t *src_ip;
405
406 /** Gateway for this route */
407 host_t *gateway;
408
409 /** Destination net */
410 chunk_t dst_net;
411
412 /** Destination net prefixlen */
413 uint8_t prefixlen;
414 };
415
416 /**
417 * Destroy a route_entry_t object
418 */
419 static void route_entry_destroy(route_entry_t *this)
420 {
421 free(this->if_name);
422 this->src_ip->destroy(this->src_ip);
423 DESTROY_IF(this->gateway);
424 chunk_free(&this->dst_net);
425 free(this);
426 }
427
428 /**
429 * Compare two route_entry_t objects
430 */
431 static bool route_entry_equals(route_entry_t *a, route_entry_t *b)
432 {
433 return a->if_name && b->if_name && streq(a->if_name, b->if_name) &&
434 a->src_ip->ip_equals(a->src_ip, b->src_ip) &&
435 a->gateway->ip_equals(a->gateway, b->gateway) &&
436 chunk_equals(a->dst_net, b->dst_net) && a->prefixlen == b->prefixlen;
437 }
438
439 typedef struct ipsec_sa_t ipsec_sa_t;
440
441 /**
442 * IPsec SA assigned to a policy.
443 */
444 struct ipsec_sa_t {
445 /** Source address of this SA */
446 host_t *src;
447
448 /** Destination address of this SA */
449 host_t *dst;
450
451 /** Optional mark */
452 mark_t mark;
453
454 /** Description of this SA */
455 ipsec_sa_cfg_t cfg;
456
457 /** Reference count for this SA */
458 refcount_t refcount;
459 };
460
461 /**
462 * Hash function for ipsec_sa_t objects
463 */
464 static u_int ipsec_sa_hash(ipsec_sa_t *sa)
465 {
466 return chunk_hash_inc(sa->src->get_address(sa->src),
467 chunk_hash_inc(sa->dst->get_address(sa->dst),
468 chunk_hash_inc(chunk_from_thing(sa->mark),
469 chunk_hash(chunk_from_thing(sa->cfg)))));
470 }
471
472 /**
473 * Equality function for ipsec_sa_t objects
474 */
475 static bool ipsec_sa_equals(ipsec_sa_t *sa, ipsec_sa_t *other_sa)
476 {
477 return sa->src->ip_equals(sa->src, other_sa->src) &&
478 sa->dst->ip_equals(sa->dst, other_sa->dst) &&
479 sa->mark.value == other_sa->mark.value &&
480 sa->mark.mask == other_sa->mark.mask &&
481 ipsec_sa_cfg_equals(&sa->cfg, &other_sa->cfg);
482 }
483
484 /**
485 * Allocate or reference an IPsec SA object
486 */
487 static ipsec_sa_t *ipsec_sa_create(private_kernel_netlink_ipsec_t *this,
488 host_t *src, host_t *dst, mark_t mark,
489 ipsec_sa_cfg_t *cfg)
490 {
491 ipsec_sa_t *sa, *found;
492 INIT(sa,
493 .src = src,
494 .dst = dst,
495 .mark = mark,
496 .cfg = *cfg,
497 );
498 found = this->sas->get(this->sas, sa);
499 if (!found)
500 {
501 sa->src = src->clone(src);
502 sa->dst = dst->clone(dst);
503 this->sas->put(this->sas, sa, sa);
504 }
505 else
506 {
507 free(sa);
508 sa = found;
509 }
510 ref_get(&sa->refcount);
511 return sa;
512 }
513
514 /**
515 * Release and destroy an IPsec SA object
516 */
517 static void ipsec_sa_destroy(private_kernel_netlink_ipsec_t *this,
518 ipsec_sa_t *sa)
519 {
520 if (ref_put(&sa->refcount))
521 {
522 this->sas->remove(this->sas, sa);
523 DESTROY_IF(sa->src);
524 DESTROY_IF(sa->dst);
525 free(sa);
526 }
527 }
528
529 typedef struct policy_sa_t policy_sa_t;
530 typedef struct policy_sa_out_t policy_sa_out_t;
531
532 /**
533 * Mapping between a policy and an IPsec SA.
534 */
535 struct policy_sa_t {
536 /** Priority assigned to the policy when installed with this SA */
537 uint32_t priority;
538
539 /** Automatic priority assigned to the policy when installed with this SA */
540 uint32_t auto_priority;
541
542 /** Type of the policy */
543 policy_type_t type;
544
545 /** Assigned SA */
546 ipsec_sa_t *sa;
547 };
548
549 /**
550 * For outbound policies we also cache the traffic selectors in order to install
551 * the route.
552 */
553 struct policy_sa_out_t {
554 /** Generic interface */
555 policy_sa_t generic;
556
557 /** Source traffic selector of this policy */
558 traffic_selector_t *src_ts;
559
560 /** Destination traffic selector of this policy */
561 traffic_selector_t *dst_ts;
562 };
563
564 /**
565 * Create a policy_sa(_in)_t object
566 */
567 static policy_sa_t *policy_sa_create(private_kernel_netlink_ipsec_t *this,
568 policy_dir_t dir, policy_type_t type, host_t *src, host_t *dst,
569 traffic_selector_t *src_ts, traffic_selector_t *dst_ts, mark_t mark,
570 ipsec_sa_cfg_t *cfg)
571 {
572 policy_sa_t *policy;
573
574 if (dir == POLICY_OUT)
575 {
576 policy_sa_out_t *out;
577 INIT(out,
578 .src_ts = src_ts->clone(src_ts),
579 .dst_ts = dst_ts->clone(dst_ts),
580 );
581 policy = &out->generic;
582 }
583 else
584 {
585 INIT(policy, .priority = 0);
586 }
587 policy->type = type;
588 policy->sa = ipsec_sa_create(this, src, dst, mark, cfg);
589 return policy;
590 }
591
592 /**
593 * Destroy a policy_sa(_in)_t object
594 */
595 static void policy_sa_destroy(policy_sa_t *policy, policy_dir_t dir,
596 private_kernel_netlink_ipsec_t *this)
597 {
598 if (dir == POLICY_OUT)
599 {
600 policy_sa_out_t *out = (policy_sa_out_t*)policy;
601 out->src_ts->destroy(out->src_ts);
602 out->dst_ts->destroy(out->dst_ts);
603 }
604 ipsec_sa_destroy(this, policy->sa);
605 free(policy);
606 }
607
608 CALLBACK(policy_sa_destroy_cb, void,
609 policy_sa_t *policy, va_list args)
610 {
611 private_kernel_netlink_ipsec_t *this;
612 policy_dir_t dir;
613
614 VA_ARGS_VGET(args, dir, this);
615 policy_sa_destroy(policy, dir, this);
616 }
617
618 typedef struct policy_entry_t policy_entry_t;
619
620 /**
621 * Installed kernel policy.
622 */
623 struct policy_entry_t {
624
625 /** Direction of this policy: in, out, forward */
626 uint8_t direction;
627
628 /** Parameters of installed policy */
629 struct xfrm_selector sel;
630
631 /** Optional mark */
632 uint32_t mark;
633
634 /** Associated route installed for this policy */
635 route_entry_t *route;
636
637 /** List of SAs this policy is used by, ordered by priority */
638 linked_list_t *used_by;
639
640 /** reqid for this policy */
641 uint32_t reqid;
642
643 /** Number of threads waiting to work on this policy */
644 int waiting;
645
646 /** TRUE if a thread is working on this policy */
647 bool working;
648 };
649
650 /**
651 * Destroy a policy_entry_t object
652 */
653 static void policy_entry_destroy(private_kernel_netlink_ipsec_t *this,
654 policy_entry_t *policy)
655 {
656 if (policy->route)
657 {
658 route_entry_destroy(policy->route);
659 }
660 if (policy->used_by)
661 {
662 policy->used_by->invoke_function(policy->used_by, policy_sa_destroy_cb,
663 policy->direction, this);
664 policy->used_by->destroy(policy->used_by);
665 }
666 free(policy);
667 }
668
669 /**
670 * Hash function for policy_entry_t objects
671 */
672 static u_int policy_hash(policy_entry_t *key)
673 {
674 chunk_t chunk = chunk_from_thing(key->sel);
675 return chunk_hash_inc(chunk, chunk_hash(chunk_from_thing(key->mark)));
676 }
677
678 /**
679 * Equality function for policy_entry_t objects
680 */
681 static bool policy_equals(policy_entry_t *key, policy_entry_t *other_key)
682 {
683 return memeq(&key->sel, &other_key->sel, sizeof(struct xfrm_selector)) &&
684 key->mark == other_key->mark &&
685 key->direction == other_key->direction;
686 }
687
/**
 * Count the leading set bits of a 16 bit port mask.
 *
 * Counting starts at the most significant bit and stops at the first bit
 * that is not set, so set bits after a gap are ignored.
 *
 * @param port_mask		port mask in network byte order
 * @return				number of leading set bits (0..16)
 */
static inline uint32_t port_mask_bits(uint16_t port_mask)
{
	uint16_t host_mask = ntohs(port_mask);
	uint16_t probe;
	uint32_t count = 0;

	/* probe shifts out to 0 after the least significant bit was tested */
	for (probe = 0x8000; probe && (host_mask & probe); probe >>= 1)
	{
		count++;
	}
	return count;
}
708
709 /**
710 * Calculate the priority of a policy
711 *
712 * bits 0-0: separate trap and regular policies (0..1) 1 bit
713 * bits 1-1: restriction to network interface (0..1) 1 bit
714 * bits 2-7: src + dst port mask bits (2 * 0..16) 6 bits
715 * bits 8-8: restriction to protocol (0..1) 1 bit
716 * bits 9-17: src + dst network mask bits (2 * 0..128) 9 bits
717 * 18 bits
718 *
719 * smallest value: 000000000 0 000000 0 0: 0, lowest priority = 200'000
720 * largest value : 100000000 1 100000 1 1: 131'459, highst priority = 68'541
721 */
722 static uint32_t get_priority(policy_entry_t *policy, policy_priority_t prio,
723 char *interface)
724 {
725 uint32_t priority = PRIO_BASE, sport_mask_bits, dport_mask_bits;
726
727 switch (prio)
728 {
729 case POLICY_PRIORITY_FALLBACK:
730 priority += PRIO_BASE;
731 /* fall-through to next case */
732 case POLICY_PRIORITY_ROUTED:
733 case POLICY_PRIORITY_DEFAULT:
734 priority += PRIO_BASE;
735 /* fall-through to next case */
736 case POLICY_PRIORITY_PASS:
737 break;
738 }
739 sport_mask_bits = port_mask_bits(policy->sel.sport_mask);
740 dport_mask_bits = port_mask_bits(policy->sel.dport_mask);
741
742 /* calculate priority */
743 priority -= (policy->sel.prefixlen_s + policy->sel.prefixlen_d) * 512;
744 priority -= policy->sel.proto ? 256 : 0;
745 priority -= (sport_mask_bits + dport_mask_bits) * 4;
746 priority -= (interface != NULL) * 2;
747 priority -= (prio != POLICY_PRIORITY_ROUTED);
748
749 return priority;
750 }
751
752 /**
753 * Convert the general ipsec mode to the one defined in xfrm.h
754 */
755 static uint8_t mode2kernel(ipsec_mode_t mode)
756 {
757 switch (mode)
758 {
759 case MODE_TRANSPORT:
760 return XFRM_MODE_TRANSPORT;
761 case MODE_TUNNEL:
762 return XFRM_MODE_TUNNEL;
763 case MODE_BEET:
764 return XFRM_MODE_BEET;
765 default:
766 return mode;
767 }
768 }
769
770 /**
771 * Convert a host_t to a struct xfrm_address
772 */
773 static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
774 {
775 chunk_t chunk = host->get_address(host);
776 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
777 }
778
779 /**
780 * Convert a struct xfrm_address to a host_t
781 */
782 static host_t* xfrm2host(int family, xfrm_address_t *xfrm, uint16_t port)
783 {
784 chunk_t chunk;
785
786 switch (family)
787 {
788 case AF_INET:
789 chunk = chunk_create((u_char*)&xfrm->a4, sizeof(xfrm->a4));
790 break;
791 case AF_INET6:
792 chunk = chunk_create((u_char*)&xfrm->a6, sizeof(xfrm->a6));
793 break;
794 default:
795 return NULL;
796 }
797 return host_create_from_chunk(family, chunk, ntohs(port));
798 }
799
800 /**
801 * Convert a traffic selector address range to subnet and its mask.
802 */
803 static void ts2subnet(traffic_selector_t* ts,
804 xfrm_address_t *net, uint8_t *mask)
805 {
806 host_t *net_host;
807 chunk_t net_chunk;
808
809 ts->to_subnet(ts, &net_host, mask);
810 net_chunk = net_host->get_address(net_host);
811 memcpy(net, net_chunk.ptr, net_chunk.len);
812 net_host->destroy(net_host);
813 }
814
815 /**
816 * Convert a traffic selector port range to port/portmask
817 */
818 static void ts2ports(traffic_selector_t* ts,
819 uint16_t *port, uint16_t *mask)
820 {
821 uint16_t from, to, bitmask;
822 int bit;
823
824 from = ts->get_from_port(ts);
825 to = ts->get_to_port(ts);
826
827 /* Quick check for a single port */
828 if (from == to)
829 {
830 *port = htons(from);
831 *mask = ~0;
832 }
833 else
834 {
835 /* Compute the port mask for port ranges */
836 *mask = 0;
837
838 for (bit = 15; bit >= 0; bit--)
839 {
840 bitmask = 1 << bit;
841
842 if ((bitmask & from) != (bitmask & to))
843 {
844 *port = htons(from & *mask);
845 *mask = htons(*mask);
846 return;
847 }
848 *mask |= bitmask;
849 }
850 }
851 return;
852 }
853
854 /**
855 * Convert a pair of traffic_selectors to an xfrm_selector
856 */
857 static struct xfrm_selector ts2selector(traffic_selector_t *src,
858 traffic_selector_t *dst,
859 char *interface)
860 {
861 struct xfrm_selector sel;
862 uint16_t port;
863
864 memset(&sel, 0, sizeof(sel));
865 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
866 /* src or dest proto may be "any" (0), use more restrictive one */
867 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
868 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
869 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
870 ts2ports(dst, &sel.dport, &sel.dport_mask);
871 ts2ports(src, &sel.sport, &sel.sport_mask);
872 if ((sel.proto == IPPROTO_ICMP || sel.proto == IPPROTO_ICMPV6) &&
873 (sel.dport || sel.sport))
874 {
875 /* the kernel expects the ICMP type and code in the source and
876 * destination port fields, respectively. */
877 port = ntohs(max(sel.dport, sel.sport));
878 sel.sport = htons(traffic_selector_icmp_type(port));
879 sel.sport_mask = sel.sport ? ~0 : 0;
880 sel.dport = htons(traffic_selector_icmp_code(port));
881 sel.dport_mask = sel.dport ? ~0 : 0;
882 }
883 sel.ifindex = interface ? if_nametoindex(interface) : 0;
884 sel.user = 0;
885
886 return sel;
887 }
888
889 /**
890 * Convert an xfrm_selector to a src|dst traffic_selector
891 */
892 static traffic_selector_t* selector2ts(struct xfrm_selector *sel, bool src)
893 {
894 u_char *addr;
895 uint8_t prefixlen;
896 uint16_t port = 0;
897 host_t *host = NULL;
898
899 if (src)
900 {
901 addr = (u_char*)&sel->saddr;
902 prefixlen = sel->prefixlen_s;
903 if (sel->sport_mask)
904 {
905 port = ntohs(sel->sport);
906 }
907 }
908 else
909 {
910 addr = (u_char*)&sel->daddr;
911 prefixlen = sel->prefixlen_d;
912 if (sel->dport_mask)
913 {
914 port = ntohs(sel->dport);
915 }
916 }
917 if (sel->proto == IPPROTO_ICMP || sel->proto == IPPROTO_ICMPV6)
918 { /* convert ICMP[v6] message type and code as supplied by the kernel in
919 * source and destination ports (both in network order) */
920 port = (sel->sport >> 8) | (sel->dport & 0xff00);
921 port = ntohs(port);
922 }
923 /* The Linux 2.6 kernel does not set the selector's family field,
924 * so as a kludge we additionally test the prefix length.
925 */
926 if (sel->family == AF_INET || sel->prefixlen_s == 32)
927 {
928 host = host_create_from_chunk(AF_INET, chunk_create(addr, 4), 0);
929 }
930 else if (sel->family == AF_INET6 || sel->prefixlen_s == 128)
931 {
932 host = host_create_from_chunk(AF_INET6, chunk_create(addr, 16), 0);
933 }
934
935 if (host)
936 {
937 return traffic_selector_create_from_subnet(host, prefixlen,
938 sel->proto, port, port ?: 65535);
939 }
940 return NULL;
941 }
942
943 /**
944 * Process a XFRM_MSG_ACQUIRE from kernel
945 */
946 static void process_acquire(private_kernel_netlink_ipsec_t *this,
947 struct nlmsghdr *hdr)
948 {
949 struct xfrm_user_acquire *acquire;
950 struct rtattr *rta;
951 size_t rtasize;
952 traffic_selector_t *src_ts, *dst_ts;
953 uint32_t reqid = 0;
954 int proto = 0;
955
956 acquire = NLMSG_DATA(hdr);
957 rta = XFRM_RTA(hdr, struct xfrm_user_acquire);
958 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_user_acquire);
959
960 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
961
962 while (RTA_OK(rta, rtasize))
963 {
964 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
965
966 if (rta->rta_type == XFRMA_TMPL)
967 {
968 struct xfrm_user_tmpl* tmpl;
969 tmpl = (struct xfrm_user_tmpl*)RTA_DATA(rta);
970 reqid = tmpl->reqid;
971 proto = tmpl->id.proto;
972 }
973 rta = RTA_NEXT(rta, rtasize);
974 }
975 switch (proto)
976 {
977 case 0:
978 case IPPROTO_ESP:
979 case IPPROTO_AH:
980 break;
981 default:
982 /* acquire for AH/ESP only, not for IPCOMP */
983 return;
984 }
985 src_ts = selector2ts(&acquire->sel, TRUE);
986 dst_ts = selector2ts(&acquire->sel, FALSE);
987
988 charon->kernel->acquire(charon->kernel, reqid, src_ts, dst_ts);
989 }
990
991 /**
992 * Process a XFRM_MSG_EXPIRE from kernel
993 */
994 static void process_expire(private_kernel_netlink_ipsec_t *this,
995 struct nlmsghdr *hdr)
996 {
997 struct xfrm_user_expire *expire;
998 uint32_t spi;
999 uint8_t protocol;
1000 host_t *dst;
1001
1002 expire = NLMSG_DATA(hdr);
1003 protocol = expire->state.id.proto;
1004 spi = expire->state.id.spi;
1005
1006 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
1007
1008 if (protocol == IPPROTO_ESP || protocol == IPPROTO_AH)
1009 {
1010 dst = xfrm2host(expire->state.family, &expire->state.id.daddr, 0);
1011 if (dst)
1012 {
1013 charon->kernel->expire(charon->kernel, protocol, spi, dst,
1014 expire->hard != 0);
1015 dst->destroy(dst);
1016 }
1017 }
1018 }
1019
1020 /**
1021 * Process a XFRM_MSG_MIGRATE from kernel
1022 */
1023 static void process_migrate(private_kernel_netlink_ipsec_t *this,
1024 struct nlmsghdr *hdr)
1025 {
1026 struct xfrm_userpolicy_id *policy_id;
1027 struct rtattr *rta;
1028 size_t rtasize;
1029 traffic_selector_t *src_ts, *dst_ts;
1030 host_t *local = NULL, *remote = NULL;
1031 host_t *old_src = NULL, *old_dst = NULL;
1032 host_t *new_src = NULL, *new_dst = NULL;
1033 uint32_t reqid = 0;
1034 policy_dir_t dir;
1035
1036 policy_id = NLMSG_DATA(hdr);
1037 rta = XFRM_RTA(hdr, struct xfrm_userpolicy_id);
1038 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_userpolicy_id);
1039
1040 DBG2(DBG_KNL, "received a XFRM_MSG_MIGRATE");
1041
1042 src_ts = selector2ts(&policy_id->sel, TRUE);
1043 dst_ts = selector2ts(&policy_id->sel, FALSE);
1044 dir = (policy_dir_t)policy_id->dir;
1045
1046 DBG2(DBG_KNL, " policy: %R === %R %N", src_ts, dst_ts, policy_dir_names);
1047
1048 while (RTA_OK(rta, rtasize))
1049 {
1050 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
1051 if (rta->rta_type == XFRMA_KMADDRESS)
1052 {
1053 struct xfrm_user_kmaddress *kmaddress;
1054
1055 kmaddress = (struct xfrm_user_kmaddress*)RTA_DATA(rta);
1056 local = xfrm2host(kmaddress->family, &kmaddress->local, 0);
1057 remote = xfrm2host(kmaddress->family, &kmaddress->remote, 0);
1058 DBG2(DBG_KNL, " kmaddress: %H...%H", local, remote);
1059 }
1060 else if (rta->rta_type == XFRMA_MIGRATE)
1061 {
1062 struct xfrm_user_migrate *migrate;
1063
1064 migrate = (struct xfrm_user_migrate*)RTA_DATA(rta);
1065 old_src = xfrm2host(migrate->old_family, &migrate->old_saddr, 0);
1066 old_dst = xfrm2host(migrate->old_family, &migrate->old_daddr, 0);
1067 new_src = xfrm2host(migrate->new_family, &migrate->new_saddr, 0);
1068 new_dst = xfrm2host(migrate->new_family, &migrate->new_daddr, 0);
1069 reqid = migrate->reqid;
1070 DBG2(DBG_KNL, " migrate %H...%H to %H...%H, reqid {%u}",
1071 old_src, old_dst, new_src, new_dst, reqid);
1072 DESTROY_IF(old_src);
1073 DESTROY_IF(old_dst);
1074 DESTROY_IF(new_src);
1075 DESTROY_IF(new_dst);
1076 }
1077 rta = RTA_NEXT(rta, rtasize);
1078 }
1079
1080 if (src_ts && dst_ts && local && remote)
1081 {
1082 charon->kernel->migrate(charon->kernel, reqid, src_ts, dst_ts, dir,
1083 local, remote);
1084 }
1085 else
1086 {
1087 DESTROY_IF(src_ts);
1088 DESTROY_IF(dst_ts);
1089 DESTROY_IF(local);
1090 DESTROY_IF(remote);
1091 }
1092 }
1093
1094 /**
1095 * Process a XFRM_MSG_MAPPING from kernel
1096 */
1097 static void process_mapping(private_kernel_netlink_ipsec_t *this,
1098 struct nlmsghdr *hdr)
1099 {
1100 struct xfrm_user_mapping *mapping;
1101 uint32_t spi;
1102
1103 mapping = NLMSG_DATA(hdr);
1104 spi = mapping->id.spi;
1105
1106 DBG2(DBG_KNL, "received a XFRM_MSG_MAPPING");
1107
1108 if (mapping->id.proto == IPPROTO_ESP)
1109 {
1110 host_t *dst, *new;
1111
1112 dst = xfrm2host(mapping->id.family, &mapping->id.daddr, 0);
1113 if (dst)
1114 {
1115 new = xfrm2host(mapping->id.family, &mapping->new_saddr,
1116 mapping->new_sport);
1117 if (new)
1118 {
1119 charon->kernel->mapping(charon->kernel, IPPROTO_ESP, spi, dst,
1120 new);
1121 new->destroy(new);
1122 }
1123 dst->destroy(dst);
1124 }
1125 }
1126 }
1127
1128 /**
1129 * Receives events from kernel
1130 */
1131 static bool receive_events(private_kernel_netlink_ipsec_t *this, int fd,
1132 watcher_event_t event)
1133 {
1134 char response[1024];
1135 struct nlmsghdr *hdr = (struct nlmsghdr*)response;
1136 struct sockaddr_nl addr;
1137 socklen_t addr_len = sizeof(addr);
1138 int len;
1139
1140 len = recvfrom(this->socket_xfrm_events, response, sizeof(response),
1141 MSG_DONTWAIT, (struct sockaddr*)&addr, &addr_len);
1142 if (len < 0)
1143 {
1144 switch (errno)
1145 {
1146 case EINTR:
1147 /* interrupted, try again */
1148 return TRUE;
1149 case EAGAIN:
1150 /* no data ready, select again */
1151 return TRUE;
1152 default:
1153 DBG1(DBG_KNL, "unable to receive from XFRM event socket: %s "
1154 "(%d)", strerror(errno), errno);
1155 sleep(1);
1156 return TRUE;
1157 }
1158 }
1159
1160 if (addr.nl_pid != 0)
1161 { /* not from kernel. not interested, try another one */
1162 return TRUE;
1163 }
1164
1165 while (NLMSG_OK(hdr, len))
1166 {
1167 switch (hdr->nlmsg_type)
1168 {
1169 case XFRM_MSG_ACQUIRE:
1170 process_acquire(this, hdr);
1171 break;
1172 case XFRM_MSG_EXPIRE:
1173 process_expire(this, hdr);
1174 break;
1175 case XFRM_MSG_MIGRATE:
1176 process_migrate(this, hdr);
1177 break;
1178 case XFRM_MSG_MAPPING:
1179 process_mapping(this, hdr);
1180 break;
1181 default:
1182 DBG1(DBG_KNL, "received unknown event from XFRM event "
1183 "socket: %d", hdr->nlmsg_type);
1184 break;
1185 }
1186 hdr = NLMSG_NEXT(hdr, len);
1187 }
1188 return TRUE;
1189 }
1190
1191 METHOD(kernel_ipsec_t, get_features, kernel_feature_t,
1192 private_kernel_netlink_ipsec_t *this)
1193 {
1194 return KERNEL_ESP_V3_TFC | KERNEL_POLICY_SPI;
1195 }
1196
1197 /**
1198 * Get an SPI for a specific protocol from the kernel.
1199 */
1200 static status_t get_spi_internal(private_kernel_netlink_ipsec_t *this,
1201 host_t *src, host_t *dst, uint8_t proto, uint32_t min, uint32_t max,
1202 uint32_t *spi)
1203 {
1204 netlink_buf_t request;
1205 struct nlmsghdr *hdr, *out;
1206 struct xfrm_userspi_info *userspi;
1207 uint32_t received_spi = 0;
1208 size_t len;
1209
1210 memset(&request, 0, sizeof(request));
1211
1212 hdr = &request.hdr;
1213 hdr->nlmsg_flags = NLM_F_REQUEST;
1214 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1215 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1216
1217 userspi = NLMSG_DATA(hdr);
1218 host2xfrm(src, &userspi->info.saddr);
1219 host2xfrm(dst, &userspi->info.id.daddr);
1220 userspi->info.id.proto = proto;
1221 userspi->info.mode = XFRM_MODE_TUNNEL;
1222 userspi->info.family = src->get_family(src);
1223 userspi->min = min;
1224 userspi->max = max;
1225
1226 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1227 {
1228 hdr = out;
1229 while (NLMSG_OK(hdr, len))
1230 {
1231 switch (hdr->nlmsg_type)
1232 {
1233 case XFRM_MSG_NEWSA:
1234 {
1235 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1236 received_spi = usersa->id.spi;
1237 break;
1238 }
1239 case NLMSG_ERROR:
1240 {
1241 struct nlmsgerr *err = NLMSG_DATA(hdr);
1242 DBG1(DBG_KNL, "allocating SPI failed: %s (%d)",
1243 strerror(-err->error), -err->error);
1244 break;
1245 }
1246 default:
1247 hdr = NLMSG_NEXT(hdr, len);
1248 continue;
1249 case NLMSG_DONE:
1250 break;
1251 }
1252 break;
1253 }
1254 free(out);
1255 }
1256
1257 if (received_spi == 0)
1258 {
1259 return FAILED;
1260 }
1261
1262 *spi = received_spi;
1263 return SUCCESS;
1264 }
1265
1266 METHOD(kernel_ipsec_t, get_spi, status_t,
1267 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1268 uint8_t protocol, uint32_t *spi)
1269 {
1270 uint32_t spi_min, spi_max;
1271
1272 spi_min = lib->settings->get_int(lib->settings, "%s.spi_min",
1273 KERNEL_SPI_MIN, lib->ns);
1274 spi_max = lib->settings->get_int(lib->settings, "%s.spi_max",
1275 KERNEL_SPI_MAX, lib->ns);
1276
1277 if (get_spi_internal(this, src, dst, protocol, min(spi_min, spi_max),
1278 max(spi_min, spi_max), spi) != SUCCESS)
1279 {
1280 DBG1(DBG_KNL, "unable to get SPI");
1281 return FAILED;
1282 }
1283
1284 DBG2(DBG_KNL, "got SPI %.8x", ntohl(*spi));
1285 return SUCCESS;
1286 }
1287
1288 METHOD(kernel_ipsec_t, get_cpi, status_t,
1289 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1290 uint16_t *cpi)
1291 {
1292 uint32_t received_spi = 0;
1293
1294 if (get_spi_internal(this, src, dst, IPPROTO_COMP,
1295 0x100, 0xEFFF, &received_spi) != SUCCESS)
1296 {
1297 DBG1(DBG_KNL, "unable to get CPI");
1298 return FAILED;
1299 }
1300
1301 *cpi = htons((uint16_t)ntohl(received_spi));
1302
1303 DBG2(DBG_KNL, "got CPI %.4x", ntohs(*cpi));
1304 return SUCCESS;
1305 }
1306
1307 /**
1308 * Format the mark for debug messages
1309 */
1310 static void format_mark(char *buf, int buflen, mark_t mark)
1311 {
1312 if (mark.value | mark.mask)
1313 {
1314 snprintf(buf, buflen, " (mark %u/0x%08x)", mark.value, mark.mask);
1315 }
1316 }
1317
1318 /**
1319 * Add a XFRM mark to message if required
1320 */
1321 static bool add_mark(struct nlmsghdr *hdr, int buflen, mark_t mark)
1322 {
1323 if (mark.value | mark.mask)
1324 {
1325 struct xfrm_mark *xmrk;
1326
1327 xmrk = netlink_reserve(hdr, buflen, XFRMA_MARK, sizeof(*xmrk));
1328 if (!xmrk)
1329 {
1330 return FALSE;
1331 }
1332 xmrk->v = mark.value;
1333 xmrk->m = mark.mask;
1334 }
1335 return TRUE;
1336 }
1337
1338 /**
1339 * Check if kernel supports HW offload
1340 */
1341 static void netlink_find_offload_feature(const char *ifname, int query_socket)
1342 {
1343 struct ethtool_sset_info *sset_info;
1344 struct ethtool_gstrings *cmd = NULL;
1345 struct ifreq ifr;
1346 uint32_t sset_len, i;
1347 char *str;
1348 int err;
1349
1350 netlink_hw_offload.state = NL_OFFLOAD_UNSUPPORTED;
1351
1352 /* determine number of device features */
1353 INIT_EXTRA(sset_info, sizeof(uint32_t),
1354 .cmd = ETHTOOL_GSSET_INFO,
1355 .sset_mask = 1ULL << ETH_SS_FEATURES,
1356 );
1357 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1358 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1359 ifr.ifr_data = (void*)sset_info;
1360
1361 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1362 if (err || sset_info->sset_mask != 1ULL << ETH_SS_FEATURES)
1363 {
1364 goto out;
1365 }
1366 sset_len = sset_info->data[0];
1367
1368 /* retrieve names of device features */
1369 INIT_EXTRA(cmd, ETH_GSTRING_LEN * sset_len,
1370 .cmd = ETHTOOL_GSTRINGS,
1371 .string_set = ETH_SS_FEATURES,
1372 );
1373 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1374 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1375 ifr.ifr_data = (void*)cmd;
1376
1377 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1378 if (err)
1379 {
1380 goto out;
1381 }
1382
1383 /* look for the ESP_HW feature bit */
1384 str = (char*)cmd->data;
1385 for (i = 0; i < cmd->len; i++)
1386 {
1387 if (strneq(str, "esp-hw-offload", ETH_GSTRING_LEN))
1388 {
1389 netlink_hw_offload.bit = i;
1390 netlink_hw_offload.total_blocks = (sset_len + 31) / 32;
1391 netlink_hw_offload.state = NL_OFFLOAD_SUPPORTED;
1392 break;
1393 }
1394 str += ETH_GSTRING_LEN;
1395 }
1396
1397 out:
1398 free(sset_info);
1399 free(cmd);
1400 }
1401
1402 /**
1403 * Check if interface supported HW offload
1404 */
1405 static bool netlink_detect_offload(const char *ifname)
1406 {
1407 struct ethtool_gfeatures *cmd;
1408 uint32_t feature_bit;
1409 struct ifreq ifr;
1410 int query_socket;
1411 int block;
1412 bool ret = FALSE;
1413
1414 query_socket = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_XFRM);
1415 if (query_socket < 0)
1416 {
1417 return FALSE;
1418 }
1419
1420 /* kernel requires a real interface in order to query the kernel-wide
1421 * capability, so we do it here on first invocation.
1422 */
1423 if (netlink_hw_offload.state == NL_OFFLOAD_UNKNOWN)
1424 {
1425 netlink_find_offload_feature(ifname, query_socket);
1426 }
1427 if (netlink_hw_offload.state == NL_OFFLOAD_UNSUPPORTED)
1428 {
1429 DBG1(DBG_KNL, "HW offload is not supported by kernel");
1430 goto out;
1431 }
1432
1433 /* feature is supported by kernel, query device features */
1434 INIT_EXTRA(cmd, sizeof(cmd->features[0]) * netlink_hw_offload.total_blocks,
1435 .cmd = ETHTOOL_GFEATURES,
1436 .size = netlink_hw_offload.total_blocks,
1437 );
1438 strncpy(ifr.ifr_name, ifname, IFNAMSIZ);
1439 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1440 ifr.ifr_data = (void*)cmd;
1441
1442 if (ioctl(query_socket, SIOCETHTOOL, &ifr))
1443 {
1444 goto out_free;
1445 }
1446
1447 block = netlink_hw_offload.bit / 32;
1448 feature_bit = 1U << (netlink_hw_offload.bit % 32);
1449 if (cmd->features[block].active & feature_bit)
1450 {
1451 ret = TRUE;
1452 }
1453
1454 out_free:
1455 free(cmd);
1456 if (!ret)
1457 {
1458 DBG1(DBG_KNL, "HW offload is not supported by device");
1459 }
1460 out:
1461 close(query_socket);
1462 return ret;
1463 }
1464
1465 /**
1466 * There are 3 HW offload configuration values:
1467 * 1. HW_OFFLOAD_NO : Do not configure HW offload.
1468 * 2. HW_OFFLOAD_YES : Configure HW offload.
1469 * Fail SA addition if offload is not supported.
1470 * 3. HW_OFFLOAD_AUTO : Configure HW offload if supported by the kernel
1471 * and device.
1472 * Do not fail SA addition otherwise.
1473 */
1474 static bool config_hw_offload(kernel_ipsec_sa_id_t *id,
1475 kernel_ipsec_add_sa_t *data, struct nlmsghdr *hdr,
1476 int buflen)
1477 {
1478 host_t *local = data->inbound ? id->dst : id->src;
1479 struct xfrm_user_offload *offload;
1480 bool hw_offload_yes, ret = FALSE;
1481 char *ifname;
1482
1483 /* do Ipsec configuration without offload */
1484 if (data->hw_offload == HW_OFFLOAD_NO)
1485 {
1486 return TRUE;
1487 }
1488
1489 hw_offload_yes = (data->hw_offload == HW_OFFLOAD_YES);
1490
1491 if (!charon->kernel->get_interface(charon->kernel, local, &ifname))
1492 {
1493 return !hw_offload_yes;
1494 }
1495
1496 /* check if interface supports hw_offload */
1497 if (!netlink_detect_offload(ifname))
1498 {
1499 ret = !hw_offload_yes;
1500 goto out;
1501 }
1502
1503 /* activate HW offload */
1504 offload = netlink_reserve(hdr, buflen,
1505 XFRMA_OFFLOAD_DEV, sizeof(*offload));
1506 if (!offload)
1507 {
1508 ret = !hw_offload_yes;
1509 goto out;
1510 }
1511 offload->ifindex = if_nametoindex(ifname);
1512 if (local->get_family(local) == AF_INET6)
1513 {
1514 offload->flags |= XFRM_OFFLOAD_IPV6;
1515 }
1516 offload->flags |= data->inbound ? XFRM_OFFLOAD_INBOUND : 0;
1517
1518 ret = TRUE;
1519
1520 out:
1521 free(ifname);
1522 return ret;
1523 }
1524
/**
 * Install an SA in the kernel using XFRM_MSG_NEWSA, or XFRM_MSG_UPDSA if
 * data->update is set.
 *
 * If IPComp is used and a CPI is given, an additional IPComp SA is installed
 * first via a recursive call (whose result is not checked) and the actual SA
 * is then installed in transport mode without compression.
 *
 * The request is assembled in a stack buffer that is wiped on every exit
 * path, as it holds the encryption/integrity keys.
 *
 * Note that data->replay_window is modified in place for outbound SAs.
 *
 * @param id		SA identifier (addresses, SPI, protocol, mark)
 * @param data		SA parameters
 * @return			SUCCESS if the kernel acknowledged the SA, FAILED otherwise
 */
1525 METHOD(kernel_ipsec_t, add_sa, status_t,
1526 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
1527 kernel_ipsec_add_sa_t *data)
1528 {
1529 netlink_buf_t request;
1530 const char *alg_name;
1531 char markstr[32] = "";
1532 struct nlmsghdr *hdr;
1533 struct xfrm_usersa_info *sa;
/* icv_size is in bits: the base of 64 is the ICV8 case, the AEAD cases
 * below add 32 per fall-through step (ICV12 -> 96, ICV16 -> 128) */
1534 uint16_t icv_size = 64, ipcomp = data->ipcomp;
1535 ipsec_mode_t mode = data->mode, original_mode = data->mode;
1536 traffic_selector_t *first_src_ts, *first_dst_ts;
1537 status_t status = FAILED;
1538
1539 /* if IPComp is used, we install an additional IPComp SA. if the cpi is 0
1540 * we are in the recursive call below */
1541 if (ipcomp != IPCOMP_NONE && data->cpi != 0)
1542 {
1543 lifetime_cfg_t lft = {{0,0,0},{0,0,0},{0,0,0}};
1544 kernel_ipsec_sa_id_t ipcomp_id = {
1545 .src = id->src,
1546 .dst = id->dst,
1547 .spi = htonl(ntohs(data->cpi)),
1548 .proto = IPPROTO_COMP,
1549 .mark = id->mark,
1550 };
1551 kernel_ipsec_add_sa_t ipcomp_sa = {
1552 .reqid = data->reqid,
1553 .mode = data->mode,
1554 .src_ts = data->src_ts,
1555 .dst_ts = data->dst_ts,
1556 .lifetime = &lft,
1557 .enc_alg = ENCR_UNDEFINED,
1558 .int_alg = AUTH_UNDEFINED,
1559 .tfc = data->tfc,
1560 .ipcomp = data->ipcomp,
1561 .initiator = data->initiator,
1562 .inbound = data->inbound,
1563 .update = data->update,
1564 };
/* .cpi is not set above and hence 0, which ends the recursion; the
 * status of this call is ignored */
1565 add_sa(this, &ipcomp_id, &ipcomp_sa);
1566 ipcomp = IPCOMP_NONE;
1567 /* use transport mode ESP SA, IPComp uses tunnel mode */
1568 mode = MODE_TRANSPORT;
1569 }
1570
1571 memset(&request, 0, sizeof(request));
1572 format_mark(markstr, sizeof(markstr), id->mark);
1573
1574 DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%u}%s",
1575 ntohl(id->spi), data->reqid, markstr);
1576
1577 hdr = &request.hdr;
1578 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1579 hdr->nlmsg_type = data->update ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1580 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1581
1582 sa = NLMSG_DATA(hdr);
1583 host2xfrm(id->src, &sa->saddr);
1584 host2xfrm(id->dst, &sa->id.daddr);
1585 sa->id.spi = id->spi;
1586 sa->id.proto = id->proto;
1587 sa->family = id->src->get_family(id->src);
1588 sa->mode = mode2kernel(mode);
1589
1590 if (!data->copy_df)
1591 {
1592 sa->flags |= XFRM_STATE_NOPMTUDISC;
1593 }
1594
1595 if (!data->copy_ecn)
1596 {
1597 sa->flags |= XFRM_STATE_NOECN;
1598 }
1599
/* DSCP handling depends on the direction: inbound SAs get the DECAP_DSCP
 * state flag for YES/IN_ONLY, outbound SAs get the DONT_ENCAP_DSCP extra
 * flag for IN_ONLY/NO */
1600 if (data->inbound)
1601 {
1602 switch (data->copy_dscp)
1603 {
1604 case DSCP_COPY_YES:
1605 case DSCP_COPY_IN_ONLY:
1606 sa->flags |= XFRM_STATE_DECAP_DSCP;
1607 break;
1608 default:
1609 break;
1610 }
1611 }
1612 else
1613 {
1614 switch (data->copy_dscp)
1615 {
1616 case DSCP_COPY_IN_ONLY:
1617 case DSCP_COPY_NO:
1618 {
1619 uint32_t *xflags;
1620
1621 xflags = netlink_reserve(hdr, sizeof(request),
1622 XFRMA_SA_EXTRA_FLAGS, sizeof(*xflags));
1623 if (!xflags)
1624 {
1625 goto failed;
1626 }
1627 /* currently the only extra flag */
1628 *xflags |= XFRM_SA_XFLAG_DONT_ENCAP_DSCP;
1629 break;
1630 }
1631 default:
1632 break;
1633 }
1634 }
1635
1636 switch (mode)
1637 {
1638 case MODE_TUNNEL:
1639 sa->flags |= XFRM_STATE_AF_UNSPEC;
1640 break;
1641 case MODE_BEET:
1642 case MODE_TRANSPORT:
1643 if (original_mode == MODE_TUNNEL)
1644 { /* don't install selectors for switched SAs. because only one
1645 * selector can be installed other traffic would get dropped */
1646 break;
1647 }
1648 if (data->src_ts->get_first(data->src_ts,
1649 (void**)&first_src_ts) == SUCCESS &&
1650 data->dst_ts->get_first(data->dst_ts,
1651 (void**)&first_dst_ts) == SUCCESS)
1652 {
1653 sa->sel = ts2selector(first_src_ts, first_dst_ts,
1654 data->interface);
1655 if (!this->proto_port_transport)
1656 {
1657 /* don't install proto/port on SA. This would break
1658 * potential secondary SAs for the same address using a
1659 * different prot/port. */
1660 sa->sel.proto = 0;
1661 sa->sel.dport = sa->sel.dport_mask = 0;
1662 sa->sel.sport = sa->sel.sport_mask = 0;
1663 }
1664 }
1665 break;
1666 default:
1667 break;
1668 }
1669 if (id->proto == IPPROTO_AH && sa->family == AF_INET)
1670 { /* use alignment to 4 bytes for IPv4 instead of the incorrect 8 byte
1671 * alignment that's used by default but is only valid for IPv6 */
1672 sa->flags |= XFRM_STATE_ALIGN4;
1673 }
1674
1675 sa->reqid = data->reqid;
1676 sa->lft.soft_byte_limit = XFRM_LIMIT(data->lifetime->bytes.rekey);
1677 sa->lft.hard_byte_limit = XFRM_LIMIT(data->lifetime->bytes.life);
1678 sa->lft.soft_packet_limit = XFRM_LIMIT(data->lifetime->packets.rekey);
1679 sa->lft.hard_packet_limit = XFRM_LIMIT(data->lifetime->packets.life);
1680 /* we use lifetimes since added, not since used */
1681 sa->lft.soft_add_expires_seconds = data->lifetime->time.rekey;
1682 sa->lft.hard_add_expires_seconds = data->lifetime->time.life;
1683 sa->lft.soft_use_expires_seconds = 0;
1684 sa->lft.hard_use_expires_seconds = 0;
1685
/* AEAD algorithms are passed as XFRMA_ALG_AEAD with an ICV length, all
 * other encryption algorithms as XFRMA_ALG_CRYPT */
1686 switch (data->enc_alg)
1687 {
1688 case ENCR_UNDEFINED:
1689 /* no encryption */
1690 break;
1691 case ENCR_AES_CCM_ICV16:
1692 case ENCR_AES_GCM_ICV16:
1693 case ENCR_NULL_AUTH_AES_GMAC:
1694 case ENCR_CAMELLIA_CCM_ICV16:
1695 case ENCR_CHACHA20_POLY1305:
1696 icv_size += 32;
1697 /* FALL */
1698 case ENCR_AES_CCM_ICV12:
1699 case ENCR_AES_GCM_ICV12:
1700 case ENCR_CAMELLIA_CCM_ICV12:
1701 icv_size += 32;
1702 /* FALL */
1703 case ENCR_AES_CCM_ICV8:
1704 case ENCR_AES_GCM_ICV8:
1705 case ENCR_CAMELLIA_CCM_ICV8:
1706 {
1707 struct xfrm_algo_aead *algo;
1708
1709 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1710 if (alg_name == NULL)
1711 {
1712 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1713 encryption_algorithm_names, data->enc_alg);
1714 goto failed;
1715 }
/* NOTE(review): the key length expression is printed with %d here and in
 * the similar messages below - presumably .len is a size_t, confirm that
 * the format specifier matches */
1716 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1717 encryption_algorithm_names, data->enc_alg,
1718 data->enc_key.len * 8);
1719
1720 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AEAD,
1721 sizeof(*algo) + data->enc_key.len);
1722 if (!algo)
1723 {
1724 goto failed;
1725 }
1726 algo->alg_key_len = data->enc_key.len * 8;
1727 algo->alg_icv_len = icv_size;
1728 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1729 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1730 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1731 break;
1732 }
1733 default:
1734 {
1735 struct xfrm_algo *algo;
1736
1737 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1738 if (alg_name == NULL)
1739 {
1740 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1741 encryption_algorithm_names, data->enc_alg);
1742 goto failed;
1743 }
1744 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1745 encryption_algorithm_names, data->enc_alg,
1746 data->enc_key.len * 8);
1747
1748 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_CRYPT,
1749 sizeof(*algo) + data->enc_key.len);
1750 if (!algo)
1751 {
1752 goto failed;
1753 }
1754 algo->alg_key_len = data->enc_key.len * 8;
1755 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1756 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1757 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1758 }
1759 }
1760
1761 if (data->int_alg != AUTH_UNDEFINED)
1762 {
/* a non-zero trunc_len selects XFRMA_ALG_AUTH_TRUNC with an explicit
 * truncation length in bits, otherwise plain XFRMA_ALG_AUTH is used */
1763 u_int trunc_len = 0;
1764
1765 alg_name = lookup_algorithm(INTEGRITY_ALGORITHM, data->int_alg);
1766 if (alg_name == NULL)
1767 {
1768 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1769 integrity_algorithm_names, data->int_alg);
1770 goto failed;
1771 }
1772 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1773 integrity_algorithm_names, data->int_alg, data->int_key.len * 8);
1774
1775 switch (data->int_alg)
1776 {
1777 case AUTH_HMAC_MD5_128:
1778 case AUTH_HMAC_SHA2_256_128:
1779 trunc_len = 128;
1780 break;
1781 case AUTH_HMAC_SHA1_160:
1782 trunc_len = 160;
1783 break;
1784 default:
1785 break;
1786 }
1787
1788 if (trunc_len)
1789 {
1790 struct xfrm_algo_auth* algo;
1791
1792 /* the kernel uses SHA256 with 96 bit truncation by default,
1793 * use specified truncation size supported by newer kernels.
1794 * also use this for untruncated MD5 and SHA1. */
1795 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH_TRUNC,
1796 sizeof(*algo) + data->int_key.len);
1797 if (!algo)
1798 {
1799 goto failed;
1800 }
1801 algo->alg_key_len = data->int_key.len * 8;
1802 algo->alg_trunc_len = trunc_len;
1803 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1804 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1805 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
1806 }
1807 else
1808 {
1809 struct xfrm_algo* algo;
1810
1811 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH,
1812 sizeof(*algo) + data->int_key.len);
1813 if (!algo)
1814 {
1815 goto failed;
1816 }
1817 algo->alg_key_len = data->int_key.len * 8;
1818 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1819 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1820 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
1821 }
1822 }
1823
/* uses the local ipcomp, which was reset to IPCOMP_NONE above if a
 * separate IPComp SA has been installed */
1824 if (ipcomp != IPCOMP_NONE)
1825 {
1826 struct xfrm_algo* algo;
1827
1828 alg_name = lookup_algorithm(COMPRESSION_ALGORITHM, ipcomp);
1829 if (alg_name == NULL)
1830 {
1831 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1832 ipcomp_transform_names, ipcomp);
1833 goto failed;
1834 }
1835 DBG2(DBG_KNL, " using compression algorithm %N",
1836 ipcomp_transform_names, ipcomp);
1837
1838 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_COMP,
1839 sizeof(*algo));
1840 if (!algo)
1841 {
1842 goto failed;
1843 }
1844 algo->alg_key_len = 0;
1845 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name));
1846 algo->alg_name[sizeof(algo->alg_name) - 1] = '\0';
1847 }
1848
1849 if (data->encap)
1850 {
1851 struct xfrm_encap_tmpl *tmpl;
1852
1853 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP, sizeof(*tmpl));
1854 if (!tmpl)
1855 {
1856 goto failed;
1857 }
1858 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
1859 tmpl->encap_sport = htons(id->src->get_port(id->src));
1860 tmpl->encap_dport = htons(id->dst->get_port(id->dst));
1861 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
1862 /* encap_oa could probably be derived from the
1863 * traffic selectors [rfc4306, p39]. In the netlink kernel
1864 * implementation pluto does the same as we do here but it uses
1865 * encap_oa in the pfkey implementation.
1866 * BUT as /usr/src/linux/net/key/af_key.c indicates the kernel ignores
1867 * it anyway
1868 * -> does that mean that NAT-T encap doesn't work in transport mode?
1869 * No. The reason the kernel ignores NAT-OA is that it recomputes
1870 * (or, rather, just ignores) the checksum. If packets pass the IPsec
1871 * checks it marks them "checksum ok" so OA isn't needed. */
1872 }
1873
1874 if (!add_mark(hdr, sizeof(request), id->mark))
1875 {
1876 goto failed;
1877 }
1878
1879 if (data->tfc && id->proto == IPPROTO_ESP && mode == MODE_TUNNEL)
1880 { /* the kernel supports TFC padding only for tunnel mode ESP SAs */
1881 uint32_t *tfcpad;
1882
1883 tfcpad = netlink_reserve(hdr, sizeof(request), XFRMA_TFCPAD,
1884 sizeof(*tfcpad));
1885 if (!tfcpad)
1886 {
1887 goto failed;
1888 }
1889 *tfcpad = data->tfc;
1890 }
1891
/* replay window and HW offload are only configured for non-IPComp SAs */
1892 if (id->proto != IPPROTO_COMP)
1893 {
1894 /* generally, we don't need a replay window for outbound SAs, however,
1895 * when using ESN the kernel rejects the attribute if it is 0 */
1896 if (!data->inbound && data->replay_window)
1897 {
1898 data->replay_window = data->esn ? 1 : 0;
1899 }
1900 if (data->replay_window != 0 && (data->esn || data->replay_window > 32))
1901 {
1902 /* for ESN or larger replay windows we need the new
1903 * XFRMA_REPLAY_ESN_VAL attribute to configure a bitmap */
1904 struct xfrm_replay_state_esn *replay;
1905 uint32_t bmp_size;
1906
/* bitmap size in bytes, window rounded up to a multiple of 32 bits */
1907 bmp_size = round_up(data->replay_window, sizeof(uint32_t) * 8) / 8;
1908 replay = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
1909 sizeof(*replay) + bmp_size);
1910 if (!replay)
1911 {
1912 goto failed;
1913 }
1914 /* bmp_len contains number uf __u32's */
1915 replay->bmp_len = bmp_size / sizeof(uint32_t);
1916 replay->replay_window = data->replay_window;
1917 DBG2(DBG_KNL, " using replay window of %u packets",
1918 data->replay_window);
1919
1920 if (data->esn)
1921 {
1922 DBG2(DBG_KNL, " using extended sequence numbers (ESN)");
1923 sa->flags |= XFRM_STATE_ESN;
1924 }
1925 }
1926 else
1927 {
1928 DBG2(DBG_KNL, " using replay window of %u packets",
1929 data->replay_window);
1930 sa->replay_window = data->replay_window;
1931 }
1932
1933 DBG2(DBG_KNL, " HW offload: %N", hw_offload_names, data->hw_offload);
1934 if (!config_hw_offload(id, data, hdr, sizeof(request)))
1935 {
1936 DBG1(DBG_KNL, "failed to configure HW offload");
1937 goto failed;
1938 }
1939 }
1940
1941 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
/* an update that fails with NOT_FOUND is retried once as a plain add */
1942 if (status == NOT_FOUND && data->update)
1943 {
1944 DBG1(DBG_KNL, "allocated SPI not found anymore, try to add SAD entry");
1945 hdr->nlmsg_type = XFRM_MSG_NEWSA;
1946 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
1947 }
1948
1949 if (status != SUCCESS)
1950 {
1951 DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x%s (%N)", ntohl(id->spi),
1952 markstr, status_names, status);
1953 status = FAILED;
1954 goto failed;
1955 }
1956
1957 status = SUCCESS;
1958
/* reached on success as well, the request contains key material */
1959 failed:
1960 memwipe(&request, sizeof(request));
1961 return status;
1962 }
1963
1964 /**
1965 * Get the ESN replay state (i.e. sequence numbers) of an SA.
1966 *
1967 * Allocates into one the replay state structure we get from the kernel.
1968 */
1969 static void get_replay_state(private_kernel_netlink_ipsec_t *this,
1970 kernel_ipsec_sa_id_t *sa,
1971 struct xfrm_replay_state_esn **replay_esn,
1972 uint32_t *replay_esn_len,
1973 struct xfrm_replay_state **replay,
1974 struct xfrm_lifetime_cur **lifetime)
1975 {
1976 netlink_buf_t request;
1977 struct nlmsghdr *hdr, *out = NULL;
1978 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
1979 size_t len;
1980 struct rtattr *rta;
1981 size_t rtasize;
1982
1983 memset(&request, 0, sizeof(request));
1984
1985 DBG2(DBG_KNL, "querying replay state from SAD entry with SPI %.8x",
1986 ntohl(sa->spi));
1987
1988 hdr = &request.hdr;
1989 hdr->nlmsg_flags = NLM_F_REQUEST;
1990 hdr->nlmsg_type = XFRM_MSG_GETAE;
1991 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
1992
1993 aevent_id = NLMSG_DATA(hdr);
1994 aevent_id->flags = XFRM_AE_RVAL;
1995
1996 host2xfrm(sa->dst, &aevent_id->sa_id.daddr);
1997 aevent_id->sa_id.spi = sa->spi;
1998 aevent_id->sa_id.proto = sa->proto;
1999 aevent_id->sa_id.family = sa->dst->get_family(sa->dst);
2000
2001 if (!add_mark(hdr, sizeof(request), sa->mark))
2002 {
2003 return;
2004 }
2005
2006 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2007 {
2008 hdr = out;
2009 while (NLMSG_OK(hdr, len))
2010 {
2011 switch (hdr->nlmsg_type)
2012 {
2013 case XFRM_MSG_NEWAE:
2014 {
2015 out_aevent = NLMSG_DATA(hdr);
2016 break;
2017 }
2018 case NLMSG_ERROR:
2019 {
2020 struct nlmsgerr *err = NLMSG_DATA(hdr);
2021 DBG1(DBG_KNL, "querying replay state from SAD entry "
2022 "failed: %s (%d)", strerror(-err->error), -err->error);
2023 break;
2024 }
2025 default:
2026 hdr = NLMSG_NEXT(hdr, len);
2027 continue;
2028 case NLMSG_DONE:
2029 break;
2030 }
2031 break;
2032 }
2033 }
2034
2035 if (out_aevent)
2036 {
2037 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2038 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2039 while (RTA_OK(rta, rtasize))
2040 {
2041 if (rta->rta_type == XFRMA_LTIME_VAL &&
2042 RTA_PAYLOAD(rta) == sizeof(**lifetime))
2043 {
2044 free(*lifetime);
2045 *lifetime = malloc(RTA_PAYLOAD(rta));
2046 memcpy(*lifetime, RTA_DATA(rta), RTA_PAYLOAD(rta));
2047 }
2048 if (rta->rta_type == XFRMA_REPLAY_VAL &&
2049 RTA_PAYLOAD(rta) == sizeof(**replay))
2050 {
2051 free(*replay);
2052 *replay = malloc(RTA_PAYLOAD(rta));
2053 memcpy(*replay, RTA_DATA(rta), RTA_PAYLOAD(rta));
2054 }
2055 if (rta->rta_type == XFRMA_REPLAY_ESN_VAL &&
2056 RTA_PAYLOAD(rta) >= sizeof(**replay_esn))
2057 {
2058 free(*replay_esn);
2059 *replay_esn = malloc(RTA_PAYLOAD(rta));
2060 *replay_esn_len = RTA_PAYLOAD(rta);
2061 memcpy(*replay_esn, RTA_DATA(rta), RTA_PAYLOAD(rta));
2062 }
2063 rta = RTA_NEXT(rta, rtasize);
2064 }
2065 }
2066 free(out);
2067 }
2068
2069 METHOD(kernel_ipsec_t, query_sa, status_t,
2070 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2071 kernel_ipsec_query_sa_t *data, uint64_t *bytes, uint64_t *packets,
2072 time_t *time)
2073 {
2074 netlink_buf_t request;
2075 struct nlmsghdr *out = NULL, *hdr;
2076 struct xfrm_usersa_id *sa_id;
2077 struct xfrm_usersa_info *sa = NULL;
2078 status_t status = FAILED;
2079 size_t len;
2080 char markstr[32] = "";
2081
2082 memset(&request, 0, sizeof(request));
2083 format_mark(markstr, sizeof(markstr), id->mark);
2084
2085 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x%s", ntohl(id->spi),
2086 markstr);
2087
2088 hdr = &request.hdr;
2089 hdr->nlmsg_flags = NLM_F_REQUEST;
2090 hdr->nlmsg_type = XFRM_MSG_GETSA;
2091 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2092
2093 sa_id = NLMSG_DATA(hdr);
2094 host2xfrm(id->dst, &sa_id->daddr);
2095 sa_id->spi = id->spi;
2096 sa_id->proto = id->proto;
2097 sa_id->family = id->dst->get_family(id->dst);
2098
2099 if (!add_mark(hdr, sizeof(request), id->mark))
2100 {
2101 return FAILED;
2102 }
2103
2104 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2105 {
2106 hdr = out;
2107 while (NLMSG_OK(hdr, len))
2108 {
2109 switch (hdr->nlmsg_type)
2110 {
2111 case XFRM_MSG_NEWSA:
2112 {
2113 sa = NLMSG_DATA(hdr);
2114 break;
2115 }
2116 case NLMSG_ERROR:
2117 {
2118 struct nlmsgerr *err = NLMSG_DATA(hdr);
2119
2120 DBG1(DBG_KNL, "querying SAD entry with SPI %.8x%s failed: "
2121 "%s (%d)", ntohl(id->spi), markstr,
2122 strerror(-err->error), -err->error);
2123 break;
2124 }
2125 default:
2126 hdr = NLMSG_NEXT(hdr, len);
2127 continue;
2128 case NLMSG_DONE:
2129 break;
2130 }
2131 break;
2132 }
2133 }
2134
2135 if (sa == NULL)
2136 {
2137 DBG2(DBG_KNL, "unable to query SAD entry with SPI %.8x%s",
2138 ntohl(id->spi), markstr);
2139 }
2140 else
2141 {
2142 if (bytes)
2143 {
2144 *bytes = sa->curlft.bytes;
2145 }
2146 if (packets)
2147 {
2148 *packets = sa->curlft.packets;
2149 }
2150 if (time)
2151 { /* curlft contains an "use" time, but that contains a timestamp
2152 * of the first use, not the last. Last use time must be queried
2153 * on the policy on Linux */
2154 *time = 0;
2155 }
2156 status = SUCCESS;
2157 }
2158 memwipe(out, len);
2159 free(out);
2160 return status;
2161 }
2162
2163 METHOD(kernel_ipsec_t, del_sa, status_t,
2164 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2165 kernel_ipsec_del_sa_t *data)
2166 {
2167 netlink_buf_t request;
2168 struct nlmsghdr *hdr;
2169 struct xfrm_usersa_id *sa_id;
2170 char markstr[32] = "";
2171
2172 /* if IPComp was used, we first delete the additional IPComp SA */
2173 if (data->cpi)
2174 {
2175 kernel_ipsec_sa_id_t ipcomp_id = {
2176 .src = id->src,
2177 .dst = id->dst,
2178 .spi = htonl(ntohs(data->cpi)),
2179 .proto = IPPROTO_COMP,
2180 .mark = id->mark,
2181 };
2182 kernel_ipsec_del_sa_t ipcomp = {};
2183 del_sa(this, &ipcomp_id, &ipcomp);
2184 }
2185
2186 memset(&request, 0, sizeof(request));
2187 format_mark(markstr, sizeof(markstr), id->mark);
2188
2189 DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x%s", ntohl(id->spi),
2190 markstr);
2191
2192 hdr = &request.hdr;
2193 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2194 hdr->nlmsg_type = XFRM_MSG_DELSA;
2195 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2196
2197 sa_id = NLMSG_DATA(hdr);
2198 host2xfrm(id->dst, &sa_id->daddr);
2199 sa_id->spi = id->spi;
2200 sa_id->proto = id->proto;
2201 sa_id->family = id->dst->get_family(id->dst);
2202
2203 if (!add_mark(hdr, sizeof(request), id->mark))
2204 {
2205 return FAILED;
2206 }
2207
2208 switch (this->socket_xfrm->send_ack(this->socket_xfrm, hdr))
2209 {
2210 case SUCCESS:
2211 DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x%s",
2212 ntohl(id->spi), markstr);
2213 return SUCCESS;
2214 case NOT_FOUND:
2215 return NOT_FOUND;
2216 default:
2217 DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x%s",
2218 ntohl(id->spi), markstr);
2219 return FAILED;
2220 }
2221 }
2222
2223 METHOD(kernel_ipsec_t, update_sa, status_t,
2224 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2225 kernel_ipsec_update_sa_t *data)
2226 {
2227 netlink_buf_t request;
2228 struct nlmsghdr *hdr, *out_hdr = NULL, *out = NULL;
2229 struct xfrm_usersa_id *sa_id;
2230 struct xfrm_usersa_info *sa;
2231 size_t len;
2232 struct rtattr *rta;
2233 size_t rtasize;
2234 struct xfrm_encap_tmpl* encap = NULL;
2235 struct xfrm_replay_state *replay = NULL;
2236 struct xfrm_replay_state_esn *replay_esn = NULL;
2237 struct xfrm_lifetime_cur *lifetime = NULL;
2238 uint32_t replay_esn_len = 0;
2239 kernel_ipsec_del_sa_t del = { 0 };
2240 status_t status = FAILED;
2241 char markstr[32] = "";
2242
2243 /* if IPComp is used, we first update the IPComp SA */
2244 if (data->cpi)
2245 {
2246 kernel_ipsec_sa_id_t ipcomp_id = {
2247 .src = id->src,
2248 .dst = id->dst,
2249 .spi = htonl(ntohs(data->cpi)),
2250 .proto = IPPROTO_COMP,
2251 .mark = id->mark,
2252 };
2253 kernel_ipsec_update_sa_t ipcomp = {
2254 .new_src = data->new_src,
2255 .new_dst = data->new_dst,
2256 };
2257 update_sa(this, &ipcomp_id, &ipcomp);
2258 }
2259
2260 memset(&request, 0, sizeof(request));
2261 format_mark(markstr, sizeof(markstr), id->mark);
2262
2263 DBG2(DBG_KNL, "querying SAD entry with SPI %.8x%s for update",
2264 ntohl(id->spi), markstr);
2265
2266 /* query the existing SA first */
2267 hdr = &request.hdr;
2268 hdr->nlmsg_flags = NLM_F_REQUEST;
2269 hdr->nlmsg_type = XFRM_MSG_GETSA;
2270 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2271
2272 sa_id = NLMSG_DATA(hdr);
2273 host2xfrm(id->dst, &sa_id->daddr);
2274 sa_id->spi = id->spi;
2275 sa_id->proto = id->proto;
2276 sa_id->family = id->dst->get_family(id->dst);
2277
2278 if (!add_mark(hdr, sizeof(request), id->mark))
2279 {
2280 return FAILED;
2281 }
2282
2283 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2284 {
2285 hdr = out;
2286 while (NLMSG_OK(hdr, len))
2287 {
2288 switch (hdr->nlmsg_type)
2289 {
2290 case XFRM_MSG_NEWSA:
2291 {
2292 out_hdr = hdr;
2293 break;
2294 }
2295 case NLMSG_ERROR:
2296 {
2297 struct nlmsgerr *err = NLMSG_DATA(hdr);
2298 DBG1(DBG_KNL, "querying SAD entry failed: %s (%d)",
2299 strerror(-err->error), -err->error);
2300 break;
2301 }
2302 default:
2303 hdr = NLMSG_NEXT(hdr, len);
2304 continue;
2305 case NLMSG_DONE:
2306 break;
2307 }
2308 break;
2309 }
2310 }
2311 if (!out_hdr)
2312 {
2313 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2314 ntohl(id->spi), markstr);
2315 goto failed;
2316 }
2317
2318 get_replay_state(this, id, &replay_esn, &replay_esn_len, &replay,
2319 &lifetime);
2320
2321 /* delete the old SA (without affecting the IPComp SA) */
2322 if (del_sa(this, id, &del) != SUCCESS)
2323 {
2324 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x%s",
2325 ntohl(id->spi), markstr);
2326 goto failed;
2327 }
2328
2329 DBG2(DBG_KNL, "updating SAD entry with SPI %.8x%s from %#H..%#H to "
2330 "%#H..%#H", ntohl(id->spi), markstr, id->src, id->dst, data->new_src,
2331 data->new_dst);
2332 /* copy over the SA from out to request */
2333 hdr = &request.hdr;
2334 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2335 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2336 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2337 sa = NLMSG_DATA(hdr);
2338 memcpy(sa, NLMSG_DATA(out_hdr), sizeof(struct xfrm_usersa_info));
2339 sa->family = data->new_dst->get_family(data->new_dst);
2340
2341 if (!id->src->ip_equals(id->src, data->new_src))
2342 {
2343 host2xfrm(data->new_src, &sa->saddr);
2344 }
2345 if (!id->dst->ip_equals(id->dst, data->new_dst))
2346 {
2347 host2xfrm(data->new_dst, &sa->id.daddr);
2348 }
2349
2350 rta = XFRM_RTA(out_hdr, struct xfrm_usersa_info);
2351 rtasize = XFRM_PAYLOAD(out_hdr, struct xfrm_usersa_info);
2352 while (RTA_OK(rta, rtasize))
2353 {
2354 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2355 if (rta->rta_type != XFRMA_ENCAP || data->new_encap)
2356 {
2357 if (rta->rta_type == XFRMA_ENCAP)
2358 { /* update encap tmpl */
2359 encap = RTA_DATA(rta);
2360 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2361 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2362 }
2363 if (rta->rta_type == XFRMA_OFFLOAD_DEV)
2364 { /* update offload device */
2365 struct xfrm_user_offload *offload;
2366 host_t *local;
2367 char *ifname;
2368
2369 offload = RTA_DATA(rta);
2370 local = offload->flags & XFRM_OFFLOAD_INBOUND ? data->new_dst
2371 : data->new_src;
2372
2373 if (charon->kernel->get_interface(charon->kernel, local,
2374 &ifname))
2375 {
2376 offload->ifindex = if_nametoindex(ifname);
2377 if (local->get_family(local) == AF_INET6)
2378 {
2379 offload->flags |= XFRM_OFFLOAD_IPV6;
2380 }
2381 else
2382 {
2383 offload->flags &= ~XFRM_OFFLOAD_IPV6;
2384 }
2385 free(ifname);
2386 }
2387 }
2388 netlink_add_attribute(hdr, rta->rta_type,
2389 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)),
2390 sizeof(request));
2391 }
2392 rta = RTA_NEXT(rta, rtasize);
2393 }
2394
2395 if (encap == NULL && data->new_encap)
2396 { /* add tmpl if we are enabling it */
2397 encap = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP,
2398 sizeof(*encap));
2399 if (!encap)
2400 {
2401 goto failed;
2402 }
2403 encap->encap_type = UDP_ENCAP_ESPINUDP;
2404 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2405 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2406 memset(&encap->encap_oa, 0, sizeof (xfrm_address_t));
2407 }
2408
2409 if (replay_esn)
2410 {
2411 struct xfrm_replay_state_esn *state;
2412
2413 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
2414 replay_esn_len);
2415 if (!state)
2416 {
2417 goto failed;
2418 }
2419 memcpy(state, replay_esn, replay_esn_len);
2420 }
2421 else if (replay)
2422 {
2423 struct xfrm_replay_state *state;
2424
2425 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_VAL,
2426 sizeof(*state));
2427 if (!state)
2428 {
2429 goto failed;
2430 }
2431 memcpy(state, replay, sizeof(*state));
2432 }
2433 else
2434 {
2435 DBG1(DBG_KNL, "unable to copy replay state from old SAD entry with "
2436 "SPI %.8x%s", ntohl(id->spi), markstr);
2437 }
2438 if (lifetime)
2439 {
2440 struct xfrm_lifetime_cur *state;
2441
2442 state = netlink_reserve(hdr, sizeof(request), XFRMA_LTIME_VAL,
2443 sizeof(*state));
2444 if (!state)
2445 {
2446 goto failed;
2447 }
2448 memcpy(state, lifetime, sizeof(*state));
2449 }
2450 else
2451 {
2452 DBG1(DBG_KNL, "unable to copy usage stats from old SAD entry with "
2453 "SPI %.8x%s", ntohl(id->spi), markstr);
2454 }
2455
2456 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2457 {
2458 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2459 ntohl(id->spi), markstr);
2460 goto failed;
2461 }
2462
2463 status = SUCCESS;
2464 failed:
2465 free(replay);
2466 free(replay_esn);
2467 free(lifetime);
2468 memwipe(out, len);
2469 memwipe(&request, sizeof(request));
2470 free(out);
2471
2472 return status;
2473 }
2474
2475 METHOD(kernel_ipsec_t, flush_sas, status_t,
2476 private_kernel_netlink_ipsec_t *this)
2477 {
2478 netlink_buf_t request;
2479 struct nlmsghdr *hdr;
2480 struct xfrm_usersa_flush *flush;
2481 struct {
2482 uint8_t proto;
2483 char *name;
2484 } protos[] = {
2485 { IPPROTO_AH, "AH" },
2486 { IPPROTO_ESP, "ESP" },
2487 { IPPROTO_COMP, "IPComp" },
2488 };
2489 int i;
2490
2491 memset(&request, 0, sizeof(request));
2492
2493 hdr = &request.hdr;
2494 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2495 hdr->nlmsg_type = XFRM_MSG_FLUSHSA;
2496 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
2497
2498 flush = NLMSG_DATA(hdr);
2499
2500 for (i = 0; i < countof(protos); i++)
2501 {
2502 DBG2(DBG_KNL, "flushing all %s SAD entries", protos[i].name);
2503
2504 flush->proto = protos[i].proto;
2505
2506 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2507 {
2508 DBG1(DBG_KNL, "unable to flush %s SAD entries", protos[i].name);
2509 return FAILED;
2510 }
2511 }
2512 return SUCCESS;
2513 }
2514
2515 /**
2516 * Unlock the mutex and signal waiting threads
2517 */
2518 static void policy_change_done(private_kernel_netlink_ipsec_t *this,
2519 policy_entry_t *policy)
2520 {
2521 policy->working = FALSE;
2522 if (policy->waiting)
2523 { /* don't need to wake threads waiting for other policies */
2524 this->condvar->broadcast(this->condvar);
2525 }
2526 this->mutex->unlock(this->mutex);
2527 }
2528
2529 /**
2530 * Install a route for the given policy if enabled and required
2531 */
2532 static void install_route(private_kernel_netlink_ipsec_t *this,
2533 policy_entry_t *policy, policy_sa_t *mapping, ipsec_sa_t *ipsec)
2534 {
2535 policy_sa_out_t *out = (policy_sa_out_t*)mapping;
2536 route_entry_t *route;
2537 host_t *iface;
2538
2539 INIT(route,
2540 .prefixlen = policy->sel.prefixlen_d,
2541 );
2542
2543 if (charon->kernel->get_address_by_ts(charon->kernel, out->src_ts,
2544 &route->src_ip, NULL) == SUCCESS)
2545 {
2546 if (!ipsec->dst->is_anyaddr(ipsec->dst))
2547 {
2548 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2549 ipsec->dst, -1, ipsec->src,
2550 &route->if_name);
2551 }
2552 else
2553 { /* for shunt policies */
2554 iface = xfrm2host(policy->sel.family, &policy->sel.daddr, 0);
2555 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2556 iface, policy->sel.prefixlen_d,
2557 route->src_ip, &route->if_name);
2558 iface->destroy(iface);
2559 }
2560 route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2561 memcpy(route->dst_net.ptr, &policy->sel.daddr, route->dst_net.len);
2562
2563 /* get the interface to install the route for, if we haven't one yet.
2564 * If we have a local address, use it. Otherwise (for shunt policies)
2565 * use the route's source address. */
2566 if (!route->if_name)
2567 {
2568 iface = ipsec->src;
2569 if (iface->is_anyaddr(iface))
2570 {
2571 iface = route->src_ip;
2572 }
2573 if (!charon->kernel->get_interface(charon->kernel, iface,
2574 &route->if_name))
2575 {
2576 route_entry_destroy(route);
2577 return;
2578 }
2579 }
2580 if (policy->route)
2581 {
2582 route_entry_t *old = policy->route;
2583 if (route_entry_equals(old, route))
2584 {
2585 route_entry_destroy(route);
2586 return;
2587 }
2588 /* uninstall previously installed route */
2589 if (charon->kernel->del_route(charon->kernel, old->dst_net,
2590 old->prefixlen, old->gateway,
2591 old->src_ip, old->if_name) != SUCCESS)
2592 {
2593 DBG1(DBG_KNL, "error uninstalling route installed with policy "
2594 "%R === %R %N", out->src_ts, out->dst_ts, policy_dir_names,
2595 policy->direction);
2596 }
2597 route_entry_destroy(old);
2598 policy->route = NULL;
2599 }
2600
2601 DBG2(DBG_KNL, "installing route: %R via %H src %H dev %s", out->dst_ts,
2602 route->gateway, route->src_ip, route->if_name);
2603 switch (charon->kernel->add_route(charon->kernel, route->dst_net,
2604 route->prefixlen, route->gateway,
2605 route->src_ip, route->if_name))
2606 {
2607 default:
2608 DBG1(DBG_KNL, "unable to install source route for %H",
2609 route->src_ip);
2610 /* FALL */
2611 case ALREADY_DONE:
2612 /* route exists, do not uninstall */
2613 route_entry_destroy(route);
2614 break;
2615 case SUCCESS:
2616 /* cache the installed route */
2617 policy->route = route;
2618 break;
2619 }
2620 }
2621 else
2622 {
2623 free(route);
2624 }
2625 }
2626
2627 /**
2628 * Add or update a policy in the kernel.
2629 *
2630 * Note: The mutex has to be locked when entering this function
2631 * and is unlocked here in any case.
2632 */
2633 static status_t add_policy_internal(private_kernel_netlink_ipsec_t *this,
2634 policy_entry_t *policy, policy_sa_t *mapping, bool update)
2635 {
2636 netlink_buf_t request;
2637 policy_entry_t clone;
2638 ipsec_sa_t *ipsec = mapping->sa;
2639 struct xfrm_userpolicy_info *policy_info;
2640 struct nlmsghdr *hdr;
2641 status_t status;
2642 int i;
2643
2644 /* clone the policy so we are able to check it out again later */
2645 memcpy(&clone, policy, sizeof(policy_entry_t));
2646
2647 memset(&request, 0, sizeof(request));
2648 hdr = &request.hdr;
2649 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2650 hdr->nlmsg_type = update ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
2651 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
2652
2653 policy_info = NLMSG_DATA(hdr);
2654 policy_info->sel = policy->sel;
2655 policy_info->dir = policy->direction;
2656
2657 /* calculate priority based on selector size, small size = high prio */
2658 policy_info->priority = mapping->priority;
2659 policy_info->action = mapping->type != POLICY_DROP ? XFRM_POLICY_ALLOW
2660 : XFRM_POLICY_BLOCK;
2661 policy_info->share = XFRM_SHARE_ANY;
2662
2663 /* policies don't expire */
2664 policy_info->lft.soft_byte_limit = XFRM_INF;
2665 policy_info->lft.soft_packet_limit = XFRM_INF;
2666 policy_info->lft.hard_byte_limit = XFRM_INF;
2667 policy_info->lft.hard_packet_limit = XFRM_INF;
2668 policy_info->lft.soft_add_expires_seconds = 0;
2669 policy_info->lft.hard_add_expires_seconds = 0;
2670 policy_info->lft.soft_use_expires_seconds = 0;
2671 policy_info->lft.hard_use_expires_seconds = 0;
2672
2673 if (mapping->type == POLICY_IPSEC && ipsec->cfg.reqid)
2674 {
2675 struct xfrm_user_tmpl *tmpl;
2676 struct {
2677 uint8_t proto;
2678 uint32_t spi;
2679 bool use;
2680 } protos[] = {
2681 { IPPROTO_COMP, htonl(ntohs(ipsec->cfg.ipcomp.cpi)),
2682 ipsec->cfg.ipcomp.transform != IPCOMP_NONE },
2683 { IPPROTO_ESP, ipsec->cfg.esp.spi, ipsec->cfg.esp.use },
2684 { IPPROTO_AH, ipsec->cfg.ah.spi, ipsec->cfg.ah.use },
2685 };
2686 ipsec_mode_t proto_mode = ipsec->cfg.mode;
2687 int count = 0;
2688
2689 for (i = 0; i < countof(protos); i++)
2690 {
2691 if (protos[i].use)
2692 {
2693 count++;
2694 }
2695 }
2696 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_TMPL,
2697 count * sizeof(*tmpl));
2698 if (!tmpl)
2699 {
2700 policy_change_done(this, policy);
2701 return FAILED;
2702 }
2703
2704 for (i = 0; i < countof(protos); i++)
2705 {
2706 if (!protos[i].use)
2707 {
2708 continue;
2709 }
2710 tmpl->reqid = ipsec->cfg.reqid;
2711 tmpl->id.proto = protos[i].proto;
2712 if (policy->direction == POLICY_OUT)
2713 {
2714 tmpl->id.spi = protos[i].spi;
2715 }
2716 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
2717 tmpl->mode = mode2kernel(proto_mode);
2718 tmpl->optional = protos[i].proto == IPPROTO_COMP &&
2719 policy->direction != POLICY_OUT;
2720 tmpl->family = ipsec->src->get_family(ipsec->src);
2721
2722 if (proto_mode == MODE_TUNNEL || proto_mode == MODE_BEET)
2723 { /* only for tunnel mode */
2724 host2xfrm(ipsec->src, &tmpl->saddr);
2725 host2xfrm(ipsec->dst, &tmpl->id.daddr);
2726 }
2727
2728 tmpl++;
2729
2730 /* use transport mode for other SAs */
2731 proto_mode = MODE_TRANSPORT;
2732 }
2733 }
2734
2735 if (!add_mark(hdr, sizeof(request), ipsec->mark))
2736 {
2737 policy_change_done(this, policy);
2738 return FAILED;
2739 }
2740 this->mutex->unlock(this->mutex);
2741
2742 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2743 if (status == ALREADY_DONE && !update)
2744 {
2745 DBG1(DBG_KNL, "policy already exists, try to update it");
2746 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
2747 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2748 }
2749
2750 this->mutex->lock(this->mutex);
2751 if (status != SUCCESS)
2752 {
2753 policy_change_done(this, policy);
2754 return FAILED;
2755 }
2756 /* install a route, if:
2757 * - this is an outbound policy (to just get one for each child)
2758 * - routing is not disabled via strongswan.conf
2759 * - the selector is not for a specific protocol/port
2760 * - we are in tunnel/BEET mode or install a bypass policy
2761 */
2762 if (policy->direction == POLICY_OUT && this->install_routes &&
2763 !policy->sel.proto && !policy->sel.dport && !policy->sel.sport)
2764 {
2765 if (mapping->type == POLICY_PASS ||
2766 (mapping->type == POLICY_IPSEC && ipsec->cfg.mode != MODE_TRANSPORT))
2767 {
2768 install_route(this, policy, mapping, ipsec);
2769 }
2770 }
2771 policy_change_done(this, policy);
2772 return SUCCESS;
2773 }
2774
2775 METHOD(kernel_ipsec_t, add_policy, status_t,
2776 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
2777 kernel_ipsec_manage_policy_t *data)
2778 {
2779 policy_entry_t *policy, *current;
2780 policy_sa_t *assigned_sa, *current_sa;
2781 enumerator_t *enumerator;
2782 bool found = FALSE, update = TRUE;
2783 char markstr[32] = "";
2784 uint32_t cur_priority = 0;
2785 int use_count;
2786
2787 /* create a policy */
2788 INIT(policy,
2789 .sel = ts2selector(id->src_ts, id->dst_ts, id->interface),
2790 .mark = id->mark.value & id->mark.mask,
2791 .direction = id->dir,
2792 .reqid = data->sa->reqid,
2793 );
2794 format_mark(markstr, sizeof(markstr), id->mark);
2795
2796 /* find the policy, which matches EXACTLY */
2797 this->mutex->lock(this->mutex);
2798 current = this->policies->get(this->policies, policy);
2799 if (current)
2800 {
2801 if (current->reqid && data->sa->reqid &&
2802 current->reqid != data->sa->reqid)
2803 {
2804 DBG1(DBG_CFG, "unable to install policy %R === %R %N%s for reqid "
2805 "%u, the same policy for reqid %u exists",
2806 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr,
2807 data->sa->reqid, current->reqid);
2808 policy_entry_destroy(this, policy);
2809 this->mutex->unlock(this->mutex);
2810 return INVALID_STATE;
2811 }
2812 /* use existing policy */
2813 DBG2(DBG_KNL, "policy %R === %R %N%s already exists, increasing "
2814 "refcount", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
2815 markstr);
2816 policy_entry_destroy(this, policy);
2817 policy = current;
2818 found = TRUE;
2819
2820 policy->waiting++;
2821 while (policy->working)
2822 {
2823 this->condvar->wait(this->condvar, this->mutex);
2824 }
2825 policy->waiting--;
2826 policy->working = TRUE;
2827 }
2828 else
2829 { /* use the new one, if we have no such policy */
2830 policy->used_by = linked_list_create();
2831 this->policies->put(this->policies, policy, policy);
2832 }
2833
2834 /* cache the assigned IPsec SA */
2835 assigned_sa = policy_sa_create(this, id->dir, data->type, data->src,
2836 data->dst, id->src_ts, id->dst_ts, id->mark, data->sa);
2837 assigned_sa->auto_priority = get_priority(policy, data->prio, id->interface);
2838 assigned_sa->priority = this->get_priority ? this->get_priority(id, data)
2839 : data->manual_prio;
2840 assigned_sa->priority = assigned_sa->priority ?: assigned_sa->auto_priority;
2841
2842 /* insert the SA according to its priority */
2843 enumerator = policy->used_by->create_enumerator(policy->used_by);
2844 while (enumerator->enumerate(enumerator, (void**)&current_sa))
2845 {
2846 if (current_sa->priority > assigned_sa->priority)
2847 {
2848 break;
2849 }
2850 if (current_sa->priority == assigned_sa->priority)
2851 {
2852 /* in case of equal manual prios order SAs by automatic priority */
2853 if (current_sa->auto_priority > assigned_sa->auto_priority)
2854 {
2855 break;
2856 }
2857 /* prefer SAs with a reqid over those without */
2858 if (current_sa->auto_priority == assigned_sa->auto_priority &&
2859 (!current_sa->sa->cfg.reqid || assigned_sa->sa->cfg.reqid))
2860 {
2861 break;
2862 }
2863 }
2864 if (update)
2865 {
2866 cur_priority = current_sa->priority;
2867 update = FALSE;
2868 }
2869 }
2870 policy->used_by->insert_before(policy->used_by, enumerator, assigned_sa);
2871 enumerator->destroy(enumerator);
2872
2873 use_count = policy->used_by->get_count(policy->used_by);
2874 if (!update)
2875 { /* we don't update the policy if the priority is lower than that of
2876 * the currently installed one */
2877 policy_change_done(this, policy);
2878 DBG2(DBG_KNL, "not updating policy %R === %R %N%s [priority %u, "
2879 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
2880 id->dir, markstr, cur_priority, use_count);
2881 return SUCCESS;
2882 }
2883 policy->reqid = assigned_sa->sa->cfg.reqid;
2884
2885 if (this->policy_update)
2886 {
2887 found = TRUE;
2888 }
2889
2890 DBG2(DBG_KNL, "%s policy %R === %R %N%s [priority %u, refcount %d]",
2891 found ? "updating" : "adding", id->src_ts, id->dst_ts,
2892 policy_dir_names, id->dir, markstr, assigned_sa->priority, use_count);
2893
2894 if (add_policy_internal(this, policy, assigned_sa, found) != SUCCESS)
2895 {
2896 DBG1(DBG_KNL, "unable to %s policy %R === %R %N%s",
2897 found ? "update" : "add", id->src_ts, id->dst_ts,
2898 policy_dir_names, id->dir, markstr);
2899 return FAILED;
2900 }
2901 return SUCCESS;
2902 }
2903
2904 METHOD(kernel_ipsec_t, query_policy, status_t,
2905 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
2906 kernel_ipsec_query_policy_t *data, time_t *use_time)
2907 {
2908 netlink_buf_t request;
2909 struct nlmsghdr *out = NULL, *hdr;
2910 struct xfrm_userpolicy_id *policy_id;
2911 struct xfrm_userpolicy_info *policy = NULL;
2912 size_t len;
2913 char markstr[32] = "";
2914
2915 memset(&request, 0, sizeof(request));
2916 format_mark(markstr, sizeof(markstr), id->mark);
2917
2918 DBG2(DBG_KNL, "querying policy %R === %R %N%s", id->src_ts, id->dst_ts,
2919 policy_dir_names, id->dir, markstr);
2920
2921 hdr = &request.hdr;
2922 hdr->nlmsg_flags = NLM_F_REQUEST;
2923 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
2924 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
2925
2926 policy_id = NLMSG_DATA(hdr);
2927 policy_id->sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
2928 policy_id->dir = id->dir;
2929
2930 if (!add_mark(hdr, sizeof(request), id->mark))
2931 {
2932 return FAILED;
2933 }
2934
2935 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2936 {
2937 hdr = out;
2938 while (NLMSG_OK(hdr, len))
2939 {
2940 switch (hdr->nlmsg_type)
2941 {
2942 case XFRM_MSG_NEWPOLICY:
2943 {
2944 policy = NLMSG_DATA(hdr);
2945 break;
2946 }
2947 case NLMSG_ERROR:
2948 {
2949 struct nlmsgerr *err = NLMSG_DATA(hdr);
2950 DBG1(DBG_KNL, "querying policy failed: %s (%d)",
2951 strerror(-err->error), -err->error);
2952 break;
2953 }
2954 default:
2955 hdr = NLMSG_NEXT(hdr, len);
2956 continue;
2957 case NLMSG_DONE:
2958 break;
2959 }
2960 break;
2961 }
2962 }
2963
2964 if (policy == NULL)
2965 {
2966 DBG2(DBG_KNL, "unable to query policy %R === %R %N%s", id->src_ts,
2967 id->dst_ts, policy_dir_names, id->dir, markstr);
2968 free(out);
2969 return FAILED;
2970 }
2971
2972 if (policy->curlft.use_time)
2973 {
2974 /* we need the monotonic time, but the kernel returns system time. */
2975 *use_time = time_monotonic(NULL) - (time(NULL) - policy->curlft.use_time);
2976 }
2977 else
2978 {
2979 *use_time = 0;
2980 }
2981
2982 free(out);
2983 return SUCCESS;
2984 }
2985
2986 METHOD(kernel_ipsec_t, del_policy, status_t,
2987 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
2988 kernel_ipsec_manage_policy_t *data)
2989 {
2990 policy_entry_t *current, policy;
2991 enumerator_t *enumerator;
2992 policy_sa_t *mapping;
2993 netlink_buf_t request;
2994 struct nlmsghdr *hdr;
2995 struct xfrm_userpolicy_id *policy_id;
2996 bool is_installed = TRUE;
2997 uint32_t priority, auto_priority, cur_priority;
2998 ipsec_sa_t assigned_sa = {
2999 .src = data->src,
3000 .dst = data->dst,
3001 .mark = id->mark,
3002 .cfg = *data->sa,
3003 };
3004 char markstr[32] = "";
3005 int use_count;
3006 status_t status = SUCCESS;
3007
3008 format_mark(markstr, sizeof(markstr), id->mark);
3009
3010 DBG2(DBG_KNL, "deleting policy %R === %R %N%s", id->src_ts, id->dst_ts,
3011 policy_dir_names, id->dir, markstr);
3012
3013 /* create a policy */
3014 memset(&policy, 0, sizeof(policy_entry_t));
3015 policy.sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3016 policy.mark = id->mark.value & id->mark.mask;
3017 policy.direction = id->dir;
3018
3019 /* find the policy */
3020 this->mutex->lock(this->mutex);
3021 current = this->policies->get(this->policies, &policy);
3022 if (!current)
3023 {
3024 DBG1(DBG_KNL, "deleting policy %R === %R %N%s failed, not found",
3025 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr);
3026 this->mutex->unlock(this->mutex);
3027 return NOT_FOUND;
3028 }
3029 current->waiting++;
3030 while (current->working)
3031 {
3032 this->condvar->wait(this->condvar, this->mutex);
3033 }
3034 current->working = TRUE;
3035 current->waiting--;
3036
3037 /* remove mapping to SA by reqid and priority */
3038 auto_priority = get_priority(current, data->prio,id->interface);
3039 priority = this->get_priority ? this->get_priority(id, data)
3040 : data->manual_prio;
3041 priority = priority ?: auto_priority;
3042
3043 enumerator = current->used_by->create_enumerator(current->used_by);
3044 while (enumerator->enumerate(enumerator, (void**)&mapping))
3045 {
3046 if (priority == mapping->priority &&
3047 auto_priority == mapping->auto_priority &&
3048 data->type == mapping->type &&
3049 ipsec_sa_equals(mapping->sa, &assigned_sa))
3050 {
3051 current->used_by->remove_at(current->used_by, enumerator);
3052 policy_sa_destroy(mapping, id->dir, this);
3053 break;
3054 }
3055 if (is_installed)
3056 {
3057 cur_priority = mapping->priority;
3058 is_installed = FALSE;
3059 }
3060 }
3061 enumerator->destroy(enumerator);
3062
3063 use_count = current->used_by->get_count(current->used_by);
3064 if (use_count > 0)
3065 { /* policy is used by more SAs, keep in kernel */
3066 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
3067 if (!is_installed)
3068 { /* no need to update as the policy was not installed for this SA */
3069 policy_change_done(this, current);
3070 DBG2(DBG_KNL, "not updating policy %R === %R %N%s [priority %u, "
3071 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
3072 id->dir, markstr, cur_priority, use_count);
3073 return SUCCESS;
3074 }
3075 current->used_by->get_first(current->used_by, (void**)&mapping);
3076 current->reqid = mapping->sa->cfg.reqid;
3077
3078 DBG2(DBG_KNL, "updating policy %R === %R %N%s [priority %u, "
3079 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3080 markstr, mapping->priority, use_count);
3081
3082 if (add_policy_internal(this, current, mapping, TRUE) != SUCCESS)
3083 {
3084 DBG1(DBG_KNL, "unable to update policy %R === %R %N%s",
3085 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr);
3086 return FAILED;
3087 }
3088 return SUCCESS;
3089 }
3090
3091 memset(&request, 0, sizeof(request));
3092
3093 hdr = &request.hdr;
3094 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3095 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
3096 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3097
3098 policy_id = NLMSG_DATA(hdr);
3099 policy_id->sel = current->sel;
3100 policy_id->dir = id->dir;
3101
3102 if (!add_mark(hdr, sizeof(request), id->mark))
3103 {
3104 policy_change_done(this, current);
3105 return FAILED;
3106 }
3107
3108 if (current->route)
3109 {
3110 route_entry_t *route = current->route;
3111 if (charon->kernel->del_route(charon->kernel, route->dst_net,
3112 route->prefixlen, route->gateway,
3113 route->src_ip, route->if_name) != SUCCESS)
3114 {
3115 DBG1(DBG_KNL, "error uninstalling route installed with policy "
3116 "%R === %R %N%s", id->src_ts, id->dst_ts, policy_dir_names,
3117 id->dir, markstr);
3118 }
3119 }
3120 this->mutex->unlock(this->mutex);
3121
3122 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3123 {
3124 DBG1(DBG_KNL, "unable to delete policy %R === %R %N%s", id->src_ts,
3125 id->dst_ts, policy_dir_names, id->dir, markstr);
3126 status = FAILED;
3127 }
3128
3129 this->mutex->lock(this->mutex);
3130 if (!current->waiting)
3131 { /* only if no other thread still needs the policy */
3132 this->policies->remove(this->policies, current);
3133 policy_entry_destroy(this, current);
3134 this->mutex->unlock(this->mutex);
3135 }
3136 else
3137 {
3138 policy_change_done(this, current);
3139 }
3140 return status;
3141 }
3142
3143 METHOD(kernel_ipsec_t, flush_policies, status_t,
3144 private_kernel_netlink_ipsec_t *this)
3145 {
3146 netlink_buf_t request;
3147 struct nlmsghdr *hdr;
3148
3149 memset(&request, 0, sizeof(request));
3150
3151 DBG2(DBG_KNL, "flushing all policies from SPD");
3152
3153 hdr = &request.hdr;
3154 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3155 hdr->nlmsg_type = XFRM_MSG_FLUSHPOLICY;
3156 hdr->nlmsg_len = NLMSG_LENGTH(0); /* no data associated */
3157
3158 /* by adding an rtattr of type XFRMA_POLICY_TYPE we could restrict this
3159 * to main or sub policies (default is main) */
3160
3161 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3162 {
3163 DBG1(DBG_KNL, "unable to flush SPD entries");
3164 return FAILED;
3165 }
3166 return SUCCESS;
3167 }
3168
3169 /**
3170 * Bypass socket using a per-socket policy
3171 */
3172 static bool add_socket_bypass(private_kernel_netlink_ipsec_t *this,
3173 int fd, int family)
3174 {
3175 struct xfrm_userpolicy_info policy;
3176 u_int sol, ipsec_policy;
3177
3178 switch (family)
3179 {
3180 case AF_INET:
3181 sol = SOL_IP;
3182 ipsec_policy = IP_XFRM_POLICY;
3183 break;
3184 case AF_INET6:
3185 sol = SOL_IPV6;
3186 ipsec_policy = IPV6_XFRM_POLICY;
3187 break;
3188 default:
3189 return FALSE;
3190 }
3191
3192 memset(&policy, 0, sizeof(policy));
3193 policy.action = XFRM_POLICY_ALLOW;
3194 policy.sel.family = family;
3195
3196 policy.dir = XFRM_POLICY_OUT;
3197 if (setsockopt(fd, sol, ipsec_policy, &policy, sizeof(policy)) < 0)
3198 {
3199 DBG1(DBG_KNL, "unable to set IPSEC_POLICY on socket: %s (%d)",
3200 strerror(errno), errno);
3201 return FALSE;
3202 }
3203 policy.dir = XFRM_POLICY_IN;
3204 if (setsockopt(fd, sol, ipsec_policy, &policy, sizeof(policy)) < 0)
3205 {
3206 DBG1(DBG_KNL, "unable to set IPSEC_POLICY on socket: %s (%d)",
3207 strerror(errno), errno);
3208 return FALSE;
3209 }
3210 return TRUE;
3211 }
3212
/**
 * Description of a port based IKE bypass policy, as installed in the kernel
 * for both directions
 */
typedef struct {
	/** address family of the policy's selector */
	int family;
	/** layer 4 protocol of the policy's selector */
	int proto;
	/** port number the selector matches, in network order */
	uint16_t port;
} bypass_t;
3224
3225 /**
3226 * Add or remove a bypass policy from/to kernel
3227 */
3228 static bool manage_bypass(private_kernel_netlink_ipsec_t *this,
3229 int type, policy_dir_t dir, bypass_t *bypass)
3230 {
3231 netlink_buf_t request;
3232 struct xfrm_selector *sel;
3233 struct nlmsghdr *hdr;
3234
3235 memset(&request, 0, sizeof(request));
3236 hdr = &request.hdr;
3237 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3238 hdr->nlmsg_type = type;
3239
3240 if (type == XFRM_MSG_NEWPOLICY)
3241 {
3242 struct xfrm_userpolicy_info *policy;
3243
3244 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
3245
3246 policy = NLMSG_DATA(hdr);
3247 policy->dir = dir;
3248 policy->priority = 32;
3249 policy->action = XFRM_POLICY_ALLOW;
3250 policy->share = XFRM_SHARE_ANY;
3251
3252 policy->lft.soft_byte_limit = XFRM_INF;
3253 policy->lft.soft_packet_limit = XFRM_INF;
3254 policy->lft.hard_byte_limit = XFRM_INF;
3255 policy->lft.hard_packet_limit = XFRM_INF;
3256
3257 sel = &policy->sel;
3258 }
3259 else /* XFRM_MSG_DELPOLICY */
3260 {
3261 struct xfrm_userpolicy_id *policy;
3262
3263 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3264
3265 policy = NLMSG_DATA(hdr);
3266 policy->dir = dir;
3267
3268 sel = &policy->sel;
3269 }
3270
3271 sel->family = bypass->family;
3272 sel->proto = bypass->proto;
3273 if (dir == POLICY_IN)
3274 {
3275 sel->dport = bypass->port;
3276 sel->dport_mask = 0xffff;
3277 }
3278 else
3279 {
3280 sel->sport = bypass->port;
3281 sel->sport_mask = 0xffff;
3282 }
3283 return this->socket_xfrm->send_ack(this->socket_xfrm, hdr) == SUCCESS;
3284 }
3285
3286 /**
3287 * Bypass socket using a port-based bypass policy
3288 */
3289 static bool add_port_bypass(private_kernel_netlink_ipsec_t *this,
3290 int fd, int family)
3291 {
3292 union {
3293 struct sockaddr sa;
3294 struct sockaddr_in in;
3295 struct sockaddr_in6 in6;
3296 } saddr;
3297 socklen_t len;
3298 bypass_t bypass = {
3299 .family = family,
3300 };
3301
3302 len = sizeof(saddr);
3303 if (getsockname(fd, &saddr.sa, &len) != 0)
3304 {
3305 return FALSE;
3306 }
3307 #ifdef SO_PROTOCOL /* since 2.6.32 */
3308 len = sizeof(bypass.proto);
3309 if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &bypass.proto, &len) != 0)
3310 #endif
3311 { /* assume UDP if SO_PROTOCOL not supported */
3312 bypass.proto = IPPROTO_UDP;
3313 }
3314 switch (family)
3315 {
3316 case AF_INET:
3317 bypass.port = saddr.in.sin_port;
3318 break;
3319 case AF_INET6:
3320 bypass.port = saddr.in6.sin6_port;
3321 break;
3322 default:
3323 return FALSE;
3324 }
3325
3326 if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_IN, &bypass))
3327 {
3328 return FALSE;
3329 }
3330 if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_OUT, &bypass))
3331 {
3332 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, &bypass);
3333 return FALSE;
3334 }
3335 array_insert(this->bypass, ARRAY_TAIL, &bypass);
3336
3337 return TRUE;
3338 }
3339
3340 /**
3341 * Remove installed port based bypass policy
3342 */
3343 static void remove_port_bypass(bypass_t *bypass, int idx,
3344 private_kernel_netlink_ipsec_t *this)
3345 {
3346 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_OUT, bypass);
3347 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, bypass);
3348 }
3349
3350 METHOD(kernel_ipsec_t, bypass_socket, bool,
3351 private_kernel_netlink_ipsec_t *this, int fd, int family)
3352 {
3353 if (lib->settings->get_bool(lib->settings,
3354 "%s.plugins.kernel-netlink.port_bypass", FALSE, lib->ns))
3355 {
3356 return add_port_bypass(this, fd, family);
3357 }
3358 return add_socket_bypass(this, fd, family);
3359 }
3360
3361 METHOD(kernel_ipsec_t, enable_udp_decap, bool,
3362 private_kernel_netlink_ipsec_t *this, int fd, int family, uint16_t port)
3363 {
3364 int type = UDP_ENCAP_ESPINUDP;
3365
3366 if (setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type)) < 0)
3367 {
3368 DBG1(DBG_KNL, "unable to set UDP_ENCAP: %s", strerror(errno));
3369 return FALSE;
3370 }
3371 return TRUE;
3372 }
3373
3374 METHOD(kernel_ipsec_t, destroy, void,
3375 private_kernel_netlink_ipsec_t *this)
3376 {
3377 enumerator_t *enumerator;
3378 policy_entry_t *policy;
3379
3380 array_destroy_function(this->bypass,
3381 (array_callback_t)remove_port_bypass, this);
3382 if (this->socket_xfrm_events > 0)
3383 {
3384 lib->watcher->remove(lib->watcher, this->socket_xfrm_events);
3385 close(this->socket_xfrm_events);
3386 }
3387 DESTROY_IF(this->socket_xfrm);
3388 enumerator = this->policies->create_enumerator(this->policies);
3389 while (enumerator->enumerate(enumerator, &policy, &policy))
3390 {
3391 policy_entry_destroy(this, policy);
3392 }
3393 enumerator->destroy(enumerator);
3394 this->policies->destroy(this->policies);
3395 this->sas->destroy(this->sas);
3396 this->condvar->destroy(this->condvar);
3397 this->mutex->destroy(this->mutex);
3398 free(this);
3399 }
3400
3401 /**
3402 * Get the currently configured SPD hashing thresholds for an address family
3403 */
3404 static bool get_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
3405 int type, uint8_t *lbits, uint8_t *rbits)
3406 {
3407 netlink_buf_t request;
3408 struct nlmsghdr *hdr, *out;
3409 struct xfrmu_spdhthresh *thresh;
3410 struct rtattr *rta;
3411 size_t len, rtasize;
3412 bool success = FALSE;
3413
3414 memset(&request, 0, sizeof(request));
3415
3416 hdr = &request.hdr;
3417 hdr->nlmsg_flags = NLM_F_REQUEST;
3418 hdr->nlmsg_type = XFRM_MSG_GETSPDINFO;
3419 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
3420
3421 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
3422 {
3423 hdr = out;
3424 while (NLMSG_OK(hdr, len))
3425 {
3426 switch (hdr->nlmsg_type)
3427 {
3428 case XFRM_MSG_NEWSPDINFO:
3429 {
3430 rta = XFRM_RTA(hdr, uint32_t);
3431 rtasize = XFRM_PAYLOAD(hdr, uint32_t);
3432 while (RTA_OK(rta, rtasize))
3433 {
3434 if (rta->rta_type == type &&
3435 RTA_PAYLOAD(rta) == sizeof(*thresh))
3436 {
3437 thresh = RTA_DATA(rta);
3438 *lbits = thresh->lbits;
3439 *rbits = thresh->rbits;
3440 success = TRUE;
3441 break;
3442 }
3443 rta = RTA_NEXT(rta, rtasize);
3444 }
3445 break;
3446 }
3447 case NLMSG_ERROR:
3448 {
3449 struct nlmsgerr *err = NLMSG_DATA(hdr);
3450 DBG1(DBG_KNL, "getting SPD hash threshold failed: %s (%d)",
3451 strerror(-err->error), -err->error);
3452 break;
3453 }
3454 default:
3455 hdr = NLMSG_NEXT(hdr, len);
3456 continue;
3457 case NLMSG_DONE:
3458 break;
3459 }
3460 break;
3461 }
3462 free(out);
3463 }
3464 return success;
3465 }
3466
3467 /**
3468 * Configure SPD hashing threshold for an address family
3469 */
3470 static void setup_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
3471 char *key, int type, uint8_t def)
3472 {
3473 struct xfrmu_spdhthresh *thresh;
3474 struct nlmsghdr *hdr;
3475 netlink_buf_t request;
3476 uint8_t lbits, rbits;
3477
3478 if (!get_spd_hash_thresh(this, type, &lbits, &rbits))
3479 {
3480 return;
3481 }
3482 memset(&request, 0, sizeof(request));
3483
3484 hdr = &request.hdr;
3485 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3486 hdr->nlmsg_type = XFRM_MSG_NEWSPDINFO;
3487 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
3488
3489 thresh = netlink_reserve(hdr, sizeof(request), type, sizeof(*thresh));
3490 thresh->lbits = lib->settings->get_int(lib->settings,
3491 "%s.plugins.kernel-netlink.spdh_thresh.%s.lbits",
3492 def, lib->ns, key);
3493 thresh->rbits = lib->settings->get_int(lib->settings,
3494 "%s.plugins.kernel-netlink.spdh_thresh.%s.rbits",
3495 def, lib->ns, key);
3496 if (thresh->lbits != lbits || thresh->rbits != rbits)
3497 {
3498 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3499 {
3500 DBG1(DBG_KNL, "setting SPD hash threshold failed");
3501 }
3502 }
3503 }
3504
3505 /*
3506 * Described in header.
3507 */
3508 kernel_netlink_ipsec_t *kernel_netlink_ipsec_create()
3509 {
3510 private_kernel_netlink_ipsec_t *this;
3511 bool register_for_events = TRUE;
3512
3513 INIT(this,
3514 .public = {
3515 .interface = {
3516 .get_features = _get_features,
3517 .get_spi = _get_spi,
3518 .get_cpi = _get_cpi,
3519 .add_sa = _add_sa,
3520 .update_sa = _update_sa,
3521 .query_sa = _query_sa,
3522 .del_sa = _del_sa,
3523 .flush_sas = _flush_sas,
3524 .add_policy = _add_policy,
3525 .query_policy = _query_policy,
3526 .del_policy = _del_policy,
3527 .flush_policies = _flush_policies,
3528 .bypass_socket = _bypass_socket,
3529 .enable_udp_decap = _enable_udp_decap,
3530 .destroy = _destroy,
3531 },
3532 },
3533 .policies = hashtable_create((hashtable_hash_t)policy_hash,
3534 (hashtable_equals_t)policy_equals, 32),
3535 .sas = hashtable_create((hashtable_hash_t)ipsec_sa_hash,
3536 (hashtable_equals_t)ipsec_sa_equals, 32),
3537 .bypass = array_create(sizeof(bypass_t), 0),
3538 .mutex = mutex_create(MUTEX_TYPE_DEFAULT),
3539 .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
3540 .get_priority = dlsym(RTLD_DEFAULT,
3541 "kernel_netlink_get_priority_custom"),
3542 .policy_update = lib->settings->get_bool(lib->settings,
3543 "%s.plugins.kernel-netlink.policy_update", FALSE, lib->ns),
3544 .install_routes = lib->settings->get_bool(lib->settings,
3545 "%s.install_routes", TRUE, lib->ns),
3546 .proto_port_transport = lib->settings->get_bool(lib->settings,
3547 "%s.plugins.kernel-netlink.set_proto_port_transport_sa",
3548 FALSE, lib->ns),
3549 );
3550
3551 if (streq(lib->ns, "starter"))
3552 { /* starter has no threads, so we do not register for kernel events */
3553 register_for_events = FALSE;
3554 }
3555
3556 this->socket_xfrm = netlink_socket_create(NETLINK_XFRM, xfrm_msg_names,
3557 lib->settings->get_bool(lib->settings,
3558 "%s.plugins.kernel-netlink.parallel_xfrm", FALSE, lib->ns));
3559 if (!this->socket_xfrm)
3560 {
3561 destroy(this);
3562 return NULL;
3563 }
3564
3565 setup_spd_hash_thresh(this, "ipv4", XFRMA_SPD_IPV4_HTHRESH, 32);
3566 setup_spd_hash_thresh(this, "ipv6", XFRMA_SPD_IPV6_HTHRESH, 128);
3567
3568 if (register_for_events)
3569 {
3570 struct sockaddr_nl addr;
3571
3572 memset(&addr, 0, sizeof(addr));
3573 addr.nl_family = AF_NETLINK;
3574
3575 /* create and bind XFRM socket for ACQUIRE, EXPIRE, MIGRATE & MAPPING */
3576 this->socket_xfrm_events = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
3577 if (this->socket_xfrm_events <= 0)
3578 {
3579 DBG1(DBG_KNL, "unable to create XFRM event socket: %s (%d)",
3580 strerror(errno), errno);
3581 destroy(this);
3582 return NULL;
3583 }
3584 addr.nl_groups = XFRMNLGRP(ACQUIRE) | XFRMNLGRP(EXPIRE) |
3585 XFRMNLGRP(MIGRATE) | XFRMNLGRP(MAPPING);
3586 if (bind(this->socket_xfrm_events, (struct sockaddr*)&addr, sizeof(addr)))
3587 {
3588 DBG1(DBG_KNL, "unable to bind XFRM event socket: %s (%d)",
3589 strerror(errno), errno);
3590 destroy(this);
3591 return NULL;
3592 }
3593 lib->watcher->add(lib->watcher, this->socket_xfrm_events, WATCHER_READ,
3594 (watcher_cb_t)receive_events, this);
3595 }
3596
3597 return &this->public;
3598 }