1/*
2 * Copyright (C) 2006-2023 Tobias Brunner
3 * Copyright (C) 2005-2009 Martin Willi
4 * Copyright (C) 2008-2016 Andreas Steffen
5 * Copyright (C) 2006-2007 Fabian Hartmann, Noah Heusser
6 * Copyright (C) 2006 Daniel Roethlisberger
7 * Copyright (C) 2005 Jan Hutter
8 *
9 * Copyright (C) secunet Security Networks AG
10 *
11 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2 of the License, or (at your
14 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
15 *
16 * This program is distributed in the hope that it will be useful, but
17 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * for more details.
20 */
21/*
22 * Copyright (C) 2018 Mellanox Technologies.
23 *
24 * Permission is hereby granted, free of charge, to any person obtaining a copy
25 * of this software and associated documentation files (the "Software"), to deal
26 * in the Software without restriction, including without limitation the rights
27 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
28 * copies of the Software, and to permit persons to whom the Software is
29 * furnished to do so, subject to the following conditions:
30 *
31 * The above copyright notice and this permission notice shall be included in
32 * all copies or substantial portions of the Software.
33 *
34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
37 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
38 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
39 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 * THE SOFTWARE.
41 */
42
43#define _GNU_SOURCE
44#include <sys/types.h>
45#include <sys/socket.h>
46#include <sys/ioctl.h>
47#include <sys/utsname.h>
48#include <stdint.h>
49#include <linux/ipsec.h>
50#include <linux/netlink.h>
51#include <linux/rtnetlink.h>
52#include <linux/xfrm.h>
53#include <linux/udp.h>
54#include <linux/ethtool.h>
55#include <linux/sockios.h>
56#include <net/if.h>
57#include <unistd.h>
58#include <time.h>
59#include <errno.h>
60#include <string.h>
61#include <fcntl.h>
62#include <dlfcn.h>
63
64#include "kernel_netlink_ipsec.h"
65#include "kernel_netlink_shared.h"
66#include "kernel_netlink_xfrmi.h"
67
68#include <daemon.h>
69#include <utils/debug.h>
70#include <threading/mutex.h>
71#include <threading/condvar.h>
72#include <collections/array.h>
73#include <collections/hashtable.h>
74#include <collections/linked_list.h>
75
76/** Required for Linux 2.6.26 kernel and later */
77#ifndef XFRM_STATE_AF_UNSPEC
78#define XFRM_STATE_AF_UNSPEC 32
79#endif
80
81/** From linux/in.h */
82#ifndef IP_XFRM_POLICY
83#define IP_XFRM_POLICY 17
84#endif
85
86/** Missing on uclibc */
87#ifndef IPV6_XFRM_POLICY
88#define IPV6_XFRM_POLICY 34
89#endif /*IPV6_XFRM_POLICY*/
90
91/* from linux/udp.h */
92#ifndef UDP_ENCAP
93#define UDP_ENCAP 100
94#endif
95
96#ifndef UDP_ENCAP_ESPINUDP
97#define UDP_ENCAP_ESPINUDP 2
98#endif
99
100/* this is not defined on some platforms */
101#ifndef SOL_UDP
102#define SOL_UDP IPPROTO_UDP
103#endif
104
105/** Base priority for installed policies */
106#define PRIO_BASE 200000
107
108/**
109 * Map the limit for bytes and packets to XFRM_INF by default
110 */
111#define XFRM_LIMIT(x) ((x) == 0 ? XFRM_INF : (x))
112
113/**
114 * Returns a pointer to the first rtattr following the nlmsghdr *nlh and the
115 * 'usual' netlink data x like 'struct xfrm_usersa_info'
116 */
117#define XFRM_RTA(nlh, x) ((struct rtattr*)(NLMSG_DATA(nlh) + \
118 NLMSG_ALIGN(sizeof(x))))
119/**
120 * Returns the total size of attached rta data
121 * (after 'usual' netlink data x like 'struct xfrm_usersa_info')
122 */
123#define XFRM_PAYLOAD(nlh, x) NLMSG_PAYLOAD(nlh, sizeof(x))
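
/*
 * Illustrative sketch: these two macros are typically combined with the
 * kernel's RTA_OK()/RTA_NEXT() macros to walk all attributes that follow the
 * fixed payload, e.g. for an xfrm_user_acquire message:
 *
 *   struct rtattr *rta = XFRM_RTA(hdr, struct xfrm_user_acquire);
 *   size_t rtasize = XFRM_PAYLOAD(hdr, struct xfrm_user_acquire);
 *
 *   while (RTA_OK(rta, rtasize))
 *   {
 *       switch (rta->rta_type)
 *       {
 *           ...
 *       }
 *       rta = RTA_NEXT(rta, rtasize);
 *   }
 *
 * process_acquire() and process_migrate() below use exactly this pattern.
 */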
124
125typedef struct kernel_algorithm_t kernel_algorithm_t;
126
127/**
128 * Mapping of IKEv2 kernel identifier to linux crypto API names
129 */
130struct kernel_algorithm_t {
131 /**
132 * Identifier specified in IKEv2
133 */
134 int ikev2;
135
136 /**
137 * Name of the algorithm in linux crypto API
138 */
139 const char *name;
140};
141
142ENUM(xfrm_msg_names, XFRM_MSG_NEWSA, __XFRM_MSG_MAX,
143 "XFRM_MSG_NEWSA",
144 "XFRM_MSG_DELSA",
145 "XFRM_MSG_GETSA",
146 "XFRM_MSG_NEWPOLICY",
147 "XFRM_MSG_DELPOLICY",
148 "XFRM_MSG_GETPOLICY",
149 "XFRM_MSG_ALLOCSPI",
150 "XFRM_MSG_ACQUIRE",
151 "XFRM_MSG_EXPIRE",
152 "XFRM_MSG_UPDPOLICY",
153 "XFRM_MSG_UPDSA",
154 "XFRM_MSG_POLEXPIRE",
155 "XFRM_MSG_FLUSHSA",
156 "XFRM_MSG_FLUSHPOLICY",
157 "XFRM_MSG_NEWAE",
158 "XFRM_MSG_GETAE",
159 "XFRM_MSG_REPORT",
160 "XFRM_MSG_MIGRATE",
161 "XFRM_MSG_NEWSADINFO",
162 "XFRM_MSG_GETSADINFO",
163 "XFRM_MSG_NEWSPDINFO",
164 "XFRM_MSG_GETSPDINFO",
165 "XFRM_MSG_MAPPING",
166 "XFRM_MSG_SETDEFAULT",
167 "XFRM_MSG_GETDEFAULT",
168 "XFRM_MSG_MAX",
169);
170
171ENUM(xfrm_attr_type_names, XFRMA_UNSPEC, __XFRMA_MAX,
172 "XFRMA_UNSPEC",
173 "XFRMA_ALG_AUTH",
174 "XFRMA_ALG_CRYPT",
175 "XFRMA_ALG_COMP",
176 "XFRMA_ENCAP",
177 "XFRMA_TMPL",
178 "XFRMA_SA",
179 "XFRMA_POLICY",
180 "XFRMA_SEC_CTX",
181 "XFRMA_LTIME_VAL",
182 "XFRMA_REPLAY_VAL",
183 "XFRMA_REPLAY_THRESH",
184 "XFRMA_ETIMER_THRESH",
185 "XFRMA_SRCADDR",
186 "XFRMA_COADDR",
187 "XFRMA_LASTUSED",
188 "XFRMA_POLICY_TYPE",
189 "XFRMA_MIGRATE",
190 "XFRMA_ALG_AEAD",
191 "XFRMA_KMADDRESS",
192 "XFRMA_ALG_AUTH_TRUNC",
193 "XFRMA_MARK",
194 "XFRMA_TFCPAD",
195 "XFRMA_REPLAY_ESN_VAL",
196 "XFRMA_SA_EXTRA_FLAGS",
197 "XFRMA_PROTO",
198 "XFRMA_ADDRESS_FILTER",
199 "XFRMA_PAD",
200 "XFRMA_OFFLOAD_DEV",
201 "XFRMA_SET_MARK",
202 "XFRMA_SET_MARK_MASK",
203 "XFRMA_IF_ID",
204 "XFRMA_MTIMER_THRESH",
205 "XFRMA_SA_DIR",
206 "XFRMA_NAT_KEEPALIVE_INTERVAL",
207 "XFRMA_SA_PCPU",
208 "XFRMA_IPTFS_DROP_TIME",
209 "XFRMA_IPTFS_REORDER_WINDOW",
210 "XFRMA_IPTFS_DONT_FRAG",
211 "XFRMA_IPTFS_INIT_DELAY",
212 "XFRMA_IPTFS_MAX_QSIZE",
213 "XFRMA_IPTFS_PKT_SIZE",
214 "XFRMA_MAX",
215);
216
217/**
218 * Algorithms for encryption
219 */
220static kernel_algorithm_t encryption_algs[] = {
221/* {ENCR_DES_IV64, "***" }, */
222 {ENCR_DES, "des" },
223 {ENCR_3DES, "des3_ede" },
224/* {ENCR_RC5, "***" }, */
225/* {ENCR_IDEA, "***" }, */
226 {ENCR_CAST, "cast5" },
227 {ENCR_BLOWFISH, "blowfish" },
228/* {ENCR_3IDEA, "***" }, */
229/* {ENCR_DES_IV32, "***" }, */
230 {ENCR_NULL, "cipher_null" },
231 {ENCR_AES_CBC, "aes" },
232 {ENCR_AES_CTR, "rfc3686(ctr(aes))" },
233 {ENCR_AES_CCM_ICV8, "rfc4309(ccm(aes))" },
234 {ENCR_AES_CCM_ICV12, "rfc4309(ccm(aes))" },
235 {ENCR_AES_CCM_ICV16, "rfc4309(ccm(aes))" },
236 {ENCR_AES_GCM_ICV8, "rfc4106(gcm(aes))" },
237 {ENCR_AES_GCM_ICV12, "rfc4106(gcm(aes))" },
238 {ENCR_AES_GCM_ICV16, "rfc4106(gcm(aes))" },
239 {ENCR_NULL_AUTH_AES_GMAC, "rfc4543(gcm(aes))" },
240 {ENCR_CAMELLIA_CBC, "cbc(camellia)" },
241/* {ENCR_CAMELLIA_CTR, "***" }, */
242/* {ENCR_CAMELLIA_CCM_ICV8, "***" }, */
243/* {ENCR_CAMELLIA_CCM_ICV12, "***" }, */
244/* {ENCR_CAMELLIA_CCM_ICV16, "***" }, */
245 {ENCR_SERPENT_CBC, "serpent" },
246 {ENCR_TWOFISH_CBC, "twofish" },
247 {ENCR_CHACHA20_POLY1305, "rfc7539esp(chacha20,poly1305)"},
248};
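
/*
 * Note: the CCM/GCM entries with different ICV lengths intentionally map to
 * the same crypto API name; the ICV length is passed separately via the
 * alg_icv_len field of the XFRMA_ALG_AEAD attribute in add_sa() below.
 */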
249
250/**
251 * Algorithms for integrity protection
252 */
253static kernel_algorithm_t integrity_algs[] = {
254 {AUTH_HMAC_MD5_96, "md5" },
255 {AUTH_HMAC_MD5_128, "hmac(md5)" },
256 {AUTH_HMAC_SHA1_96, "sha1" },
257 {AUTH_HMAC_SHA1_160, "hmac(sha1)" },
258 {AUTH_HMAC_SHA2_256_96, "sha256" },
259 {AUTH_HMAC_SHA2_256_128, "hmac(sha256)" },
260 {AUTH_HMAC_SHA2_256_256, "hmac(sha256)" },
261 {AUTH_HMAC_SHA2_384_192, "hmac(sha384)" },
262 {AUTH_HMAC_SHA2_384_384, "hmac(sha384)" },
263 {AUTH_HMAC_SHA2_512_256, "hmac(sha512)" },
264 {AUTH_HMAC_SHA2_512_512, "hmac(sha512)" },
265/* {AUTH_DES_MAC, "***" }, */
266/* {AUTH_KPDK_MD5, "***" }, */
267 {AUTH_AES_XCBC_96, "xcbc(aes)" },
268 {AUTH_AES_CMAC_96, "cmac(aes)" },
269};
270
271/**
272 * Algorithms for IPComp
273 */
274static kernel_algorithm_t compression_algs[] = {
275/* {IPCOMP_OUI, "***" }, */
276 {IPCOMP_DEFLATE, "deflate" },
277 {IPCOMP_LZS, "lzs" },
278 {IPCOMP_LZJH, "lzjh" },
279};
280
281/**
282 * Look up a kernel algorithm name and its key size
283 */
284static const char* lookup_algorithm(transform_type_t type, int ikev2)
285{
286 kernel_algorithm_t *list;
287 int i, count;
288 char *name;
289
290 switch (type)
291 {
292 case ENCRYPTION_ALGORITHM:
293 list = encryption_algs;
294 count = countof(encryption_algs);
295 break;
296 case INTEGRITY_ALGORITHM:
297 list = integrity_algs;
298 count = countof(integrity_algs);
299 break;
300 case COMPRESSION_ALGORITHM:
301 list = compression_algs;
302 count = countof(compression_algs);
303 break;
304 default:
305 return NULL;
306 }
307 for (i = 0; i < count; i++)
308 {
309 if (list[i].ikev2 == ikev2)
310 {
311 return list[i].name;
312 }
313 }
314 if (charon->kernel->lookup_algorithm(charon->kernel, ikev2, type, NULL,
315 &name))
316 {
317 return name;
318 }
319 return NULL;
320}
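
/*
 * Usage sketch: add_sa() below resolves IKEv2 transform identifiers to Linux
 * crypto API names through this function, for example
 *
 *   lookup_algorithm(ENCRYPTION_ALGORITHM, ENCR_AES_GCM_ICV16)
 *       -> "rfc4106(gcm(aes))"
 *   lookup_algorithm(INTEGRITY_ALGORITHM, AUTH_HMAC_SHA2_256_128)
 *       -> "hmac(sha256)"
 *
 * Identifiers not found in the static tables above are passed on to the
 * kernel interface, which may resolve them via registered algorithm mappings.
 */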
321
322typedef struct private_kernel_netlink_ipsec_t private_kernel_netlink_ipsec_t;
323
324/**
325 * Private variables and functions of kernel_netlink class.
326 */
327struct private_kernel_netlink_ipsec_t {
328 /**
329 * Public part of the kernel_netlink_t object
330 */
331 kernel_netlink_ipsec_t public;
332
333 /**
334 * Mutex to lock access to installed policies
335 */
336 mutex_t *mutex;
337
338 /**
339 * Condvar to synchronize access to individual policies
340 */
341 condvar_t *condvar;
342
343 /**
344 * Hash table of installed policies (policy_entry_t)
345 */
346 hashtable_t *policies;
347
348 /**
349 * Hash table of IPsec SAs using policies (ipsec_sa_t)
350 */
351 hashtable_t *sas;
352
353 /**
354 * Netlink xfrm socket (IPsec)
355 */
356 netlink_socket_t *socket_xfrm;
357
358 /**
359 * XFRM interface manager
360 */
361 kernel_netlink_xfrmi_t *xfrmi;
362
363 /**
364 * Netlink xfrm socket to receive acquire and expire events
365 */
366 netlink_event_socket_t *socket_xfrm_events;
367
368 /**
369 * Whether the kernel reports the last use time on SAs
370 */
371 bool sa_lastused;
372
373 /**
374 * Whether the kernel supports setting the SA direction
375 */
376 bool sa_dir;
377
378 /**
379 * Whether to install routes along policies
380 */
381 bool install_routes;
382
383 /**
384 * Whether to install routes via XFRM interfaces
385 */
386 bool install_routes_xfrmi;
387
388 /**
389 * Whether to set protocol and ports on selector installed with transport
390 * mode IPsec SAs
391 */
392 bool proto_port_transport;
393
394 /**
395 * Whether to always use UPDATE to install policies
396 */
397 bool policy_update;
398
399 /**
400 * Whether to use port-based policies instead of socket policies for the
401 * IKE sockets/ports
402 */
403 bool port_bypass;
404
405 /**
406 * Installed port-based IKE bypass policies, as bypass_t
407 *
408 * If they are potentially offloaded, the offload mutex has to be locked
409 * when modifying it
410 */
411 array_t *bypass;
412
413 /**
414 * Interfaces that potentially support HW offloading, as offload_iface_t
415 */
416 hashtable_t *offload_interfaces;
417
418 /**
419 * Mutex to safely access the interfaces and bypasses
420 */
421 mutex_t *offload_mutex;
422
423 /**
424 * Netlink routing socket to receive link events
425 */
426 netlink_event_socket_t *socket_link_events;
427
428 /**
429 * Custom priority calculation function
430 */
431 uint32_t (*get_priority)(kernel_ipsec_policy_id_t *id,
432 kernel_ipsec_manage_policy_t *data);
433};
434
435typedef struct ipsec_sa_t ipsec_sa_t;
436
437/**
438 * IPsec SA assigned to a policy.
439 */
440struct ipsec_sa_t {
441 /** Source address of this SA */
442 host_t *src;
443
444 /** Destination address of this SA */
445 host_t *dst;
446
447 /** Optional mark */
448 mark_t mark;
449
450 /** Optional interface ID */
451 uint32_t if_id;
452
453 /** Optional HW offload */
454 hw_offload_t hw_offload;
455
456 /** Description of this SA */
457 ipsec_sa_cfg_t cfg;
458
459 /** Reference count for this SA */
460 refcount_t refcount;
461};
462
463/**
464 * Hash function for ipsec_sa_t objects
465 */
466static u_int ipsec_sa_hash(ipsec_sa_t *sa)
467{
468 return chunk_hash_inc(sa->src->get_address(sa->src),
469 chunk_hash_inc(sa->dst->get_address(sa->dst),
470 chunk_hash_inc(chunk_from_thing(sa->mark),
471 chunk_hash_inc(chunk_from_thing(sa->if_id),
472 chunk_hash_inc(chunk_from_thing(sa->hw_offload),
473 chunk_hash(chunk_from_thing(sa->cfg)))))));
474}
475
476/**
477 * Equality function for ipsec_sa_t objects
478 */
479static bool ipsec_sa_equals(ipsec_sa_t *sa, ipsec_sa_t *other_sa)
480{
481 return sa->src->ip_equals(sa->src, other_sa->src) &&
482 sa->dst->ip_equals(sa->dst, other_sa->dst) &&
483 sa->mark.value == other_sa->mark.value &&
484 sa->mark.mask == other_sa->mark.mask &&
485 sa->if_id == other_sa->if_id &&
486 sa->hw_offload == other_sa->hw_offload &&
487 ipsec_sa_cfg_equals(&sa->cfg, &other_sa->cfg);
488}
489
490/**
491 * Allocate or reference an IPsec SA object
492 */
493static ipsec_sa_t *ipsec_sa_create(private_kernel_netlink_ipsec_t *this,
494 host_t *src, host_t *dst, mark_t mark,
495 uint32_t if_id, hw_offload_t hw_offload,
496 ipsec_sa_cfg_t *cfg)
497{
498 ipsec_sa_t *sa, *found;
499 INIT(sa,
500 .src = src,
501 .dst = dst,
502 .mark = mark,
503 .if_id = if_id,
504 .hw_offload = hw_offload,
505 .cfg = *cfg,
506 );
507 found = this->sas->get(this->sas, sa);
508 if (!found)
509 {
510 sa->src = src->clone(src);
511 sa->dst = dst->clone(dst);
512 this->sas->put(this->sas, sa, sa);
513 }
514 else
515 {
516 free(sa);
517 sa = found;
518 }
519 ref_get(&sa->refcount);
520 return sa;
521}
522
523/**
524 * Release and destroy an IPsec SA object
525 */
526static void ipsec_sa_destroy(private_kernel_netlink_ipsec_t *this,
527 ipsec_sa_t *sa)
528{
529 if (ref_put(&sa->refcount))
530 {
531 this->sas->remove(this->sas, sa);
532 DESTROY_IF(sa->src);
533 DESTROY_IF(sa->dst);
534 free(sa);
535 }
536}
537
538typedef struct policy_sa_t policy_sa_t;
539typedef struct policy_sa_out_t policy_sa_out_t;
540
541/**
542 * Mapping between a policy and an IPsec SA.
543 */
544struct policy_sa_t {
545 /** Priority assigned to the policy when installed with this SA */
546 uint32_t priority;
547
548 /** Automatic priority assigned to the policy when installed with this SA */
549 uint32_t auto_priority;
550
551 /** Type of the policy */
552 policy_type_t type;
553
554 /** Whether to trigger per-CPU acquires for this policy */
555 bool pcpu_acquires;
556
557 /** Assigned SA */
558 ipsec_sa_t *sa;
559};
560
561/**
562 * For outbound policies we also cache the traffic selectors in order to install
563 * the route.
564 */
565struct policy_sa_out_t {
566 /** Generic interface */
567 policy_sa_t generic;
568
569 /** Source traffic selector of this policy */
570 traffic_selector_t *src_ts;
571
572 /** Destination traffic selector of this policy */
573 traffic_selector_t *dst_ts;
574};
575
576/**
577 * Create a policy_sa(_out)_t object
578 */
579static policy_sa_t *policy_sa_create(private_kernel_netlink_ipsec_t *this,
580 policy_dir_t dir, policy_type_t type, host_t *src, host_t *dst,
581 traffic_selector_t *src_ts, traffic_selector_t *dst_ts, mark_t mark,
582 uint32_t if_id, hw_offload_t hw_offload, bool pcpu_acquires,
583 ipsec_sa_cfg_t *cfg)
584{
585 policy_sa_t *policy;
586
587 if (dir == POLICY_OUT)
588 {
589 policy_sa_out_t *out;
590 INIT(out,
591 .src_ts = src_ts->clone(src_ts),
592 .dst_ts = dst_ts->clone(dst_ts),
593 );
594 policy = &out->generic;
595 }
596 else
597 {
598 INIT(policy, .priority = 0);
599 }
600 policy->type = type;
601 policy->pcpu_acquires = pcpu_acquires;
602 policy->sa = ipsec_sa_create(this, src, dst, mark, if_id, hw_offload, cfg);
603 return policy;
604}
605
606/**
607 * Destroy a policy_sa(_out)_t object
608 */
609static void policy_sa_destroy(policy_sa_t *policy, policy_dir_t dir,
610 private_kernel_netlink_ipsec_t *this)
611{
612 if (dir == POLICY_OUT)
613 {
614 policy_sa_out_t *out = (policy_sa_out_t*)policy;
615 out->src_ts->destroy(out->src_ts);
616 out->dst_ts->destroy(out->dst_ts);
617 }
618 ipsec_sa_destroy(this, policy->sa);
619 free(policy);
620}
621
622CALLBACK(policy_sa_destroy_cb, void,
623 policy_sa_t *policy, va_list args)
624{
625 private_kernel_netlink_ipsec_t *this;
626 policy_dir_t dir;
627
628 VA_ARGS_VGET(args, dir, this);
629 policy_sa_destroy(policy, dir, this);
630}
631
632typedef struct policy_entry_t policy_entry_t;
633
634/**
635 * Installed kernel policy.
636 */
637struct policy_entry_t {
638
639 /** Direction of this policy: in, out, forward */
640 uint8_t direction;
641
642 /** Parameters of installed policy */
643 struct xfrm_selector sel;
644
645 /** Optional mark */
646 uint32_t mark;
647
648 /** Optional interface ID */
649 uint32_t if_id;
650
651 /** Optional security label */
652 sec_label_t *label;
653
654 /** Associated route installed for this policy */
655 route_entry_t *route;
656
657 /** List of SAs this policy is used by, ordered by priority */
658 linked_list_t *used_by;
659
660 /** reqid for this policy */
661 uint32_t reqid;
662
663 /** Number of threads waiting to work on this policy */
664 int waiting;
665
666 /** TRUE if a thread is working on this policy */
667 bool working;
668};
669
670/**
671 * Destroy a policy_entry_t object
672 */
673static void policy_entry_destroy(private_kernel_netlink_ipsec_t *this,
674 policy_entry_t *policy)
675{
676 if (policy->route)
677 {
678 route_entry_destroy(policy->route);
679 }
680 if (policy->used_by)
681 {
682 policy->used_by->invoke_function(policy->used_by, policy_sa_destroy_cb,
683 policy->direction, this);
684 policy->used_by->destroy(policy->used_by);
685 }
686 DESTROY_IF(policy->label);
687 free(policy);
688}
689
690/**
691 * Hash function for policy_entry_t objects
692 */
693static u_int policy_hash(policy_entry_t *key)
694{
695 chunk_t chunk = chunk_from_thing(key->sel);
696 u_int hash;
697
698 hash = chunk_hash_inc(chunk, chunk_hash_inc(chunk_from_thing(key->mark),
699 chunk_hash(chunk_from_thing(key->if_id))));
700 if (key->label)
701 {
702 hash = key->label->hash(key->label, hash);
703 }
704 return hash;
705}
706
707/**
708 * Equality function for policy_entry_t objects
709 */
710static bool policy_equals(policy_entry_t *key, policy_entry_t *other_key)
711{
712 return memeq(&key->sel, &other_key->sel, sizeof(struct xfrm_selector)) &&
713 key->mark == other_key->mark &&
714 key->if_id == other_key->if_id &&
715 key->direction == other_key->direction &&
716 sec_labels_equal(key->label, other_key->label);
717}
718
719/**
720 * Determine number of set bits in 16 bit port mask
721 */
722static inline uint32_t port_mask_bits(uint16_t port_mask)
723{
724 uint32_t bits;
725 uint16_t bit_mask = 0x8000;
726
727 port_mask = ntohs(port_mask);
728
729 for (bits = 0; bits < 16; bits++)
730 {
731 if (!(port_mask & bit_mask))
732 {
733 break;
734 }
735 bit_mask >>= 1;
736 }
737 return bits;
738}
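
/*
 * Example: the function effectively returns the prefix length of a port mask
 * given in network order, i.e. the number of contiguous leading one-bits:
 *
 *   port_mask_bits(htons(0xffff)) -> 16  (single port)
 *   port_mask_bits(htons(0xffc0)) -> 10  (range of 64 consecutive ports)
 *   port_mask_bits(0)             ->  0  (any port)
 */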
739
740/**
741 * Calculate the priority of a policy
742 *
743 * bits 0-0: separate trap and regular policies (0..1) 1 bit
744 * bits 1-1: restriction to network interface (0..1) 1 bit
745 * bits 2-7: src + dst port mask bits (2 * 0..16) 6 bits
746 * bits 8-8: restriction to protocol (0..1) 1 bit
747 * bits 9-17: src + dst network mask bits (2 * 0..128) 9 bits
748 * 18 bits
749 *
750 * smallest value: 000000000 0 000000 0 0: 0, lowest priority = 200'000
751 * largest value : 100000000 1 100000 1 1: 131'459, highest priority = 68'541
752 */
753static uint32_t get_priority(policy_entry_t *policy, policy_priority_t prio,
754 char *interface)
755{
756 uint32_t priority = PRIO_BASE, sport_mask_bits, dport_mask_bits;
757
758 switch (prio)
759 {
760 case POLICY_PRIORITY_FALLBACK:
761 priority += PRIO_BASE;
762 /* fall-through to next case */
763 case POLICY_PRIORITY_ROUTED:
764 case POLICY_PRIORITY_DEFAULT:
765 priority += PRIO_BASE;
766 /* fall-through to next case */
767 case POLICY_PRIORITY_PASS:
768 break;
769 }
770 sport_mask_bits = port_mask_bits(policy->sel.sport_mask);
771 dport_mask_bits = port_mask_bits(policy->sel.dport_mask);
772
773 /* calculate priority */
774 priority -= (policy->sel.prefixlen_s + policy->sel.prefixlen_d) * 512;
775 priority -= policy->sel.proto ? 256 : 0;
776 priority -= (sport_mask_bits + dport_mask_bits) * 4;
777 priority -= (interface != NULL) * 2;
778 priority -= (prio != POLICY_PRIORITY_ROUTED);
779
780 return priority;
781}
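
/*
 * Worked example, following the bit layout documented above: a regular
 * POLICY_PRIORITY_DEFAULT policy with 10.1.0.0/16 === 10.2.0.0/16 selectors,
 * protocol UDP, no port selectors and no interface restriction yields
 *
 *   200'000 + 200'000        (base for default priority)
 *   - (16 + 16) * 512        (source and destination prefix lengths)
 *   - 256                    (protocol restriction)
 *   - (0 + 0) * 4            (no port masks)
 *   - 0                      (no interface restriction)
 *   - 1                      (regular, not a trap policy)
 *   = 383'359
 *
 * More specific selectors thus produce numerically smaller values, i.e.
 * higher priority policies.
 */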
782
783/**
784 * Convert the general ipsec mode to the one defined in xfrm.h
785 */
786static uint8_t mode2kernel(ipsec_mode_t mode)
787{
788 switch (mode)
789 {
790 case MODE_TRANSPORT:
791 return XFRM_MODE_TRANSPORT;
792 case MODE_TUNNEL:
793 return XFRM_MODE_TUNNEL;
794 case MODE_BEET:
795 return XFRM_MODE_BEET;
796 case MODE_IPTFS:
797 return XFRM_MODE_IPTFS;
798 default:
799 return mode;
800 }
801}
802
803/**
804 * Convert a host_t to a struct xfrm_address
805 */
806static void host2xfrm(host_t *host, xfrm_address_t *xfrm)
807{
808 chunk_t chunk = host->get_address(host);
809 memcpy(xfrm, chunk.ptr, min(chunk.len, sizeof(xfrm_address_t)));
810}
811
812/**
813 * Convert a struct xfrm_address to a host_t
814 */
815static host_t* xfrm2host(int family, xfrm_address_t *xfrm, uint16_t port)
816{
817 chunk_t chunk;
818
819 switch (family)
820 {
821 case AF_INET:
822 chunk = chunk_create((u_char*)&xfrm->a4, sizeof(xfrm->a4));
823 break;
824 case AF_INET6:
825 chunk = chunk_create((u_char*)&xfrm->a6, sizeof(xfrm->a6));
826 break;
827 default:
828 return NULL;
829 }
830 return host_create_from_chunk(family, chunk, ntohs(port));
831}
832
833/**
834 * Convert a traffic selector address range to subnet and its mask.
835 */
836static void ts2subnet(traffic_selector_t* ts,
837 xfrm_address_t *net, uint8_t *mask)
838{
839 host_t *net_host;
840 chunk_t net_chunk;
841
842 ts->to_subnet(ts, &net_host, mask);
843 net_chunk = net_host->get_address(net_host);
844 memcpy(net, net_chunk.ptr, net_chunk.len);
845 net_host->destroy(net_host);
846}
847
848/**
849 * Convert a traffic selector port range to port/portmask
850 */
851static void ts2ports(traffic_selector_t* ts,
852 uint16_t *port, uint16_t *mask)
853{
854 uint16_t from, to, bitmask;
855 int bit;
856
857 from = ts->get_from_port(ts);
858 to = ts->get_to_port(ts);
859
860 /* Quick check for a single port */
861 if (from == to)
862 {
863 *port = htons(from);
864 *mask = ~0;
865 }
866 else
867 {
868 /* Compute the port mask for port ranges */
869 *mask = 0;
870
871 for (bit = 15; bit >= 0; bit--)
872 {
873 bitmask = 1 << bit;
874
875 if ((bitmask & from) != (bitmask & to))
876 {
877 *port = htons(from & *mask);
878 *mask = htons(*mask);
879 return;
880 }
881 *mask |= bitmask;
882 }
883 }
884 return;
885}
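
/*
 * Example: single ports and power-of-two aligned ranges map exactly to a
 * port/mask pair (values shown in host order):
 *
 *   8000..8000  ->  port 8000, mask 0xffff
 *   8000..8063  ->  port 8000, mask 0xffc0  (64 consecutive ports)
 *
 * Other ranges are covered by the longest prefix shared by both bounds, e.g.
 * 8000..8100 yields 7936/0xff00, which also matches some additional ports.
 */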
886
887/**
888 * Convert a pair of traffic_selectors to an xfrm_selector
889 */
890static struct xfrm_selector ts2selector(traffic_selector_t *src,
891 traffic_selector_t *dst,
892 char *interface)
893{
894 struct xfrm_selector sel;
895 uint16_t port;
896
897 memset(&sel, 0, sizeof(sel));
898 sel.family = (src->get_type(src) == TS_IPV4_ADDR_RANGE) ? AF_INET : AF_INET6;
899 /* src or dest proto may be "any" (0), use more restrictive one */
900 sel.proto = max(src->get_protocol(src), dst->get_protocol(dst));
901 ts2subnet(dst, &sel.daddr, &sel.prefixlen_d);
902 ts2subnet(src, &sel.saddr, &sel.prefixlen_s);
903 ts2ports(dst, &sel.dport, &sel.dport_mask);
904 ts2ports(src, &sel.sport, &sel.sport_mask);
905 if ((sel.proto == IPPROTO_ICMP || sel.proto == IPPROTO_ICMPV6) &&
906 (sel.dport || sel.sport))
907 {
908 /* the kernel expects the ICMP type and code in the source and
909 * destination port fields, respectively. */
910 port = ntohs(max(sel.dport, sel.sport));
911 sel.sport = htons(traffic_selector_icmp_type(port));
912 sel.sport_mask = sel.sport ? ~0 : 0;
913 sel.dport = htons(traffic_selector_icmp_code(port));
914 sel.dport_mask = sel.dport ? ~0 : 0;
915 }
916 sel.ifindex = interface ? if_nametoindex(interface) : 0;
917 sel.user = 0;
918
919 return sel;
920}
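
/*
 * Example: for ICMP/ICMPv6 selectors the kernel expects the message type and
 * code in the port fields. Assuming the traffic selector encodes e.g. echo
 * requests (type 8, code 0) as port 0x0800, the resulting selector ends up
 * with sport = htons(8), sport_mask = 0xffff and dport = dport_mask = 0.
 */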
921
922/**
923 * Convert an xfrm_selector to a src|dst traffic_selector
924 */
925static traffic_selector_t* selector2ts(struct xfrm_selector *sel, bool src)
926{
927 u_char *addr;
928 uint8_t prefixlen;
929 uint16_t port = 0;
930 host_t *host = NULL;
931
932 if (src)
933 {
934 addr = (u_char*)&sel->saddr;
935 prefixlen = sel->prefixlen_s;
936 if (sel->sport_mask)
937 {
938 port = ntohs(sel->sport);
939 }
940 }
941 else
942 {
943 addr = (u_char*)&sel->daddr;
944 prefixlen = sel->prefixlen_d;
945 if (sel->dport_mask)
946 {
947 port = ntohs(sel->dport);
948 }
949 }
950 if (sel->proto == IPPROTO_ICMP || sel->proto == IPPROTO_ICMPV6)
951 { /* convert ICMP[v6] message type and code as supplied by the kernel in
952 * source and destination ports (both in network order) */
953 port = (sel->sport >> 8) | (sel->dport & 0xff00);
954 port = ntohs(port);
955 }
956 /* The Linux 2.6 kernel does not set the selector's family field,
957 * so as a kludge we additionally test the prefix length.
958 */
959 if (sel->family == AF_INET || sel->prefixlen_s == 32)
960 {
961 host = host_create_from_chunk(AF_INET, chunk_create(addr, 4), 0);
962 }
963 else if (sel->family == AF_INET6 || sel->prefixlen_s == 128)
964 {
965 host = host_create_from_chunk(AF_INET6, chunk_create(addr, 16), 0);
966 }
967
968 if (host)
969 {
970 return traffic_selector_create_from_subnet(host, prefixlen,
971 sel->proto, port, port ?: 65535);
972 }
973 return NULL;
974}
975
976/**
977 * Process a XFRM_MSG_ACQUIRE from kernel
978 */
979static void process_acquire(private_kernel_netlink_ipsec_t *this,
980 struct nlmsghdr *hdr)
981{
982 struct xfrm_user_acquire *acquire;
983 struct rtattr *rta;
984 size_t rtasize;
985 kernel_acquire_data_t data = {
986 .cpu = CPU_ID_MAX,
987 };
988 chunk_t label = chunk_empty;
989 uint32_t reqid = 0;
990 uint8_t proto;
991
992 acquire = NLMSG_DATA(hdr);
993 proto = acquire->id.proto;
994 rta = XFRM_RTA(hdr, struct xfrm_user_acquire);
995 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_user_acquire);
996
997 DBG2(DBG_KNL, "received a XFRM_MSG_ACQUIRE");
998
999 while (RTA_OK(rta, rtasize))
1000 {
1001 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
1002
1003 if (rta->rta_type == XFRMA_TMPL)
1004 {
1005 struct xfrm_user_tmpl* tmpl = RTA_DATA(rta);
1006 reqid = tmpl->reqid;
1007 }
1008 if (rta->rta_type == XFRMA_SA_PCPU)
1009 {
1010 data.cpu = *(uint32_t*)RTA_DATA(rta);
1011 }
1012#ifdef USE_SELINUX
1013 if (rta->rta_type == XFRMA_SEC_CTX)
1014 {
1015 struct xfrm_user_sec_ctx *ctx = RTA_DATA(rta);
1016
1017 if (ctx->ctx_doi == XFRM_SC_DOI_LSM &&
1018 ctx->ctx_alg == XFRM_SC_ALG_SELINUX)
1019 {
1020 label = chunk_create((void*)(ctx + 1), ctx->ctx_len);
1021 }
1022 }
1023#endif
1024 rta = RTA_NEXT(rta, rtasize);
1025 }
1026 switch (proto)
1027 {
1028 case 0:
1029 case IPPROTO_ESP:
1030 case IPPROTO_AH:
1031 break;
1032 default:
1033 /* acquire for AH/ESP only, not for IPCOMP */
1034 return;
1035 }
1036 data.src = selector2ts(&acquire->sel, TRUE);
1037 data.dst = selector2ts(&acquire->sel, FALSE);
1038 data.label = label.len ? sec_label_from_encoding(label) : NULL;
1039 data.seq = acquire->seq;
1040
1041 charon->kernel->acquire(charon->kernel, reqid, &data);
1042
1043 DESTROY_IF(data.src);
1044 DESTROY_IF(data.dst);
1045 DESTROY_IF(data.label);
1046}
1047
1048/**
1049 * Process a XFRM_MSG_EXPIRE from kernel
1050 */
1051static void process_expire(private_kernel_netlink_ipsec_t *this,
1052 struct nlmsghdr *hdr)
1053{
1054 struct xfrm_user_expire *expire;
1055 uint32_t spi;
1056 uint8_t protocol;
1057 host_t *dst;
1058
1059 expire = NLMSG_DATA(hdr);
1060 protocol = expire->state.id.proto;
1061 spi = expire->state.id.spi;
1062
1063 DBG2(DBG_KNL, "received a XFRM_MSG_EXPIRE");
1064
1065 if (protocol == IPPROTO_ESP || protocol == IPPROTO_AH)
1066 {
1067 dst = xfrm2host(expire->state.family, &expire->state.id.daddr, 0);
1068 if (dst)
1069 {
1070 charon->kernel->expire(charon->kernel, protocol, spi, dst,
1071 expire->hard != 0);
1072 dst->destroy(dst);
1073 }
1074 }
1075}
1076
1077/**
1078 * Process a XFRM_MSG_MIGRATE from kernel
1079 */
1080static void process_migrate(private_kernel_netlink_ipsec_t *this,
1081 struct nlmsghdr *hdr)
1082{
1083 struct xfrm_userpolicy_id *policy_id;
1084 struct rtattr *rta;
1085 size_t rtasize;
1086 traffic_selector_t *src_ts, *dst_ts;
1087 host_t *local = NULL, *remote = NULL;
1088 host_t *old_src = NULL, *old_dst = NULL;
1089 host_t *new_src = NULL, *new_dst = NULL;
1090 uint32_t reqid = 0;
1091 policy_dir_t dir;
1092
1093 policy_id = NLMSG_DATA(hdr);
1094 rta = XFRM_RTA(hdr, struct xfrm_userpolicy_id);
1095 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_userpolicy_id);
1096
1097 DBG2(DBG_KNL, "received a XFRM_MSG_MIGRATE");
1098
1099 src_ts = selector2ts(&policy_id->sel, TRUE);
1100 dst_ts = selector2ts(&policy_id->sel, FALSE);
1101 dir = (policy_dir_t)policy_id->dir;
1102
1103 DBG2(DBG_KNL, " policy: %R === %R %N", src_ts, dst_ts, policy_dir_names);
1104
1105 while (RTA_OK(rta, rtasize))
1106 {
1107 DBG2(DBG_KNL, " %N", xfrm_attr_type_names, rta->rta_type);
1108 if (rta->rta_type == XFRMA_KMADDRESS)
1109 {
1110 struct xfrm_user_kmaddress *kmaddress;
1111
1112 kmaddress = (struct xfrm_user_kmaddress*)RTA_DATA(rta);
1113 local = xfrm2host(kmaddress->family, &kmaddress->local, 0);
1114 remote = xfrm2host(kmaddress->family, &kmaddress->remote, 0);
1115 DBG2(DBG_KNL, " kmaddress: %H...%H", local, remote);
1116 }
1117 else if (rta->rta_type == XFRMA_MIGRATE)
1118 {
1119 struct xfrm_user_migrate *migrate;
1120
1121 migrate = (struct xfrm_user_migrate*)RTA_DATA(rta);
1122 old_src = xfrm2host(migrate->old_family, &migrate->old_saddr, 0);
1123 old_dst = xfrm2host(migrate->old_family, &migrate->old_daddr, 0);
1124 new_src = xfrm2host(migrate->new_family, &migrate->new_saddr, 0);
1125 new_dst = xfrm2host(migrate->new_family, &migrate->new_daddr, 0);
1126 reqid = migrate->reqid;
1127 DBG2(DBG_KNL, " migrate %H...%H to %H...%H, reqid {%u}",
1128 old_src, old_dst, new_src, new_dst, reqid);
1129 DESTROY_IF(old_src);
1130 DESTROY_IF(old_dst);
1131 DESTROY_IF(new_src);
1132 DESTROY_IF(new_dst);
1133 }
1134 rta = RTA_NEXT(rta, rtasize);
1135 }
1136
1137 if (src_ts && dst_ts && local && remote)
1138 {
1139 charon->kernel->migrate(charon->kernel, reqid, src_ts, dst_ts, dir,
1140 local, remote);
1141 }
1142 else
1143 {
1144 DESTROY_IF(src_ts);
1145 DESTROY_IF(dst_ts);
1146 DESTROY_IF(local);
1147 DESTROY_IF(remote);
1148 }
1149}
1150
1151/**
1152 * Process a XFRM_MSG_MAPPING from kernel
1153 */
1154static void process_mapping(private_kernel_netlink_ipsec_t *this,
1155 struct nlmsghdr *hdr)
1156{
1157 struct xfrm_user_mapping *mapping;
1158 uint32_t spi;
1159
1160 mapping = NLMSG_DATA(hdr);
1161 spi = mapping->id.spi;
1162
1163 DBG2(DBG_KNL, "received a XFRM_MSG_MAPPING");
1164
1165 if (mapping->id.proto == IPPROTO_ESP)
1166 {
1167 host_t *dst, *new;
1168
1169 dst = xfrm2host(mapping->id.family, &mapping->id.daddr, 0);
1170 if (dst)
1171 {
1172 if (!mapping->old_sport)
1173 {
1174 /* ignore mappings for per-CPU SAs with 0 source port */
1175 DBG1(DBG_KNL, "ignore NAT mapping change for per-resource "
1176 "CHILD_SA %N/0x%08x/%H", protocol_id_names, PROTO_ESP,
1177 htonl(spi), dst);
1178 }
1179 else
1180 {
1181 new = xfrm2host(mapping->id.family, &mapping->new_saddr,
1182 mapping->new_sport);
1183 if (new)
1184 {
1185 charon->kernel->mapping(charon->kernel, IPPROTO_ESP, spi, dst,
1186 new);
1187 new->destroy(new);
1188 }
1189 }
1190 dst->destroy(dst);
1191 }
1192 }
1193}
1194
1195CALLBACK(receive_events, void,
1196 private_kernel_netlink_ipsec_t *this, struct nlmsghdr *hdr)
1197{
1198 switch (hdr->nlmsg_type)
1199 {
1200 case XFRM_MSG_ACQUIRE:
1201 process_acquire(this, hdr);
1202 break;
1203 case XFRM_MSG_EXPIRE:
1204 process_expire(this, hdr);
1205 break;
1206 case XFRM_MSG_MIGRATE:
1207 process_migrate(this, hdr);
1208 break;
1209 case XFRM_MSG_MAPPING:
1210 process_mapping(this, hdr);
1211 break;
1212 default:
1213 DBG1(DBG_KNL, "received unknown event from XFRM event "
1214 "socket: %d", hdr->nlmsg_type);
1215 break;
1216 }
1217}
1218
1219METHOD(kernel_ipsec_t, get_features, kernel_feature_t,
1220 private_kernel_netlink_ipsec_t *this)
1221{
1222 return KERNEL_ESP_V3_TFC | KERNEL_POLICY_SPI | KERNEL_ACQUIRE_SEQ |
1223 (this->sa_lastused ? KERNEL_SA_USE_TIME : 0);
1224}
1225
1226/**
1227 * Format the mark for debug messages
1228 */
1229static void format_mark(char *buf, int buflen, mark_t mark)
1230{
1231 if (mark.value | mark.mask)
1232 {
1233 snprintf(buf, buflen, " (mark %u/0x%08x)", mark.value, mark.mask);
1234 }
1235}
1236
1237/**
1238 * Add a XFRM mark to message if required
1239 */
1240static bool add_mark(struct nlmsghdr *hdr, int buflen, mark_t mark)
1241{
1242 if (mark.value | mark.mask)
1243 {
1244 struct xfrm_mark *xmrk;
1245
1246 xmrk = netlink_reserve(hdr, buflen, XFRMA_MARK, sizeof(*xmrk));
1247 if (!xmrk)
1248 {
1249 return FALSE;
1250 }
1251 xmrk->v = mark.value;
1252 xmrk->m = mark.mask;
1253 }
1254 return TRUE;
1255}
1256
1257/**
1258 * Format the security label for debug messages
1259 */
1260static void format_label(char *buf, int buflen, sec_label_t *label)
1261{
1262 if (label)
1263 {
1264 snprintf(buf, buflen, " (ctx %s)", label->get_string(label));
1265 }
1266}
1267
1268/**
1269 * Add a security label to message if required
1270 */
1271static bool add_label(struct nlmsghdr *hdr, int buflen, sec_label_t *label)
1272{
1273 if (label)
1274 {
1275#ifdef USE_SELINUX
1276 struct xfrm_user_sec_ctx *ctx;
1277 chunk_t enc = label->get_encoding(label);
1278 int len = sizeof(*ctx) + enc.len;
1279
1280 ctx = netlink_reserve(hdr, buflen, XFRMA_SEC_CTX, len);
1281 if (!ctx)
1282 {
1283 return FALSE;
1284 }
1285 /* this attribute for some reason duplicates the generic header */
1286 ctx->exttype = XFRMA_SEC_CTX;
1287 ctx->len = len;
1288
1289 ctx->ctx_doi = XFRM_SC_DOI_LSM;
1290 ctx->ctx_alg = XFRM_SC_ALG_SELINUX;
1291 ctx->ctx_len = enc.len;
1292 memcpy((void*)(ctx + 1), enc.ptr, enc.len);
1293#endif
1294 }
1295 return TRUE;
1296}
1297
1298/**
1299 * Add a uint32 attribute to message
1300 */
1301static bool add_uint32(struct nlmsghdr *hdr, int buflen,
1302 enum xfrm_attr_type_t type, uint32_t value)
1303{
1304 uint32_t *xvalue;
1305
1306 xvalue = netlink_reserve(hdr, buflen, type, sizeof(*xvalue));
1307 if (!xvalue)
1308 {
1309 return FALSE;
1310 }
1311 *xvalue = value;
1312 return TRUE;
1313}
1314
1315/**
1316 * Add a uint16 attribute to message
1317 */
1318static bool add_uint16(struct nlmsghdr *hdr, int buflen,
1319 enum xfrm_attr_type_t type, uint16_t value)
1320{
1321 uint16_t *xvalue;
1322
1323 xvalue = netlink_reserve(hdr, buflen, type, sizeof(*xvalue));
1324 if (!xvalue)
1325 {
1326 return FALSE;
1327 }
1328 *xvalue = value;
1329 return TRUE;
1330}
1331
1332/**
1333 * Add a uint8 attribute to message
1334 */
1335static bool add_uint8(struct nlmsghdr *hdr, int buflen,
1336 enum xfrm_attr_type_t type, uint8_t value)
1337{
1338 uint8_t *xvalue;
1339
1340 xvalue = netlink_reserve(hdr, buflen, type, sizeof(*xvalue));
1341 if (!xvalue)
1342 {
1343 return FALSE;
1344 }
1345 *xvalue = value;
1346 return TRUE;
1347}
1348
1349/**
1350 * Get an SPI for a specific protocol from the kernel.
1351 */
1352static status_t get_spi_internal(private_kernel_netlink_ipsec_t *this,
1353 host_t *src, host_t *dst, uint8_t proto, uint32_t min, uint32_t max,
1354 uint32_t *spi)
1355{
1356 netlink_buf_t request;
1357 struct nlmsghdr *hdr, *out;
1358 struct xfrm_userspi_info *userspi;
1359 uint32_t received_spi = 0;
1360 size_t len;
1361
1362 memset(&request, 0, sizeof(request));
1363
1364 hdr = &request.hdr;
1365 hdr->nlmsg_flags = NLM_F_REQUEST;
1366 hdr->nlmsg_type = XFRM_MSG_ALLOCSPI;
1367 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userspi_info));
1368
1369 userspi = NLMSG_DATA(hdr);
1370 host2xfrm(src, &userspi->info.saddr);
1371 host2xfrm(dst, &userspi->info.id.daddr);
1372 userspi->info.id.proto = proto;
1373 userspi->info.mode = XFRM_MODE_TUNNEL;
1374 userspi->info.family = src->get_family(src);
1375 userspi->min = min;
1376 userspi->max = max;
1377
1378 if (this->sa_dir &&
1379 !add_uint8(hdr, sizeof(request), XFRMA_SA_DIR, XFRM_SA_DIR_IN))
1380 {
1381 return FAILED;
1382 }
1383
1384 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
1385 {
1386 hdr = out;
1387 while (NLMSG_OK(hdr, len))
1388 {
1389 switch (hdr->nlmsg_type)
1390 {
1391 case XFRM_MSG_NEWSA:
1392 {
1393 struct xfrm_usersa_info* usersa = NLMSG_DATA(hdr);
1394 received_spi = usersa->id.spi;
1395 break;
1396 }
1397 case NLMSG_ERROR:
1398 {
1399 netlink_log_error(hdr, "allocating SPI failed");
1400 break;
1401 }
1402 default:
1403 hdr = NLMSG_NEXT(hdr, len);
1404 continue;
1405 case NLMSG_DONE:
1406 break;
1407 }
1408 break;
1409 }
1410 free(out);
1411 }
1412
1413 if (received_spi == 0)
1414 {
1415 return FAILED;
1416 }
1417
1418 *spi = received_spi;
1419 return SUCCESS;
1420}
1421
1422METHOD(kernel_ipsec_t, get_spi, status_t,
1423 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1424 uint8_t protocol, uint32_t *spi)
1425{
1426 uint32_t spi_min, spi_max;
1427
1428 spi_min = lib->settings->get_int(lib->settings, "%s.spi_min",
1429 KERNEL_SPI_MIN, lib->ns);
1430 spi_max = lib->settings->get_int(lib->settings, "%s.spi_max",
1431 KERNEL_SPI_MAX, lib->ns);
1432
1433 if (get_spi_internal(this, src, dst, protocol, min(spi_min, spi_max),
1434 max(spi_min, spi_max), spi) != SUCCESS)
1435 {
1436 DBG1(DBG_KNL, "unable to get SPI");
1437 return FAILED;
1438 }
1439
1440 DBG2(DBG_KNL, "got SPI %.8x", ntohl(*spi));
1441 return SUCCESS;
1442}
1443
1444METHOD(kernel_ipsec_t, get_cpi, status_t,
1445 private_kernel_netlink_ipsec_t *this, host_t *src, host_t *dst,
1446 uint16_t *cpi)
1447{
1448 uint32_t received_spi = 0;
1449
1450 if (get_spi_internal(this, src, dst, IPPROTO_COMP,
1451 0x100, 0xEFFF, &received_spi) != SUCCESS)
1452 {
1453 DBG1(DBG_KNL, "unable to get CPI");
1454 return FAILED;
1455 }
1456
1457 *cpi = htons((uint16_t)ntohl(received_spi));
1458
1459 DBG2(DBG_KNL, "got CPI %.4x", ntohs(*cpi));
1460 return SUCCESS;
1461}
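
/*
 * Example: if the kernel allocates the value 0xab12 from the requested CPI
 * range 0x100-0xefff, get_spi_internal() returns it as a 32-bit SPI in
 * network order; htons((uint16_t)ntohl(spi)) then yields the 16-bit CPI
 * 0xab12, again in network order.
 */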
1462
1463/* ETHTOOL_GSSET_INFO is available since 2.6.34 and ETH_SS_FEATURES (enum) and
1464 * ETHTOOL_GFEATURES since 2.6.39, so check for the latter */
1465#ifdef ETHTOOL_GFEATURES
1466
1467/**
1468 * Global metadata used for IPsec HW offload
1469 */
1470static struct {
1471 /** determined HW offload support */
1472 bool supported;
1473 /** bit in feature set */
1474 u_int bit;
1475 /** total number of device feature blocks */
1476 u_int total_blocks;
1477} netlink_hw_offload;
1478
1479/**
1480 * Check if kernel supports HW offload and determine feature flag
1481 */
1482static bool netlink_find_offload_feature(const char *ifname)
1483{
1484 struct ethtool_sset_info *sset_info;
1485 struct ethtool_gstrings *cmd = NULL;
1486 struct ifreq ifr = { 0 };
1487 uint32_t sset_len, i;
1488 char *str;
1489 int err, query_socket;
1490
1491 query_socket = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_XFRM);
1492 if (query_socket < 0)
1493 {
1494 return FALSE;
1495 }
1496
1497 /* determine number of device features */
1498 INIT_EXTRA(sset_info, sizeof(uint32_t),
1499 .cmd = ETHTOOL_GSSET_INFO,
1500 .sset_mask = 1ULL << ETH_SS_FEATURES,
1501 );
1502 strncpy(ifr.ifr_name, ifname, IFNAMSIZ-1);
1503 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1504 ifr.ifr_data = (void*)sset_info;
1505
1506 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1507 if (err || sset_info->sset_mask != 1ULL << ETH_SS_FEATURES)
1508 {
1509 goto out;
1510 }
1511 sset_len = sset_info->data[0];
1512
1513 /* retrieve names of device features */
1514 INIT_EXTRA(cmd, ETH_GSTRING_LEN * sset_len,
1515 .cmd = ETHTOOL_GSTRINGS,
1516 .string_set = ETH_SS_FEATURES,
1517 );
1518 strncpy(ifr.ifr_name, ifname, IFNAMSIZ-1);
1519 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1520 ifr.ifr_data = (void*)cmd;
1521
1522 err = ioctl(query_socket, SIOCETHTOOL, &ifr);
1523 if (err)
1524 {
1525 goto out;
1526 }
1527
1528 /* look for the ESP_HW feature bit */
1529 str = (char*)cmd->data;
1530 for (i = 0; i < cmd->len; i++)
1531 {
1532 if (strneq(str, "esp-hw-offload", ETH_GSTRING_LEN))
1533 {
1534 netlink_hw_offload.supported = TRUE;
1535 netlink_hw_offload.bit = i;
1536 netlink_hw_offload.total_blocks = (sset_len + 31) / 32;
1537 break;
1538 }
1539 str += ETH_GSTRING_LEN;
1540 }
1541
1542out:
1543 free(sset_info);
1544 free(cmd);
1545 close(query_socket);
1546 return netlink_hw_offload.supported;
1547}
1548
1549/**
1550 * Check if interface supports HW offload
1551 */
1552static bool netlink_detect_offload(const char *ifname)
1553{
1554 struct ethtool_gfeatures *cmd;
1555 uint32_t feature_bit;
1556 struct ifreq ifr = { 0 };
1557 int query_socket;
1558 int block;
1559 bool ret = FALSE;
1560
1561 if (!netlink_hw_offload.supported)
1562 {
1563 DBG1(DBG_KNL, "HW offload is not supported by kernel");
1564 return FALSE;
1565 }
1566
1567 query_socket = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_XFRM);
1568 if (query_socket < 0)
1569 {
1570 return FALSE;
1571 }
1572
1573 /* feature is supported by kernel, query device features */
1574 INIT_EXTRA(cmd, sizeof(cmd->features[0]) * netlink_hw_offload.total_blocks,
1575 .cmd = ETHTOOL_GFEATURES,
1576 .size = netlink_hw_offload.total_blocks,
1577 );
1578 strncpy(ifr.ifr_name, ifname, IFNAMSIZ-1);
1579 ifr.ifr_name[IFNAMSIZ-1] = '\0';
1580 ifr.ifr_data = (void*)cmd;
1581
1582 if (!ioctl(query_socket, SIOCETHTOOL, &ifr))
1583 {
1584 block = netlink_hw_offload.bit / 32;
1585 feature_bit = 1U << (netlink_hw_offload.bit % 32);
1586 if (cmd->features[block].active & feature_bit)
1587 {
1588 ret = TRUE;
1589 }
1590 }
1591 free(cmd);
1592 close(query_socket);
1593 return ret;
1594}
1595
1596#else
1597
1598static bool netlink_find_offload_feature(const char *ifname)
1599{
1600 return FALSE;
1601}
1602
1603static bool netlink_detect_offload(const char *ifname)
1604{
1605 return FALSE;
1606}
1607
1608#endif
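
/*
 * Note: the "esp-hw-offload" feature string looked up above corresponds to
 * the flag of the same name that ethtool -k typically reports for devices
 * with ESP hardware offload support.
 */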
1609
1610/**
1611 * Add a HW offload attribute to the given message, return it if it was added.
1612 *
1613 * There are 4 HW offload configuration values:
1614 * 1. HW_OFFLOAD_NO : Do not configure HW offload.
1615 * 2. HW_OFFLOAD_CRYPTO : Configure crypto HW offload.
1616 * Fail SA addition if crypto offload is not supported.
1617 * 3. HW_OFFLOAD_PACKET : Configure packet HW offload.
1618 * Fail SA addition if packet offload is not supported.
1619 * 4. HW_OFFLOAD_AUTO : Configure packet HW offload if supported by the kernel
1620 * and device. If not, configure crypto HW offload if
1621 * supported by the kernel and device.
1622 * Do not fail SA addition if offload is not supported.
1623 */
1624static bool add_hw_offload(struct nlmsghdr *hdr, int buflen, host_t *local,
1625 char *interface, hw_offload_t hw_offload,
1626 struct xfrm_user_offload **offload)
1627{
1628 char *ifname;
1629 bool ret;
1630
1631 /* do IPsec configuration without offload */
1632 if (hw_offload == HW_OFFLOAD_NO)
1633 {
1634 return TRUE;
1635 }
1636
1637 /* unless offloading is forced, we return TRUE even if we fail */
1638 ret = (hw_offload == HW_OFFLOAD_AUTO);
1639
1640 if (!local || local->is_anyaddr(local) ||
1641 !charon->kernel->get_interface(charon->kernel, local, &ifname))
1642 {
1643 if (!interface || !interface[0])
1644 {
1645 return ret;
1646 }
1647 ifname = strdup(interface);
1648 }
1649
1650 /* check if interface supports hw_offload */
1651 if (!netlink_detect_offload(ifname))
1652 {
1653 DBG1(DBG_KNL, "HW offload is not supported by device %s", ifname);
1654 goto out;
1655 }
1656
1657 /* activate HW offload */
1658 *offload = netlink_reserve(hdr, buflen,
1659 XFRMA_OFFLOAD_DEV, sizeof(**offload));
1660 if (!(*offload))
1661 {
1662 goto out;
1663 }
1664 (*offload)->ifindex = if_nametoindex(ifname);
1665
1666 if (hw_offload == HW_OFFLOAD_PACKET ||
1667 hw_offload == HW_OFFLOAD_AUTO)
1668 {
1669 (*offload)->flags |= XFRM_OFFLOAD_PACKET;
1670 }
1671
1672 ret = TRUE;
1673
1674out:
1675 free(ifname);
1676 return ret;
1677}
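
/*
 * Summary of the resulting behavior, derived from the logic above:
 *
 *   mode               offload supported        offload not supported
 *   HW_OFFLOAD_NO      no attribute, TRUE       no attribute, TRUE
 *   HW_OFFLOAD_AUTO    attribute added, TRUE    no attribute, TRUE
 *   HW_OFFLOAD_CRYPTO  attribute added, TRUE    no attribute, FALSE
 *   HW_OFFLOAD_PACKET  attribute added, TRUE    no attribute, FALSE
 *
 * That is, only the two forced modes cause SA/policy installation to fail if
 * the kernel or device lacks offload support.
 */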
1678
1679/**
1680 * Add a HW offload attribute to the given SA-related message.
1681 */
1682static bool add_hw_offload_sa(struct nlmsghdr *hdr, int buflen,
1683 kernel_ipsec_sa_id_t *id,
1684 kernel_ipsec_add_sa_t *data,
1685 struct xfrm_user_offload **offload)
1686{
1687 host_t *local = data->inbound ? id->dst : id->src;
1688
1689 if (!add_hw_offload(hdr, buflen, local, NULL, data->hw_offload, offload))
1690 {
1691 return FALSE;
1692 }
1693 else if (*offload)
1694 {
1695 (*offload)->flags |= data->inbound ? XFRM_OFFLOAD_INBOUND : 0;
1696 }
1697 return TRUE;
1698}
1699
1700/**
1701 * Add a HW offload attribute to the given policy-related message.
1702 */
1703static bool add_hw_offload_policy(struct nlmsghdr *hdr, int buflen,
1704 policy_entry_t *policy,
1705 policy_sa_t *mapping,
1706 struct xfrm_user_offload **offload)
1707{
1708 ipsec_sa_t *ipsec = mapping->sa;
1709 host_t *local = ipsec->src;
1710 char ifname[IFNAMSIZ] = "";
1711
1712 /* only packet offloading is supported for policies, which we try to use
1713 * in automatic mode */
1714 if (ipsec->hw_offload != HW_OFFLOAD_PACKET &&
1715 ipsec->hw_offload != HW_OFFLOAD_AUTO)
1716 {
1717 return TRUE;
1718 }
1719
1720 switch (policy->direction)
1721 {
1722 case POLICY_FWD:
1723 /* FWD policies are not offloaded, they are enforced by the kernel */
1724 return TRUE;
1725 case POLICY_IN:
1726 local = ipsec->dst;
1727 break;
1728 }
1729 if (policy->sel.ifindex)
1730 {
1731 if_indextoname(policy->sel.ifindex, ifname);
1732 }
1733 return add_hw_offload(hdr, buflen, local, ifname, ipsec->hw_offload, offload);
1734}
1735
1736METHOD(kernel_ipsec_t, add_sa, status_t,
1737 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
1738 kernel_ipsec_add_sa_t *data)
1739{
1740 netlink_buf_t request;
1741 const char *alg_name;
1742 char markstr[32] = "";
1743 struct nlmsghdr *hdr;
1744 struct xfrm_usersa_info *sa;
1745 struct xfrm_user_offload *offload = NULL;
1746 uint16_t icv_size = 64, ipcomp = data->ipcomp;
1747 ipsec_mode_t mode = data->mode, original_mode = data->mode;
1748 traffic_selector_t *first_src_ts, *first_dst_ts;
1749 status_t status = FAILED;
1750
1751 /* if IPComp is used, we install an additional IPComp SA. if the cpi is 0
1752 * we are in the recursive call below */
1753 if (ipcomp != IPCOMP_NONE && data->cpi != 0)
1754 {
1755 lifetime_cfg_t lft = {{0,0,0},{0,0,0},{0,0,0}};
1756 kernel_ipsec_sa_id_t ipcomp_id = {
1757 .src = id->src,
1758 .dst = id->dst,
1759 .spi = htonl(ntohs(data->cpi)),
1760 .proto = IPPROTO_COMP,
1761 .mark = id->mark,
1762 .if_id = id->if_id,
1763 };
1764 kernel_ipsec_add_sa_t ipcomp_sa = {
1765 .reqid = data->reqid,
1766 .mode = data->mode,
1767 .src_ts = data->src_ts,
1768 .dst_ts = data->dst_ts,
1769 .lifetime = &lft,
1770 .enc_alg = ENCR_UNDEFINED,
1771 .int_alg = AUTH_UNDEFINED,
1772 .tfc = data->tfc,
1773 .ipcomp = data->ipcomp,
1774 .cpu = data->cpu,
1775 .initiator = data->initiator,
1776 .inbound = data->inbound,
1777 .update = data->update,
1778 };
1779 add_sa(this, &ipcomp_id, &ipcomp_sa);
1780 ipcomp = IPCOMP_NONE;
1781 /* use transport mode ESP SA, IPComp uses tunnel mode */
1782 mode = MODE_TRANSPORT;
1783 }
1784
1785 memset(&request, 0, sizeof(request));
1786 format_mark(markstr, sizeof(markstr), id->mark);
1787
1788 DBG2(DBG_KNL, "adding SAD entry with SPI %.8x and reqid {%u}%s",
1789 ntohl(id->spi), data->reqid, markstr);
1790
1791 hdr = &request.hdr;
1792 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1793 hdr->nlmsg_type = data->update ? XFRM_MSG_UPDSA : XFRM_MSG_NEWSA;
1794 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
1795
1796 sa = NLMSG_DATA(hdr);
1797 host2xfrm(id->src, &sa->saddr);
1798 host2xfrm(id->dst, &sa->id.daddr);
1799 sa->id.spi = id->spi;
1800 sa->id.proto = id->proto;
1801 sa->family = id->src->get_family(id->src);
1802 sa->mode = mode2kernel(mode);
1803 sa->seq = data->seq;
1804
1805 if (!data->copy_ecn)
1806 {
1807 sa->flags |= XFRM_STATE_NOECN;
1808 }
1809
1810 if (data->inbound)
1811 {
1812 switch (data->copy_dscp)
1813 {
1814 case DSCP_COPY_YES:
1815 case DSCP_COPY_IN_ONLY:
1816 sa->flags |= XFRM_STATE_DECAP_DSCP;
1817 break;
1818 default:
1819 break;
1820 }
1821 }
1822 else
1823 {
1824 if (!data->copy_df)
1825 {
1826 sa->flags |= XFRM_STATE_NOPMTUDISC;
1827 }
1828 switch (data->copy_dscp)
1829 {
1830 case DSCP_COPY_IN_ONLY:
1831 case DSCP_COPY_NO:
1832 {
1833 /* currently the only extra flag */
1834 if (!add_uint32(hdr, sizeof(request), XFRMA_SA_EXTRA_FLAGS,
1835 XFRM_SA_XFLAG_DONT_ENCAP_DSCP))
1836 {
1837 goto failed;
1838 }
1839 break;
1840 }
1841 default:
1842 break;
1843 }
1844 }
1845
1846 switch (mode)
1847 {
1848 case MODE_TUNNEL:
1849 case MODE_IPTFS:
1850 sa->flags |= XFRM_STATE_AF_UNSPEC;
1851 break;
1852 case MODE_BEET:
1853 case MODE_TRANSPORT:
1854 if (original_mode == MODE_TUNNEL)
1855 { /* don't install selectors for switched SAs, because only one
1856 * selector can be installed, other traffic would get dropped */
1857 break;
1858 }
1859 if (data->src_ts->get_first(data->src_ts,
1860 (void**)&first_src_ts) == SUCCESS &&
1861 data->dst_ts->get_first(data->dst_ts,
1862 (void**)&first_dst_ts) == SUCCESS)
1863 {
1864 sa->sel = ts2selector(first_src_ts, first_dst_ts,
1865 data->interface);
1866 if (!this->proto_port_transport)
1867 {
1868 /* don't install proto/port on SA. This would break
1869 * potential secondary SAs for the same address using a
1870 * different proto/port. */
1871 sa->sel.proto = 0;
1872 sa->sel.dport = sa->sel.dport_mask = 0;
1873 sa->sel.sport = sa->sel.sport_mask = 0;
1874 }
1875 }
1876 break;
1877 default:
1878 break;
1879 }
1880 if (id->proto == IPPROTO_AH && sa->family == AF_INET)
1881 { /* use alignment to 4 bytes for IPv4 instead of the incorrect 8 byte
1882 * alignment that's used by default but is only valid for IPv6 */
1883 sa->flags |= XFRM_STATE_ALIGN4;
1884 }
1885
1886 sa->reqid = data->reqid;
1887 sa->lft.soft_byte_limit = XFRM_LIMIT(data->lifetime->bytes.rekey);
1888 sa->lft.hard_byte_limit = XFRM_LIMIT(data->lifetime->bytes.life);
1889 sa->lft.soft_packet_limit = XFRM_LIMIT(data->lifetime->packets.rekey);
1890 sa->lft.hard_packet_limit = XFRM_LIMIT(data->lifetime->packets.life);
1891 /* we use lifetimes since added, not since used */
1892 sa->lft.soft_add_expires_seconds = data->lifetime->time.rekey;
1893 sa->lft.hard_add_expires_seconds = data->lifetime->time.life;
1894 sa->lft.soft_use_expires_seconds = 0;
1895 sa->lft.hard_use_expires_seconds = 0;
1896
1897 switch (data->enc_alg)
1898 {
1899 case ENCR_UNDEFINED:
1900 /* no encryption */
1901 break;
1902 case ENCR_AES_CCM_ICV16:
1903 case ENCR_AES_GCM_ICV16:
1904 case ENCR_NULL_AUTH_AES_GMAC:
1905 case ENCR_CAMELLIA_CCM_ICV16:
1906 case ENCR_CHACHA20_POLY1305:
1907 icv_size += 32;
1908 /* FALL */
1909 case ENCR_AES_CCM_ICV12:
1910 case ENCR_AES_GCM_ICV12:
1911 case ENCR_CAMELLIA_CCM_ICV12:
1912 icv_size += 32;
1913 /* FALL */
1914 case ENCR_AES_CCM_ICV8:
1915 case ENCR_AES_GCM_ICV8:
1916 case ENCR_CAMELLIA_CCM_ICV8:
1917 {
1918 struct xfrm_algo_aead *algo;
1919
1920 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1921 if (alg_name == NULL)
1922 {
1923 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1924 encryption_algorithm_names, data->enc_alg);
1925 goto failed;
1926 }
1927 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1928 encryption_algorithm_names, data->enc_alg,
1929 data->enc_key.len * 8);
1930
1931 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AEAD,
1932 sizeof(*algo) + data->enc_key.len);
1933 if (!algo)
1934 {
1935 goto failed;
1936 }
1937 algo->alg_key_len = data->enc_key.len * 8;
1938 algo->alg_icv_len = icv_size;
1939 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name)-1);
1940 algo->alg_name[sizeof(algo->alg_name)-1] = '\0';
1941 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1942 break;
1943 }
1944 default:
1945 {
1946 struct xfrm_algo *algo;
1947
1948 alg_name = lookup_algorithm(ENCRYPTION_ALGORITHM, data->enc_alg);
1949 if (alg_name == NULL)
1950 {
1951 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1952 encryption_algorithm_names, data->enc_alg);
1953 goto failed;
1954 }
1955 DBG2(DBG_KNL, " using encryption algorithm %N with key size %d",
1956 encryption_algorithm_names, data->enc_alg,
1957 data->enc_key.len * 8);
1958
1959 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_CRYPT,
1960 sizeof(*algo) + data->enc_key.len);
1961 if (!algo)
1962 {
1963 goto failed;
1964 }
1965 algo->alg_key_len = data->enc_key.len * 8;
1966 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name)-1);
1967 algo->alg_name[sizeof(algo->alg_name)-1] = '\0';
1968 memcpy(algo->alg_key, data->enc_key.ptr, data->enc_key.len);
1969 }
1970 }
1971
1972 if (data->int_alg != AUTH_UNDEFINED)
1973 {
1974 u_int trunc_len = 0;
1975
1976 alg_name = lookup_algorithm(INTEGRITY_ALGORITHM, data->int_alg);
1977 if (alg_name == NULL)
1978 {
1979 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
1980 integrity_algorithm_names, data->int_alg);
1981 goto failed;
1982 }
1983 DBG2(DBG_KNL, " using integrity algorithm %N with key size %d",
1984 integrity_algorithm_names, data->int_alg, data->int_key.len * 8);
1985
1986 switch (data->int_alg)
1987 {
1988 case AUTH_HMAC_MD5_128:
1989 case AUTH_HMAC_SHA2_256_128:
1990 trunc_len = 128;
1991 break;
1992 case AUTH_HMAC_SHA1_160:
1993 trunc_len = 160;
1994 break;
1995 case AUTH_HMAC_SHA2_256_256:
1996 trunc_len = 256;
1997 break;
1998 case AUTH_HMAC_SHA2_384_384:
1999 trunc_len = 384;
2000 break;
2001 case AUTH_HMAC_SHA2_512_512:
2002 trunc_len = 512;
2003 break;
2004 default:
2005 break;
2006 }
2007
2008 if (trunc_len)
2009 {
2010 struct xfrm_algo_auth* algo;
2011
2012 /* the kernel uses SHA256 with 96 bit truncation by default, so
2013 * use the specified truncation size supported by newer kernels.
2014 * Also use this for untruncated MD5, SHA1 and SHA2. */
2015 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH_TRUNC,
2016 sizeof(*algo) + data->int_key.len);
2017 if (!algo)
2018 {
2019 goto failed;
2020 }
2021 algo->alg_key_len = data->int_key.len * 8;
2022 algo->alg_trunc_len = trunc_len;
2023 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name)-1);
2024 algo->alg_name[sizeof(algo->alg_name)-1] = '\0';
2025 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
2026 }
2027 else
2028 {
2029 struct xfrm_algo* algo;
2030
2031 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_AUTH,
2032 sizeof(*algo) + data->int_key.len);
2033 if (!algo)
2034 {
2035 goto failed;
2036 }
2037 algo->alg_key_len = data->int_key.len * 8;
2038 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name)-1);
2039 algo->alg_name[sizeof(algo->alg_name)-1] = '\0';
2040 memcpy(algo->alg_key, data->int_key.ptr, data->int_key.len);
2041 }
2042 }
2043
2044 if (ipcomp != IPCOMP_NONE)
2045 {
2046 struct xfrm_algo* algo;
2047
2048 alg_name = lookup_algorithm(COMPRESSION_ALGORITHM, ipcomp);
2049 if (alg_name == NULL)
2050 {
2051 DBG1(DBG_KNL, "algorithm %N not supported by kernel!",
2052 ipcomp_transform_names, ipcomp);
2053 goto failed;
2054 }
2055 DBG2(DBG_KNL, " using compression algorithm %N",
2056 ipcomp_transform_names, ipcomp);
2057
2058 algo = netlink_reserve(hdr, sizeof(request), XFRMA_ALG_COMP,
2059 sizeof(*algo));
2060 if (!algo)
2061 {
2062 goto failed;
2063 }
2064 algo->alg_key_len = 0;
2065 strncpy(algo->alg_name, alg_name, sizeof(algo->alg_name)-1);
2066 algo->alg_name[sizeof(algo->alg_name)-1] = '\0';
2067 }
2068
2069 if (data->encap)
2070 {
2071 struct xfrm_encap_tmpl *tmpl;
2072
2073 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP, sizeof(*tmpl));
2074 if (!tmpl)
2075 {
2076 goto failed;
2077 }
2078 tmpl->encap_type = UDP_ENCAP_ESPINUDP;
2079 tmpl->encap_sport = htons(id->src->get_port(id->src));
2080 tmpl->encap_dport = htons(id->dst->get_port(id->dst));
2081 memset(&tmpl->encap_oa, 0, sizeof (xfrm_address_t));
2082 /* encap_oa could probably be derived from the
2083 * traffic selectors [rfc4306, p39]. In the netlink kernel
2084 * implementation pluto does the same as we do here but it uses
2085 * encap_oa in the pfkey implementation.
2086 * BUT as /usr/src/linux/net/key/af_key.c indicates the kernel ignores
2087 * it anyway
2088 * -> does that mean that NAT-T encap doesn't work in transport mode?
2089 * No. The reason the kernel ignores NAT-OA is that it recomputes
2090 * (or, rather, just ignores) the checksum. If packets pass the IPsec
2091 * checks it marks them "checksum ok" so OA isn't needed. */
2092
2093		/* if the remote port is set to 0 for UDP-encapsulated per-CPU SAs, we
2094		 * increase the threshold for mapping changes as it would otherwise get
2095		 * triggered with every packet */
2096 if (data->inbound && !id->src->get_port(id->src) &&
2097 !add_uint32(hdr, sizeof(request), XFRMA_MTIMER_THRESH, UINT32_MAX))
2098 {
2099 goto failed;
2100 }
2101 }
2102
2103 if (!add_mark(hdr, sizeof(request), id->mark))
2104 {
2105 goto failed;
2106 }
2107
2108 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2109 {
2110 goto failed;
2111 }
2112
2113 if (!add_label(hdr, sizeof(request), data->label))
2114 {
2115 goto failed;
2116 }
2117
2118 if (ipcomp == IPCOMP_NONE && (data->mark.value | data->mark.mask))
2119 {
2120 if (!add_uint32(hdr, sizeof(request), XFRMA_SET_MARK,
2121 data->mark.value) ||
2122 !add_uint32(hdr, sizeof(request), XFRMA_SET_MARK_MASK,
2123 data->mark.mask))
2124 {
2125 goto failed;
2126 }
2127 }
2128
2129 if (data->tfc && id->proto == IPPROTO_ESP && mode == MODE_TUNNEL)
2130 { /* the kernel supports TFC padding only for tunnel mode ESP SAs */
2131 if (!add_uint32(hdr, sizeof(request), XFRMA_TFCPAD, data->tfc))
2132 {
2133 goto failed;
2134 }
2135 }
2136
2137 if (this->sa_dir &&
2138 !add_uint8(hdr, sizeof(request), XFRMA_SA_DIR,
2139 data->inbound ? XFRM_SA_DIR_IN : XFRM_SA_DIR_OUT))
2140 {
2141 goto failed;
2142 }
2143
2144 if (data->cpu != CPU_ID_MAX)
2145 {
2146 if (!add_uint32(hdr, sizeof(request), XFRMA_SA_PCPU, data->cpu))
2147 {
2148 goto failed;
2149 }
2150 DBG2(DBG_KNL, " using CPU ID: %u", data->cpu);
2151 }
2152
2153 if (mode == MODE_IPTFS)
2154 {
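		/* the IP-TFS attributes below are driven by strongswan.conf options
		 * in the daemon's namespace (typically charon), i.e. iptfs.drop_time,
		 * iptfs.reorder_window, iptfs.init_delay, iptfs.max_queue_size,
		 * iptfs.packet_size and iptfs.dont_fragment */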
2155 if (data->inbound)
2156 {
2157 if (!add_uint32(hdr, sizeof(request), XFRMA_IPTFS_DROP_TIME,
2158 lib->settings->get_int(lib->settings,
2159 "%s.iptfs.drop_time", 1000000, lib->ns)))
2160 {
2161 goto failed;
2162 }
2163 if (!add_uint16(hdr, sizeof(request), XFRMA_IPTFS_REORDER_WINDOW,
2164 lib->settings->get_int(lib->settings,
2165 "%s.iptfs.reorder_window", 3, lib->ns)))
2166 {
2167 goto failed;
2168 }
2169 }
2170 else
2171 {
2172 if (!add_uint32(hdr, sizeof(request), XFRMA_IPTFS_INIT_DELAY,
2173 lib->settings->get_int(lib->settings,
2174 "%s.iptfs.init_delay", 0, lib->ns)))
2175 {
2176 goto failed;
2177 }
2178 if (!add_uint32(hdr, sizeof(request), XFRMA_IPTFS_MAX_QSIZE,
2179 lib->settings->get_int(lib->settings,
2180 "%s.iptfs.max_queue_size", 1024 * 1024, lib->ns)))
2181 {
2182 goto failed;
2183 }
2184 if (!add_uint32(hdr, sizeof(request), XFRMA_IPTFS_PKT_SIZE,
2185 lib->settings->get_int(lib->settings,
2186 "%s.iptfs.packet_size", 0, lib->ns)))
2187 {
2188 goto failed;
2189 }
2190 if ((data->iptfs_dont_frag ||
2191 lib->settings->get_bool(lib->settings,
2192 "%s.iptfs.dont_fragment", FALSE, lib->ns)) &&
2193 !netlink_reserve(hdr, sizeof(request), XFRMA_IPTFS_DONT_FRAG, 0))
2194 {
2195 goto failed;
2196 }
2197 }
2198 }
2199
2200 if (id->proto != IPPROTO_COMP)
2201 {
2202 /* we don't need a replay window for outbound SAs, however, older
2203 * kernels reject the attribute if it is 0 when using ESN, while
2204		 * newer kernels reject it if it's > 0 when the SA's direction is set */
2205 if (!data->inbound && data->replay_window)
2206 {
2207 data->replay_window = (data->esn && !this->sa_dir) ? 1 : 0;
2208 }
2209 if (data->esn || data->replay_window > 32)
2210 {
2211 /* for ESN or larger replay windows we need the new
2212 * XFRMA_REPLAY_ESN_VAL attribute to configure a bitmap */
2213 struct xfrm_replay_state_esn *replay;
2214 uint32_t bmp_size;
2215
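			/* round the replay window (in bits) up to a multiple of 32 and
			 * convert it to bytes, as the bitmap is an array of __u32's */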
2216 bmp_size = round_up(data->replay_window, sizeof(uint32_t) * 8) / 8;
2217 replay = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_ESN_VAL,
2218 sizeof(*replay) + bmp_size);
2219 if (!replay)
2220 {
2221 goto failed;
2222 }
2223			/* bmp_len contains the number of __u32's */
2224 replay->bmp_len = bmp_size / sizeof(uint32_t);
2225 replay->replay_window = data->replay_window;
2226 DBG2(DBG_KNL, " using replay window of %u packets",
2227 data->replay_window);
2228
2229 if (data->esn)
2230 {
2231 DBG2(DBG_KNL, " using extended sequence numbers (ESN)");
2232 sa->flags |= XFRM_STATE_ESN;
2233 }
2234 }
2235 else
2236 {
2237 DBG2(DBG_KNL, " using replay window of %u packets",
2238 data->replay_window);
2239 sa->replay_window = data->replay_window;
2240 }
2241
2242 DBG2(DBG_KNL, " HW offload: %N", hw_offload_names, data->hw_offload);
2243 if (!add_hw_offload_sa(hdr, sizeof(request), id, data, &offload))
2244 {
2245 DBG1(DBG_KNL, "failed to configure HW offload");
2246 goto failed;
2247 }
2248 }
2249
2250 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2251
2252 if (status != SUCCESS && offload && data->hw_offload == HW_OFFLOAD_AUTO)
2253 {
2254 DBG1(DBG_KNL, "failed to install SA with %N HW offload, trying with "
2255 "%N HW offload", hw_offload_names, HW_OFFLOAD_PACKET,
2256 hw_offload_names, HW_OFFLOAD_CRYPTO);
2257 offload->flags &= ~XFRM_OFFLOAD_PACKET;
2258 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2259 }
2260
2261 if (status == NOT_FOUND && data->update)
2262 {
2263 DBG1(DBG_KNL, "allocated SPI not found anymore, try to add SAD entry");
2264 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2265 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
2266 }
2267
2268 if (status != SUCCESS)
2269 {
2270 DBG1(DBG_KNL, "unable to add SAD entry with SPI %.8x%s (%N)", ntohl(id->spi),
2271 markstr, status_names, status);
2272 status = FAILED;
2273 goto failed;
2274 }
2275
2276 status = SUCCESS;
2277
2278failed:
2279 memwipe(&request, sizeof(request));
2280 return status;
2281}
2282
2283/**
2284 * Get the usage stats (packets/bytes) and classic replay state (i.e. sequence
2285 * numbers for small windows/non-ESN) of an SA.
2286 *
2287 * Allocates and copies the attributes we get from the kernel.
2288 */
2289static void get_replay_state(private_kernel_netlink_ipsec_t *this,
2290 kernel_ipsec_sa_id_t *sa,
2291 struct xfrm_replay_state **replay,
2292 struct xfrm_lifetime_cur **lifetime)
2293{
2294 netlink_buf_t request;
2295 struct nlmsghdr *hdr, *out = NULL;
2296 struct xfrm_aevent_id *out_aevent = NULL, *aevent_id;
2297 size_t len;
2298 struct rtattr *rta;
2299 size_t rtasize;
2300
2301 memset(&request, 0, sizeof(request));
2302
2303 DBG3(DBG_KNL, "querying replay state from SAD entry with SPI %.8x",
2304 ntohl(sa->spi));
2305
2306 hdr = &request.hdr;
2307 hdr->nlmsg_flags = NLM_F_REQUEST;
2308 hdr->nlmsg_type = XFRM_MSG_GETAE;
2309 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id));
2310
2311 aevent_id = NLMSG_DATA(hdr);
2312 aevent_id->flags = XFRM_AE_RVAL;
2313
2314 host2xfrm(sa->dst, &aevent_id->sa_id.daddr);
2315 aevent_id->sa_id.spi = sa->spi;
2316 aevent_id->sa_id.proto = sa->proto;
2317 aevent_id->sa_id.family = sa->dst->get_family(sa->dst);
2318
2319 if (!add_mark(hdr, sizeof(request), sa->mark))
2320 {
2321 return;
2322 }
2323 if (sa->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, sa->if_id))
2324 {
2325 return;
2326 }
2327
2328 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2329 {
2330 hdr = out;
2331 while (NLMSG_OK(hdr, len))
2332 {
2333 switch (hdr->nlmsg_type)
2334 {
2335 case XFRM_MSG_NEWAE:
2336 {
2337 out_aevent = NLMSG_DATA(hdr);
2338 break;
2339 }
2340 case NLMSG_ERROR:
2341 {
2342 netlink_log_error(hdr, "querying replay state from SAD "
2343 "entry failed");
2344 break;
2345 }
2346 default:
2347 hdr = NLMSG_NEXT(hdr, len);
2348 continue;
2349 case NLMSG_DONE:
2350 break;
2351 }
2352 break;
2353 }
2354 }
2355
2356 if (out_aevent)
2357 {
2358 rta = XFRM_RTA(out, struct xfrm_aevent_id);
2359 rtasize = XFRM_PAYLOAD(out, struct xfrm_aevent_id);
2360 while (RTA_OK(rta, rtasize))
2361 {
2362 if (rta->rta_type == XFRMA_LTIME_VAL &&
2363 RTA_PAYLOAD(rta) == sizeof(**lifetime))
2364 {
2365 free(*lifetime);
2366 *lifetime = malloc(RTA_PAYLOAD(rta));
2367 memcpy(*lifetime, RTA_DATA(rta), RTA_PAYLOAD(rta));
2368 }
2369 if (rta->rta_type == XFRMA_REPLAY_VAL &&
2370 RTA_PAYLOAD(rta) == sizeof(**replay))
2371 {
2372 free(*replay);
2373 *replay = malloc(RTA_PAYLOAD(rta));
2374 memcpy(*replay, RTA_DATA(rta), RTA_PAYLOAD(rta));
2375 }
2376 rta = RTA_NEXT(rta, rtasize);
2377 }
2378 }
2379 free(out);
2380}
2381
2382/**
2383 * Get the last used time of an SA if provided by the kernel
2384 */
2385static bool get_lastused(struct nlmsghdr *hdr, uint64_t *lastused)
2386{
2387 struct rtattr *rta;
2388 size_t rtasize;
2389
2390 rta = XFRM_RTA(hdr, struct xfrm_usersa_info);
2391 rtasize = XFRM_PAYLOAD(hdr, struct xfrm_usersa_info);
2392 while (RTA_OK(rta, rtasize))
2393 {
2394 if (rta->rta_type == XFRMA_LASTUSED &&
2395 RTA_PAYLOAD(rta) == sizeof(*lastused))
2396 {
2397 *lastused = *(uint64_t*)RTA_DATA(rta);
2398 return TRUE;
2399 }
2400 rta = RTA_NEXT(rta, rtasize);
2401 }
2402 return FALSE;
2403}
2404
2405METHOD(kernel_ipsec_t, query_sa, status_t,
2406 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2407 kernel_ipsec_query_sa_t *data, uint64_t *bytes, uint64_t *packets,
2408 time_t *use_time)
2409{
2410 netlink_buf_t request;
2411 struct nlmsghdr *out = NULL, *hdr;
2412 struct xfrm_usersa_id *sa_id;
2413 struct xfrm_usersa_info *sa = NULL;
2414 status_t status = FAILED;
2415 size_t len;
2416 char markstr[32] = "";
2417
2418 memset(&request, 0, sizeof(request));
2419 format_mark(markstr, sizeof(markstr), id->mark);
2420
2421 DBG3(DBG_KNL, "querying SAD entry with SPI %.8x%s", ntohl(id->spi),
2422 markstr);
2423
2424 hdr = &request.hdr;
2425 hdr->nlmsg_flags = NLM_F_REQUEST;
2426 hdr->nlmsg_type = XFRM_MSG_GETSA;
2427 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2428
2429 sa_id = NLMSG_DATA(hdr);
2430 host2xfrm(id->dst, &sa_id->daddr);
2431 sa_id->spi = id->spi;
2432 sa_id->proto = id->proto;
2433 sa_id->family = id->dst->get_family(id->dst);
2434
2435 if (!add_mark(hdr, sizeof(request), id->mark))
2436 {
2437 return FAILED;
2438 }
2439 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2440 {
2441 return FAILED;
2442 }
2443
2444 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2445 {
2446 hdr = out;
2447 while (NLMSG_OK(hdr, len))
2448 {
2449 switch (hdr->nlmsg_type)
2450 {
2451 case XFRM_MSG_NEWSA:
2452 {
2453 sa = NLMSG_DATA(hdr);
2454 break;
2455 }
2456 case NLMSG_ERROR:
2457 {
2458 netlink_log_error(hdr, "querying SAD entry failed");
2459 break;
2460 }
2461 default:
2462 hdr = NLMSG_NEXT(hdr, len);
2463 continue;
2464 case NLMSG_DONE:
2465 break;
2466 }
2467 break;
2468 }
2469 }
2470
2471 if (sa == NULL)
2472 {
2473 DBG2(DBG_KNL, "unable to query SAD entry with SPI %.8x%s",
2474 ntohl(id->spi), markstr);
2475 }
2476 else
2477 {
2478 if (bytes)
2479 {
2480 *bytes = sa->curlft.bytes;
2481 }
2482 if (packets)
2483 {
2484 *packets = sa->curlft.packets;
2485 }
2486 if (use_time)
2487 {
2488 uint64_t lastused = 0;
2489
2490 /* curlft.use_time contains the timestamp of the SA's first use, not
2491 * the last, but we might get the last use time in an attribute */
2492 if (this->sa_lastused && get_lastused(hdr, &lastused))
2493 {
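				/* convert the kernel's wall-clock timestamp to monotonic time */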
2494 *use_time = time_monotonic(NULL) - (time(NULL) - lastused);
2495 }
2496 else
2497 {
2498 *use_time = 0;
2499 }
2500 }
2501 status = SUCCESS;
2502 }
2503 memwipe(out, len);
2504 free(out);
2505 return status;
2506}
2507
2508METHOD(kernel_ipsec_t, del_sa, status_t,
2509 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2510 kernel_ipsec_del_sa_t *data)
2511{
2512 netlink_buf_t request;
2513 struct nlmsghdr *hdr;
2514 struct xfrm_usersa_id *sa_id;
2515 char markstr[32] = "";
2516
2517 /* if IPComp was used, we first delete the additional IPComp SA */
2518 if (data->cpi)
2519 {
2520 kernel_ipsec_sa_id_t ipcomp_id = {
2521 .src = id->src,
2522 .dst = id->dst,
2523 .spi = htonl(ntohs(data->cpi)),
2524 .proto = IPPROTO_COMP,
2525 .mark = id->mark,
2526 };
2527 kernel_ipsec_del_sa_t ipcomp = {};
2528 del_sa(this, &ipcomp_id, &ipcomp);
2529 }
2530
2531 memset(&request, 0, sizeof(request));
2532 format_mark(markstr, sizeof(markstr), id->mark);
2533
2534 DBG2(DBG_KNL, "deleting SAD entry with SPI %.8x%s", ntohl(id->spi),
2535 markstr);
2536
2537 hdr = &request.hdr;
2538 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2539 hdr->nlmsg_type = XFRM_MSG_DELSA;
2540 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2541
2542 sa_id = NLMSG_DATA(hdr);
2543 host2xfrm(id->dst, &sa_id->daddr);
2544 sa_id->spi = id->spi;
2545 sa_id->proto = id->proto;
2546 sa_id->family = id->dst->get_family(id->dst);
2547
2548 if (!add_mark(hdr, sizeof(request), id->mark))
2549 {
2550 return FAILED;
2551 }
2552 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2553 {
2554 return FAILED;
2555 }
2556
2557 switch (this->socket_xfrm->send_ack(this->socket_xfrm, hdr))
2558 {
2559 case SUCCESS:
2560 DBG2(DBG_KNL, "deleted SAD entry with SPI %.8x%s",
2561 ntohl(id->spi), markstr);
2562 return SUCCESS;
2563 case NOT_FOUND:
2564 return NOT_FOUND;
2565 default:
2566 DBG1(DBG_KNL, "unable to delete SAD entry with SPI %.8x%s",
2567 ntohl(id->spi), markstr);
2568 return FAILED;
2569 }
2570}
2571
2572METHOD(kernel_ipsec_t, update_sa, status_t,
2573 private_kernel_netlink_ipsec_t *this, kernel_ipsec_sa_id_t *id,
2574 kernel_ipsec_update_sa_t *data)
2575{
2576 netlink_buf_t request;
2577 struct nlmsghdr *hdr, *out_hdr = NULL, *out = NULL;
2578 struct xfrm_usersa_id *sa_id;
2579 struct xfrm_usersa_info *sa;
2580 size_t len;
2581 struct rtattr *rta;
2582 size_t rtasize;
2583 struct xfrm_encap_tmpl* encap = NULL;
2584 struct xfrm_replay_state *replay = NULL;
2585 struct xfrm_replay_state_esn *replay_esn = NULL;
2586 struct xfrm_lifetime_cur *lifetime = NULL;
2587 bool replay_state_seen = FALSE;
2588 kernel_ipsec_del_sa_t del = { 0 };
2589 status_t status = FAILED;
2590 traffic_selector_t *ts;
2591 char markstr[32] = "";
2592
2593 /* if IPComp is used, we first update the IPComp SA */
2594 if (data->cpi)
2595 {
2596 kernel_ipsec_sa_id_t ipcomp_id = {
2597 .src = id->src,
2598 .dst = id->dst,
2599 .spi = htonl(ntohs(data->cpi)),
2600 .proto = IPPROTO_COMP,
2601 .mark = id->mark,
2602 .if_id = id->if_id,
2603 };
2604 kernel_ipsec_update_sa_t ipcomp = {
2605 .new_src = data->new_src,
2606 .new_dst = data->new_dst,
2607 .new_reqid = data->new_reqid,
2608 };
2609 update_sa(this, &ipcomp_id, &ipcomp);
2610 }
2611
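	/* the kernel identifies an SA by destination address, SPI and protocol,
	 * so its addresses can't simply be changed in place.  Instead, query the
	 * existing state, delete it and re-add it below with the new addresses
	 * and the copied attributes/replay state */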
2612 memset(&request, 0, sizeof(request));
2613 format_mark(markstr, sizeof(markstr), id->mark);
2614
2615 DBG3(DBG_KNL, "querying SAD entry with SPI %.8x%s for update",
2616 ntohl(id->spi), markstr);
2617
2618 /* query the existing SA first */
2619 hdr = &request.hdr;
2620 hdr->nlmsg_flags = NLM_F_REQUEST;
2621 hdr->nlmsg_type = XFRM_MSG_GETSA;
2622 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_id));
2623
2624 sa_id = NLMSG_DATA(hdr);
2625 host2xfrm(id->dst, &sa_id->daddr);
2626 sa_id->spi = id->spi;
2627 sa_id->proto = id->proto;
2628 sa_id->family = id->dst->get_family(id->dst);
2629
2630 if (!add_mark(hdr, sizeof(request), id->mark))
2631 {
2632 return FAILED;
2633 }
2634 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
2635 {
2636 return FAILED;
2637 }
2638
2639 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
2640 {
2641 hdr = out;
2642 while (NLMSG_OK(hdr, len))
2643 {
2644 switch (hdr->nlmsg_type)
2645 {
2646 case XFRM_MSG_NEWSA:
2647 {
2648 out_hdr = hdr;
2649 break;
2650 }
2651 case NLMSG_ERROR:
2652 {
2653 netlink_log_error(hdr, "querying SAD entry failed");
2654 break;
2655 }
2656 default:
2657 hdr = NLMSG_NEXT(hdr, len);
2658 continue;
2659 case NLMSG_DONE:
2660 break;
2661 }
2662 break;
2663 }
2664 }
2665 if (!out_hdr)
2666 {
2667 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2668 ntohl(id->spi), markstr);
2669 goto failed;
2670 }
2671
2672 get_replay_state(this, id, &replay, &lifetime);
2673
2674 /* delete the old SA (without affecting the IPComp SA) */
2675 if (del_sa(this, id, &del) != SUCCESS)
2676 {
2677 DBG1(DBG_KNL, "unable to delete old SAD entry with SPI %.8x%s",
2678 ntohl(id->spi), markstr);
2679 goto failed;
2680 }
2681
2682 DBG2(DBG_KNL, "updating SAD entry with SPI %.8x%s from %#H..%#H to "
2683 "%#H..%#H", ntohl(id->spi), markstr, id->src, id->dst, data->new_src,
2684 data->new_dst);
2685 /* copy over the SA from out to request */
2686 hdr = &request.hdr;
2687 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2688 hdr->nlmsg_type = XFRM_MSG_NEWSA;
2689 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_info));
2690 sa = NLMSG_DATA(hdr);
2691 memcpy(sa, NLMSG_DATA(out_hdr), sizeof(struct xfrm_usersa_info));
2692 sa->family = data->new_dst->get_family(data->new_dst);
2693 if (data->new_reqid)
2694 {
2695 sa->reqid = data->new_reqid;
2696 }
2697
2698 if (!id->src->ip_equals(id->src, data->new_src))
2699 {
2700 host2xfrm(data->new_src, &sa->saddr);
2701
2702 ts = selector2ts(&sa->sel, TRUE);
2703 if (ts && ts->is_host(ts, id->src))
2704 {
2705 ts->set_address(ts, data->new_src);
2706 ts2subnet(ts, &sa->sel.saddr, &sa->sel.prefixlen_s);
2707 }
2708 DESTROY_IF(ts);
2709 }
2710 if (!id->dst->ip_equals(id->dst, data->new_dst))
2711 {
2712 host2xfrm(data->new_dst, &sa->id.daddr);
2713
2714 ts = selector2ts(&sa->sel, FALSE);
2715 if (ts && ts->is_host(ts, id->dst))
2716 {
2717 ts->set_address(ts, data->new_dst);
2718 ts2subnet(ts, &sa->sel.daddr, &sa->sel.prefixlen_d);
2719 }
2720 DESTROY_IF(ts);
2721 }
2722
2723 rta = XFRM_RTA(out_hdr, struct xfrm_usersa_info);
2724 rtasize = XFRM_PAYLOAD(out_hdr, struct xfrm_usersa_info);
2725 while (RTA_OK(rta, rtasize))
2726 {
2727 /* copy all attributes, but not XFRMA_ENCAP if we are disabling it */
2728 if (rta->rta_type != XFRMA_ENCAP || data->new_encap)
2729 {
2730 if (rta->rta_type == XFRMA_ENCAP)
2731 { /* update encap tmpl */
2732 encap = RTA_DATA(rta);
2733 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2734 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2735 }
2736 if (rta->rta_type == XFRMA_OFFLOAD_DEV)
2737 { /* update offload device */
2738 struct xfrm_user_offload *offload;
2739 host_t *local;
2740 char *ifname;
2741
2742 offload = RTA_DATA(rta);
2743 local = offload->flags & XFRM_OFFLOAD_INBOUND ? data->new_dst
2744 : data->new_src;
2745
2746 if (charon->kernel->get_interface(charon->kernel, local,
2747 &ifname))
2748 {
2749 offload->ifindex = if_nametoindex(ifname);
2750 free(ifname);
2751 }
2752 }
2753 if (rta->rta_type == XFRMA_REPLAY_ESN_VAL ||
2754 rta->rta_type == XFRMA_REPLAY_VAL)
2755 {
2756 replay_state_seen = TRUE;
2757 }
2758 netlink_add_attribute(hdr, rta->rta_type,
2759 chunk_create(RTA_DATA(rta), RTA_PAYLOAD(rta)),
2760 sizeof(request));
2761 }
2762 rta = RTA_NEXT(rta, rtasize);
2763 }
2764
2765 if (encap == NULL && data->new_encap)
2766 { /* add tmpl if we are enabling it */
2767 encap = netlink_reserve(hdr, sizeof(request), XFRMA_ENCAP,
2768 sizeof(*encap));
2769 if (!encap)
2770 {
2771 goto failed;
2772 }
2773 encap->encap_type = UDP_ENCAP_ESPINUDP;
2774 encap->encap_sport = ntohs(data->new_src->get_port(data->new_src));
2775 encap->encap_dport = ntohs(data->new_dst->get_port(data->new_dst));
2776 memset(&encap->encap_oa, 0, sizeof (xfrm_address_t));
2777 }
2778
2779 if (!replay_state_seen)
2780 {
2781 if (replay)
2782 {
2783 struct xfrm_replay_state *state;
2784
2785 state = netlink_reserve(hdr, sizeof(request), XFRMA_REPLAY_VAL,
2786 sizeof(*state));
2787 if (!state)
2788 {
2789 goto failed;
2790 }
2791 memcpy(state, replay, sizeof(*state));
2792 }
2793 else
2794 {
2795 DBG1(DBG_KNL, "unable to copy replay state from old SAD entry with "
2796 "SPI %.8x%s", ntohl(id->spi), markstr);
2797 }
2798 }
2799 if (lifetime)
2800 {
2801 struct xfrm_lifetime_cur *state;
2802
2803 state = netlink_reserve(hdr, sizeof(request), XFRMA_LTIME_VAL,
2804 sizeof(*state));
2805 if (!state)
2806 {
2807 goto failed;
2808 }
2809 memcpy(state, lifetime, sizeof(*state));
2810 }
2811 else
2812 {
2813 DBG1(DBG_KNL, "unable to copy usage stats from old SAD entry with "
2814 "SPI %.8x%s", ntohl(id->spi), markstr);
2815 }
2816
2817 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2818 {
2819 DBG1(DBG_KNL, "unable to update SAD entry with SPI %.8x%s",
2820 ntohl(id->spi), markstr);
2821 goto failed;
2822 }
2823
2824 status = SUCCESS;
2825failed:
2826 free(replay);
2827 free(replay_esn);
2828 free(lifetime);
2829 memwipe(out, len);
2830 memwipe(&request, sizeof(request));
2831 free(out);
2832
2833 return status;
2834}
2835
2836METHOD(kernel_ipsec_t, flush_sas, status_t,
2837 private_kernel_netlink_ipsec_t *this)
2838{
2839 netlink_buf_t request;
2840 struct nlmsghdr *hdr;
2841 struct xfrm_usersa_flush *flush;
2842 struct {
2843 uint8_t proto;
2844 char *name;
2845 } protos[] = {
2846 { IPPROTO_AH, "AH" },
2847 { IPPROTO_ESP, "ESP" },
2848 { IPPROTO_COMP, "IPComp" },
2849 };
2850 int i;
2851
2852 memset(&request, 0, sizeof(request));
2853
2854 hdr = &request.hdr;
2855 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2856 hdr->nlmsg_type = XFRM_MSG_FLUSHSA;
2857 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
2858
2859 flush = NLMSG_DATA(hdr);
2860
2861 for (i = 0; i < countof(protos); i++)
2862 {
2863 DBG2(DBG_KNL, "flushing all %s SAD entries", protos[i].name);
2864
2865 flush->proto = protos[i].proto;
2866
2867 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
2868 {
2869 DBG1(DBG_KNL, "unable to flush %s SAD entries", protos[i].name);
2870 return FAILED;
2871 }
2872 }
2873 return SUCCESS;
2874}
2875
2876/**
2877 * Unlock the mutex and signal waiting threads
2878 */
2879static void policy_change_done(private_kernel_netlink_ipsec_t *this,
2880 policy_entry_t *policy)
2881{
2882 policy->working = FALSE;
2883 if (policy->waiting)
2884 { /* don't need to wake threads waiting for other policies */
2885 this->condvar->broadcast(this->condvar);
2886 }
2887 this->mutex->unlock(this->mutex);
2888}
2889
2890/**
2891 * Find an XFRM interface with the given ID
2892 */
2893static bool find_xfrmi(private_kernel_netlink_ipsec_t *this, uint32_t target,
2894 char **if_name)
2895{
2896 enumerator_t *enumerator;
2897 char *name;
2898 uint32_t if_id;
2899
2900 enumerator = this->xfrmi->create_enumerator(this->xfrmi);
2901 while (enumerator->enumerate(enumerator, &name, &if_id, NULL, NULL))
2902 {
2903 if (if_id == target)
2904 {
2905 *if_name = strdup(name);
2906 enumerator->destroy(enumerator);
2907 return TRUE;
2908 }
2909 }
2910 enumerator->destroy(enumerator);
2911 return FALSE;
2912}
2913
2914/**
2915 * Install a route for the given policy if enabled and required
2916 */
2917static void install_route(private_kernel_netlink_ipsec_t *this,
2918 policy_entry_t *policy, policy_sa_t *mapping, ipsec_sa_t *ipsec)
2919{
2920 policy_sa_out_t *out = (policy_sa_out_t*)mapping;
2921 route_entry_t *route;
2922 host_t *iface;
2923
2924 INIT(route,
2925 .prefixlen = policy->sel.prefixlen_d,
2926 .pass = mapping->type == POLICY_PASS,
2927 );
2928
2929 if (charon->kernel->get_address_by_ts(charon->kernel, out->src_ts,
2930 &route->src_ip, NULL) != SUCCESS)
2931 {
2932 if (!route->pass)
2933 {
2934 free(route);
2935 return;
2936 }
2937 /* allow blank source IP for passthrough policies */
2938 route->src_ip = host_create_any(policy->sel.family);
2939 }
2940
2941 if (!ipsec->dst->is_anyaddr(ipsec->dst))
2942 {
2943 /* if if_ids are used, install a route via XFRM interface if any,
2944		 * otherwise install the route via the interface we use to reach the peer */
2945 if (!policy->if_id || !this->xfrmi ||
2946 !find_xfrmi(this, policy->if_id, &route->if_name))
2947 {
2948 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2949 ipsec->dst, -1, ipsec->src,
2950 &route->if_name);
2951 }
2952 }
2953 else
2954 { /* for shunt policies */
2955 iface = xfrm2host(policy->sel.family, &policy->sel.daddr, 0);
2956 route->gateway = charon->kernel->get_nexthop(charon->kernel,
2957 iface, policy->sel.prefixlen_d,
2958 route->src_ip, &route->if_name);
2959 iface->destroy(iface);
2960 }
2961 route->dst_net = chunk_alloc(policy->sel.family == AF_INET ? 4 : 16);
2962 memcpy(route->dst_net.ptr, &policy->sel.daddr, route->dst_net.len);
2963
2964	/* get the interface to install the route for, if we don't have one yet.
2965 * If we have a local address, use it. Otherwise (for shunt policies)
2966 * use the route's source address. */
2967 if (!route->if_name)
2968 {
2969 iface = ipsec->src;
2970 if (iface->is_anyaddr(iface))
2971 {
2972 iface = route->src_ip;
2973 }
2974 if (!charon->kernel->get_interface(charon->kernel, iface,
2975 &route->if_name) &&
2976 !route->pass)
2977 { /* don't require an interface for passthrough policies */
2978 route_entry_destroy(route);
2979 return;
2980 }
2981 }
2982 if (policy->route)
2983 {
2984 route_entry_t *old = policy->route;
2985 if (route_entry_equals(old, route))
2986 {
2987 route_entry_destroy(route);
2988 return;
2989 }
2990 /* uninstall previously installed route */
2991 if (charon->kernel->del_route(charon->kernel, old->dst_net,
2992 old->prefixlen, old->gateway,
2993 old->src_ip, old->if_name,
2994 old->pass) != SUCCESS)
2995 {
2996 DBG1(DBG_KNL, "error uninstalling route installed with policy "
2997 "%R === %R %N", out->src_ts, out->dst_ts, policy_dir_names,
2998 policy->direction);
2999 }
3000 route_entry_destroy(old);
3001 policy->route = NULL;
3002 }
3003
3004 DBG2(DBG_KNL, "installing route: %R via %H src %H dev %s", out->dst_ts,
3005 route->gateway, route->src_ip, route->if_name);
3006 switch (charon->kernel->add_route(charon->kernel, route->dst_net,
3007 route->prefixlen, route->gateway,
3008 route->src_ip, route->if_name,
3009 route->pass))
3010 {
3011 default:
3012 DBG1(DBG_KNL, "unable to install source route for %H",
3013 route->src_ip);
3014 /* FALL */
3015 case ALREADY_DONE:
3016 /* route exists, do not uninstall */
3017 route_entry_destroy(route);
3018 break;
3019 case SUCCESS:
3020 /* cache the installed route */
3021 policy->route = route;
3022 break;
3023 }
3024}
3025
3026/**
3027 * Add or update a policy in the kernel.
3028 *
3029 * Note: The mutex has to be locked when entering this function
3030 * and is unlocked here in any case.
3031 */
3032static status_t add_policy_internal(private_kernel_netlink_ipsec_t *this,
3033 policy_entry_t *policy, policy_sa_t *mapping, bool update)
3034{
3035 netlink_buf_t request;
3036 policy_entry_t clone;
3037 ipsec_sa_t *ipsec = mapping->sa;
3038 struct xfrm_userpolicy_info *policy_info;
3039 struct xfrm_user_offload *offload = NULL;
3040 struct nlmsghdr *hdr;
3041 status_t status;
3042 int i;
3043
3044 /* clone the policy so we are able to check it out again later */
3045 memcpy(&clone, policy, sizeof(policy_entry_t));
3046
3047 memset(&request, 0, sizeof(request));
3048 hdr = &request.hdr;
3049 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3050 hdr->nlmsg_type = update ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY;
3051 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
3052
3053 policy_info = NLMSG_DATA(hdr);
3054 policy_info->sel = policy->sel;
3055 policy_info->dir = policy->direction;
3056
3057 if (mapping->pcpu_acquires)
3058 {
3059 policy_info->flags |= XFRM_POLICY_CPU_ACQUIRE;
3060 }
3061
3062 /* calculate priority based on selector size, small size = high prio */
3063 policy_info->priority = mapping->priority;
3064 policy_info->action = mapping->type != POLICY_DROP ? XFRM_POLICY_ALLOW
3065 : XFRM_POLICY_BLOCK;
3066 policy_info->share = XFRM_SHARE_ANY;
3067
3068 /* policies don't expire */
3069 policy_info->lft.soft_byte_limit = XFRM_INF;
3070 policy_info->lft.soft_packet_limit = XFRM_INF;
3071 policy_info->lft.hard_byte_limit = XFRM_INF;
3072 policy_info->lft.hard_packet_limit = XFRM_INF;
3073 policy_info->lft.soft_add_expires_seconds = 0;
3074 policy_info->lft.hard_add_expires_seconds = 0;
3075 policy_info->lft.soft_use_expires_seconds = 0;
3076 policy_info->lft.hard_use_expires_seconds = 0;
3077
3078 if (mapping->type == POLICY_IPSEC && ipsec->cfg.reqid)
3079 {
3080 struct xfrm_user_tmpl *tmpl;
3081 struct {
3082 uint8_t proto;
3083 uint32_t spi;
3084 bool use;
3085 } protos[] = {
3086 { IPPROTO_COMP, htonl(ntohs(ipsec->cfg.ipcomp.cpi)),
3087 ipsec->cfg.ipcomp.transform != IPCOMP_NONE },
3088 { IPPROTO_ESP, ipsec->cfg.esp.spi, ipsec->cfg.esp.use },
3089 { IPPROTO_AH, ipsec->cfg.ah.spi, ipsec->cfg.ah.use },
3090 };
3091 ipsec_mode_t proto_mode = ipsec->cfg.mode;
3092 int count = 0;
3093
3094 for (i = 0; i < countof(protos); i++)
3095 {
3096 if (protos[i].use)
3097 {
3098 count++;
3099 }
3100 }
3101 tmpl = netlink_reserve(hdr, sizeof(request), XFRMA_TMPL,
3102 count * sizeof(*tmpl));
3103 if (!tmpl)
3104 {
3105 policy_change_done(this, policy);
3106 return FAILED;
3107 }
3108
3109 for (i = 0; i < countof(protos); i++)
3110 {
3111 if (!protos[i].use)
3112 {
3113 continue;
3114 }
3115 tmpl->reqid = ipsec->cfg.reqid;
3116 tmpl->id.proto = protos[i].proto;
3117 /* in order to match SAs with all matching labels, we can't have the
3118 * SPI in the template, similarly for per-CPU policies and sub-SAs */
3119 if (policy->direction == POLICY_OUT && !policy->label &&
3120 !mapping->pcpu_acquires)
3121 {
3122 tmpl->id.spi = protos[i].spi;
3123 }
3124 tmpl->aalgos = tmpl->ealgos = tmpl->calgos = ~0;
3125 tmpl->mode = mode2kernel(proto_mode);
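			/* presumably, IPComp is marked optional for inbound/forward
			 * policies so that packets the peer did not compress still match */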
3126 tmpl->optional = protos[i].proto == IPPROTO_COMP &&
3127 policy->direction != POLICY_OUT;
3128 tmpl->family = ipsec->src->get_family(ipsec->src);
3129
3130 if (proto_mode == MODE_TUNNEL || proto_mode == MODE_BEET ||
3131 proto_mode == MODE_IPTFS)
3132 { /* only for tunnel mode */
3133 host2xfrm(ipsec->src, &tmpl->saddr);
3134 host2xfrm(ipsec->dst, &tmpl->id.daddr);
3135 }
3136
3137 tmpl++;
3138
3139 /* use transport mode for other SAs */
3140 proto_mode = MODE_TRANSPORT;
3141 }
3142 }
3143
3144 if (!add_mark(hdr, sizeof(request), ipsec->mark))
3145 {
3146 policy_change_done(this, policy);
3147 return FAILED;
3148 }
3149 if (ipsec->if_id &&
3150 !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, ipsec->if_id))
3151 {
3152 policy_change_done(this, policy);
3153 return FAILED;
3154 }
3155 if (!add_label(hdr, sizeof(request), policy->label))
3156 {
3157 policy_change_done(this, policy);
3158 return FAILED;
3159 }
3160 /* make sure this is the last attribute added to the message */
3161 if (!add_hw_offload_policy(hdr, sizeof(request), policy, mapping, &offload))
3162 {
3163 policy_change_done(this, policy);
3164 return FAILED;
3165 }
3166 this->mutex->unlock(this->mutex);
3167
3168 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
3169
3170 if (status != SUCCESS && offload && mapping->sa->hw_offload == HW_OFFLOAD_AUTO)
3171 {
3172		DBG1(DBG_KNL, "failed to install policy with %N HW offload, trying without "
3173 "offload", hw_offload_names, HW_OFFLOAD_PACKET);
3174 /* the kernel only allows offloading with packet offload and rejects
3175 * the attribute if that flag is not set, so remove it again */
3176 hdr->nlmsg_len -= RTA_ALIGN(RTA_LENGTH(sizeof(*offload)));
3177 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
3178 }
3179
3180 if (status == ALREADY_DONE && !update)
3181 {
3182 DBG1(DBG_KNL, "policy already exists, try to update it");
3183 hdr->nlmsg_type = XFRM_MSG_UPDPOLICY;
3184 status = this->socket_xfrm->send_ack(this->socket_xfrm, hdr);
3185 }
3186
3187 this->mutex->lock(this->mutex);
3188 if (status != SUCCESS)
3189 {
3190 policy_change_done(this, policy);
3191 return FAILED;
3192 }
3193 /* install a route, if:
3194 * - this is an outbound policy (to just get one for each child)
3195 * - routing is not disabled via strongswan.conf
3196 * - the selector is not for a specific protocol/port
3197 * - routes via XFRM interfaces are enabled or no interface ID is configured
3198 * - we are in tunnel/BEET mode or install a bypass policy
3199 */
3200 if (policy->direction == POLICY_OUT && this->install_routes &&
3201 !policy->sel.proto && !policy->sel.dport && !policy->sel.sport &&
3202 (this->install_routes_xfrmi || !policy->if_id))
3203 {
3204 if (mapping->type == POLICY_PASS ||
3205 (mapping->type == POLICY_IPSEC && ipsec->cfg.mode != MODE_TRANSPORT))
3206 {
3207 install_route(this, policy, mapping, ipsec);
3208 }
3209 }
3210 policy_change_done(this, policy);
3211 return SUCCESS;
3212}
3213
3214METHOD(kernel_ipsec_t, add_policy, status_t,
3215 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
3216 kernel_ipsec_manage_policy_t *data)
3217{
3218 policy_entry_t *policy, *current;
3219 policy_sa_t *assigned_sa, *current_sa;
3220 enumerator_t *enumerator;
3221 bool found = FALSE, update = TRUE;
3222 char markstr[32] = "", labelstr[128] = "";
3223 uint32_t cur_priority DBG_UNUSED = 0;
3224 int use_count DBG_UNUSED;
3225
3226 /* create a policy */
3227 INIT(policy,
3228 .sel = ts2selector(id->src_ts, id->dst_ts, id->interface),
3229 .mark = id->mark.value & id->mark.mask,
3230 .if_id = id->if_id,
3231 .label = id->label ? id->label->clone(id->label) : NULL,
3232 .direction = id->dir,
3233 .reqid = data->sa->reqid,
3234 );
3235 format_mark(markstr, sizeof(markstr), id->mark);
3236 format_label(labelstr, sizeof(labelstr), id->label);
3237
3238 /* find the policy, which matches EXACTLY */
3239 this->mutex->lock(this->mutex);
3240 current = this->policies->get(this->policies, policy);
3241 if (current)
3242 { /* use existing policy */
3243 DBG2(DBG_KNL, "policy %R === %R %N%s%s already exists, increasing "
3244 "refcount", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3245 markstr, labelstr);
3246 policy_entry_destroy(this, policy);
3247 policy = current;
3248 found = TRUE;
3249
3250 policy->waiting++;
3251 while (policy->working)
3252 {
3253 this->condvar->wait(this->condvar, this->mutex);
3254 }
3255 policy->waiting--;
3256 policy->working = TRUE;
3257 }
3258 else
3259 { /* use the new one, if we have no such policy */
3260 policy->used_by = linked_list_create();
3261 this->policies->put(this->policies, policy, policy);
3262 }
3263
3264 /* cache the assigned IPsec SA */
3265 assigned_sa = policy_sa_create(this, id->dir, data->type, data->src,
3266 data->dst, id->src_ts, id->dst_ts, id->mark,
3267 id->if_id, data->hw_offload,
3268 data->pcpu_acquires, data->sa);
3269 assigned_sa->auto_priority = get_priority(policy, data->prio, id->interface);
3270 assigned_sa->priority = this->get_priority ? this->get_priority(id, data)
3271 : data->manual_prio;
3272 assigned_sa->priority = assigned_sa->priority ?: assigned_sa->auto_priority;
3273
3274 /* insert the SA according to its priority */
3275 enumerator = policy->used_by->create_enumerator(policy->used_by);
3276 while (enumerator->enumerate(enumerator, (void**)&current_sa))
3277 {
3278 if (current_sa->priority > assigned_sa->priority)
3279 {
3280 break;
3281 }
3282 if (current_sa->priority == assigned_sa->priority)
3283 {
3284 /* in case of equal manual prios order SAs by automatic priority */
3285 if (current_sa->auto_priority > assigned_sa->auto_priority)
3286 {
3287 break;
3288 }
3289 /* prefer SAs with a reqid over those without */
3290 if (current_sa->auto_priority == assigned_sa->auto_priority &&
3291 (!current_sa->sa->cfg.reqid || assigned_sa->sa->cfg.reqid))
3292 {
3293 break;
3294 }
3295 }
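		/* the new SA is not at the head of the list, so the currently
		 * installed policy takes precedence; remember its priority and skip
		 * the kernel update below */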
3296 if (update)
3297 {
3298 cur_priority = current_sa->priority;
3299 update = FALSE;
3300 }
3301 }
3302 policy->used_by->insert_before(policy->used_by, enumerator, assigned_sa);
3303 enumerator->destroy(enumerator);
3304
3305 use_count = policy->used_by->get_count(policy->used_by);
3306 if (!update)
3307 { /* we don't update the policy if the priority is lower than that of
3308 * the currently installed one */
3309 policy_change_done(this, policy);
3310 DBG2(DBG_KNL, "not updating policy %R === %R %N%s%s [priority %u, "
3311 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
3312 id->dir, markstr, labelstr, cur_priority, use_count);
3313 return SUCCESS;
3314 }
3315 if (policy->reqid != assigned_sa->sa->cfg.reqid)
3316 {
3317 DBG1(DBG_CFG, "updating reqid for policy %R === %R %N%s%s from %u "
3318 "to %u", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3319 markstr, labelstr, policy->reqid, assigned_sa->sa->cfg.reqid);
3320 policy->reqid = assigned_sa->sa->cfg.reqid;
3321 }
3322
3323 if (this->policy_update)
3324 {
3325 found = TRUE;
3326 }
3327
3328 DBG2(DBG_KNL, "%s policy %R === %R %N%s%s [priority %u, refcount %d]",
3329 found ? "updating" : "adding", id->src_ts, id->dst_ts,
3330 policy_dir_names, id->dir, markstr, labelstr, assigned_sa->priority,
3331 use_count);
3332
3333 if (add_policy_internal(this, policy, assigned_sa, found) != SUCCESS)
3334 {
3335 DBG1(DBG_KNL, "unable to %s policy %R === %R %N%s%s",
3336 found ? "update" : "add", id->src_ts, id->dst_ts,
3337 policy_dir_names, id->dir, markstr, labelstr);
3338 return FAILED;
3339 }
3340 return SUCCESS;
3341}
3342
3343METHOD(kernel_ipsec_t, query_policy, status_t,
3344 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
3345 kernel_ipsec_query_policy_t *data, time_t *use_time)
3346{
3347 netlink_buf_t request;
3348 struct nlmsghdr *out = NULL, *hdr;
3349 struct xfrm_userpolicy_id *policy_id;
3350 struct xfrm_userpolicy_info *policy = NULL;
3351 size_t len;
3352 char markstr[32] = "", labelstr[128] = "";
3353
3354 memset(&request, 0, sizeof(request));
3355 format_mark(markstr, sizeof(markstr), id->mark);
3356 format_label(labelstr, sizeof(labelstr), id->label);
3357
3358 DBG3(DBG_KNL, "querying policy %R === %R %N%s%s", id->src_ts, id->dst_ts,
3359 policy_dir_names, id->dir, markstr, labelstr);
3360
3361 hdr = &request.hdr;
3362 hdr->nlmsg_flags = NLM_F_REQUEST;
3363 hdr->nlmsg_type = XFRM_MSG_GETPOLICY;
3364 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3365
3366 policy_id = NLMSG_DATA(hdr);
3367 policy_id->sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3368 policy_id->dir = id->dir;
3369
3370 if (!add_mark(hdr, sizeof(request), id->mark))
3371 {
3372 return FAILED;
3373 }
3374 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
3375 {
3376 return FAILED;
3377 }
3378 if (!add_label(hdr, sizeof(request), id->label))
3379 {
3380 return FAILED;
3381 }
3382
3383 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
3384 {
3385 hdr = out;
3386 while (NLMSG_OK(hdr, len))
3387 {
3388 switch (hdr->nlmsg_type)
3389 {
3390 case XFRM_MSG_NEWPOLICY:
3391 {
3392 policy = NLMSG_DATA(hdr);
3393 break;
3394 }
3395 case NLMSG_ERROR:
3396 {
3397 netlink_log_error(hdr, "querying policy failed");
3398 break;
3399 }
3400 default:
3401 hdr = NLMSG_NEXT(hdr, len);
3402 continue;
3403 case NLMSG_DONE:
3404 break;
3405 }
3406 break;
3407 }
3408 }
3409
3410 if (policy == NULL)
3411 {
3412 DBG2(DBG_KNL, "unable to query policy %R === %R %N%s", id->src_ts,
3413 id->dst_ts, policy_dir_names, id->dir, markstr);
3414 free(out);
3415 return FAILED;
3416 }
3417
3418 if (policy->curlft.use_time)
3419 {
3420 /* we need the monotonic time, but the kernel returns system time. */
3421 *use_time = time_monotonic(NULL) - (time(NULL) - policy->curlft.use_time);
3422 }
3423 else
3424 {
3425 *use_time = 0;
3426 }
3427
3428 free(out);
3429 return SUCCESS;
3430}
3431
3432METHOD(kernel_ipsec_t, del_policy, status_t,
3433 private_kernel_netlink_ipsec_t *this, kernel_ipsec_policy_id_t *id,
3434 kernel_ipsec_manage_policy_t *data)
3435{
3436 policy_entry_t *current, policy;
3437 enumerator_t *enumerator;
3438 policy_sa_t *mapping;
3439 netlink_buf_t request;
3440 struct nlmsghdr *hdr;
3441 struct xfrm_userpolicy_id *policy_id;
3442 bool is_installed = TRUE;
3443 uint32_t priority, auto_priority, cur_priority DBG_UNUSED;
3444 ipsec_sa_t assigned_sa = {
3445 .src = data->src,
3446 .dst = data->dst,
3447 .mark = id->mark,
3448 .if_id = id->if_id,
3449 .hw_offload = data->hw_offload,
3450 .cfg = *data->sa,
3451 };
3452 char markstr[32] = "", labelstr[128] = "";
3453 int use_count;
3454 status_t status = SUCCESS;
3455
3456 format_mark(markstr, sizeof(markstr), id->mark);
3457 format_label(labelstr, sizeof(labelstr), id->label);
3458
3459 DBG2(DBG_KNL, "deleting policy %R === %R %N%s%s", id->src_ts, id->dst_ts,
3460 policy_dir_names, id->dir, markstr, labelstr);
3461
3462 /* create a policy */
3463 memset(&policy, 0, sizeof(policy_entry_t));
3464 policy.sel = ts2selector(id->src_ts, id->dst_ts, id->interface);
3465 policy.mark = id->mark.value & id->mark.mask;
3466 policy.if_id = id->if_id;
3467 policy.label = id->label;
3468 policy.direction = id->dir;
3469
3470 /* find the policy */
3471 this->mutex->lock(this->mutex);
3472 current = this->policies->get(this->policies, &policy);
3473 if (!current)
3474 {
3475 DBG1(DBG_KNL, "deleting policy %R === %R %N%s%s failed, not found",
3476 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr,
3477 labelstr);
3478 this->mutex->unlock(this->mutex);
3479 return NOT_FOUND;
3480 }
3481 current->waiting++;
3482 while (current->working)
3483 {
3484 this->condvar->wait(this->condvar, this->mutex);
3485 }
3486 current->working = TRUE;
3487 current->waiting--;
3488
3489 /* remove mapping to SA by reqid and priority */
3490 auto_priority = get_priority(current, data->prio, id->interface);
3491 priority = this->get_priority ? this->get_priority(id, data)
3492 : data->manual_prio;
3493 priority = priority ?: auto_priority;
3494
3495 enumerator = current->used_by->create_enumerator(current->used_by);
3496 while (enumerator->enumerate(enumerator, (void**)&mapping))
3497 {
3498 if (priority == mapping->priority &&
3499 auto_priority == mapping->auto_priority &&
3500 data->type == mapping->type &&
3501 data->pcpu_acquires == mapping->pcpu_acquires &&
3502 ipsec_sa_equals(mapping->sa, &assigned_sa))
3503 {
3504 current->used_by->remove_at(current->used_by, enumerator);
3505 policy_sa_destroy(mapping, id->dir, this);
3506 break;
3507 }
3508 if (is_installed)
3509 {
3510 cur_priority = mapping->priority;
3511 is_installed = FALSE;
3512 }
3513 }
3514 enumerator->destroy(enumerator);
3515
3516 use_count = current->used_by->get_count(current->used_by);
3517 if (use_count > 0)
3518 { /* policy is used by more SAs, keep in kernel */
3519 DBG2(DBG_KNL, "policy still used by another CHILD_SA, not removed");
3520 if (!is_installed)
3521 { /* no need to update as the policy was not installed for this SA */
3522 policy_change_done(this, current);
3523 DBG2(DBG_KNL, "not updating policy %R === %R %N%s%s [priority %u, "
3524 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names,
3525 id->dir, markstr, labelstr, cur_priority, use_count);
3526 return SUCCESS;
3527 }
3528 current->used_by->get_first(current->used_by, (void**)&mapping);
3529 if (current->reqid != mapping->sa->cfg.reqid)
3530 {
3531 DBG1(DBG_CFG, "updating reqid for policy %R === %R %N%s%s from %u "
3532 "to %u", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3533 markstr, labelstr, current->reqid, mapping->sa->cfg.reqid);
3534 current->reqid = mapping->sa->cfg.reqid;
3535 }
3536
3537 DBG2(DBG_KNL, "updating policy %R === %R %N%s%s [priority %u, "
3538 "refcount %d]", id->src_ts, id->dst_ts, policy_dir_names, id->dir,
3539 markstr, labelstr, mapping->priority, use_count);
3540
3541 if (add_policy_internal(this, current, mapping, TRUE) != SUCCESS)
3542 {
3543 DBG1(DBG_KNL, "unable to update policy %R === %R %N%s%s",
3544 id->src_ts, id->dst_ts, policy_dir_names, id->dir, markstr,
3545 labelstr);
3546 return FAILED;
3547 }
3548 return SUCCESS;
3549 }
3550
3551 memset(&request, 0, sizeof(request));
3552
3553 hdr = &request.hdr;
3554 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3555 hdr->nlmsg_type = XFRM_MSG_DELPOLICY;
3556 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3557
3558 policy_id = NLMSG_DATA(hdr);
3559 policy_id->sel = current->sel;
3560 policy_id->dir = id->dir;
3561
3562 if (!add_mark(hdr, sizeof(request), id->mark))
3563 {
3564 policy_change_done(this, current);
3565 return FAILED;
3566 }
3567 if (id->if_id && !add_uint32(hdr, sizeof(request), XFRMA_IF_ID, id->if_id))
3568 {
3569 policy_change_done(this, current);
3570 return FAILED;
3571 }
3572 if (!add_label(hdr, sizeof(request), id->label))
3573 {
3574 policy_change_done(this, current);
3575 return FAILED;
3576 }
3577
3578 if (current->route)
3579 {
3580 route_entry_t *route = current->route;
3581 if (charon->kernel->del_route(charon->kernel, route->dst_net,
3582 route->prefixlen, route->gateway,
3583 route->src_ip, route->if_name,
3584 route->pass) != SUCCESS)
3585 {
3586 DBG1(DBG_KNL, "error uninstalling route installed with policy "
3587 "%R === %R %N%s%s", id->src_ts, id->dst_ts, policy_dir_names,
3588 id->dir, markstr, labelstr);
3589 }
3590 }
3591 this->mutex->unlock(this->mutex);
3592
3593 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3594 {
3595 DBG1(DBG_KNL, "unable to delete policy %R === %R %N%s%s", id->src_ts,
3596 id->dst_ts, policy_dir_names, id->dir, markstr, labelstr);
3597 status = FAILED;
3598 }
3599
3600 this->mutex->lock(this->mutex);
3601 if (!current->waiting)
3602 { /* only if no other thread still needs the policy */
3603 this->policies->remove(this->policies, current);
3604 policy_entry_destroy(this, current);
3605 this->mutex->unlock(this->mutex);
3606 }
3607 else
3608 {
3609 policy_change_done(this, current);
3610 }
3611 return status;
3612}
3613
3614METHOD(kernel_ipsec_t, flush_policies, status_t,
3615 private_kernel_netlink_ipsec_t *this)
3616{
3617 netlink_buf_t request;
3618 struct nlmsghdr *hdr;
3619
3620 memset(&request, 0, sizeof(request));
3621
3622 DBG2(DBG_KNL, "flushing all policies from SPD");
3623
3624 hdr = &request.hdr;
3625 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3626 hdr->nlmsg_type = XFRM_MSG_FLUSHPOLICY;
3627 hdr->nlmsg_len = NLMSG_LENGTH(0); /* no data associated */
3628
3629 /* by adding an rtattr of type XFRMA_POLICY_TYPE we could restrict this
3630 * to main or sub policies (default is main) */
3631
3632 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
3633 {
3634 DBG1(DBG_KNL, "unable to flush SPD entries");
3635 return FAILED;
3636 }
3637 return SUCCESS;
3638}
3639
3640/**
3641 * Bypass socket using a per-socket policy
3642 */
3643static bool add_socket_bypass(private_kernel_netlink_ipsec_t *this,
3644 int fd, int family)
3645{
3646 struct xfrm_userpolicy_info policy;
3647 u_int sol, ipsec_policy;
3648
3649 switch (family)
3650 {
3651 case AF_INET:
3652 sol = SOL_IP;
3653 ipsec_policy = IP_XFRM_POLICY;
3654 break;
3655 case AF_INET6:
3656 sol = SOL_IPV6;
3657 ipsec_policy = IPV6_XFRM_POLICY;
3658 break;
3659 default:
3660 return FALSE;
3661 }
3662
3663 memset(&policy, 0, sizeof(policy));
3664 policy.action = XFRM_POLICY_ALLOW;
3665 policy.sel.family = family;
3666
3667 policy.dir = XFRM_POLICY_OUT;
3668 if (setsockopt(fd, sol, ipsec_policy, &policy, sizeof(policy)) < 0)
3669 {
3670 DBG1(DBG_KNL, "unable to set IPSEC_POLICY on socket: %s (%d)",
3671 strerror(errno), errno);
3672 return FALSE;
3673 }
3674 policy.dir = XFRM_POLICY_IN;
3675 if (setsockopt(fd, sol, ipsec_policy, &policy, sizeof(policy)) < 0)
3676 {
3677 DBG1(DBG_KNL, "unable to set IPSEC_POLICY on socket: %s (%d)",
3678 strerror(errno), errno);
3679 return FALSE;
3680 }
3681 return TRUE;
3682}
3683
3684/**
3685 * Keep track of interface and its offload support
3686 */
3687typedef struct {
3688
3689 /**
3690 * Interface index
3691 */
3692 int ifindex;
3693
3694 /**
3695 * Name of the interface
3696 */
3697 char ifname[IFNAMSIZ];
3698
3699 /**
3700 * Interface flags
3701 */
3702 u_int flags;
3703
3704 /**
3705 * Offload state
3706 */
3707 enum {
3708 /** Offload support unknown */
3709 IFACE_OFFLOAD_UNKNOWN,
3710 /** No offload supported */
3711 IFACE_OFFLOAD_NONE,
3712 /** Interface supports at least crypto offload */
3713 IFACE_OFFLOAD_DETECTED,
3714 /** Interface supports crypto offload, but no packet and policy offload */
3715 IFACE_OFFLOAD_CRYPTO,
3716 /** Packet and policy offload supported */
3717 IFACE_OFFLOAD_PACKET,
3718 } offload;
3719
3720} offload_iface_t;
3721
3722/**
3723 * Port based IKE bypass policy
3724 */
3725typedef struct {
3726 /** address family */
3727 int family;
3728 /** layer 4 protocol */
3729 int proto;
3730 /** port number, network order */
3731 uint16_t port;
3732} bypass_t;
3733
3734/**
3735 * Add or remove a bypass policy from/to kernel. If an interface is given,
3736 * we try to offload the policy to that interface.
3737 */
3738static bool manage_bypass(private_kernel_netlink_ipsec_t *this,
3739 int type, policy_dir_t dir, bypass_t *bypass,
3740 char *ifname)
3741{
3742 netlink_buf_t request;
3743 struct xfrm_selector *sel;
3744 struct xfrm_user_offload *offload = NULL;
3745 struct nlmsghdr *hdr;
3746
3747 memset(&request, 0, sizeof(request));
3748 hdr = &request.hdr;
3749 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3750 hdr->nlmsg_type = type;
3751
3752 if (type == XFRM_MSG_NEWPOLICY)
3753 {
3754 struct xfrm_userpolicy_info *policy;
3755
3756 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info));
3757
3758 policy = NLMSG_DATA(hdr);
3759 policy->dir = dir;
3760 policy->priority = 32;
3761 policy->action = XFRM_POLICY_ALLOW;
3762 policy->share = XFRM_SHARE_ANY;
3763
3764 policy->lft.soft_byte_limit = XFRM_INF;
3765 policy->lft.soft_packet_limit = XFRM_INF;
3766 policy->lft.hard_byte_limit = XFRM_INF;
3767 policy->lft.hard_packet_limit = XFRM_INF;
3768
3769 sel = &policy->sel;
3770
3771 if (ifname &&
3772 !add_hw_offload(hdr, sizeof(request), NULL, ifname,
3773 HW_OFFLOAD_PACKET, &offload))
3774 {
3775 return FALSE;
3776 }
3777 }
3778 else /* XFRM_MSG_DELPOLICY */
3779 {
3780 struct xfrm_userpolicy_id *policy;
3781
3782 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id));
3783
3784 policy = NLMSG_DATA(hdr);
3785 policy->dir = dir;
3786
3787 sel = &policy->sel;
3788 }
3789
3790 sel->family = bypass->family;
3791 sel->proto = bypass->proto;
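	/* match the local IKE port: the destination port for inbound traffic,
	 * the source port for outbound traffic */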
3792 if (dir == POLICY_IN)
3793 {
3794 sel->dport = bypass->port;
3795 sel->dport_mask = 0xffff;
3796 }
3797 else
3798 {
3799 sel->sport = bypass->port;
3800 sel->sport_mask = 0xffff;
3801 }
3802 if (ifname)
3803 {
3804 sel->ifindex = if_nametoindex(ifname);
3805 }
3806 return this->socket_xfrm->send_ack(this->socket_xfrm, hdr) == SUCCESS;
3807}
3808
3809CALLBACK(remove_port_bypass, void,
3810 bypass_t *bypass, int idx, void *user)
3811{
3812 private_kernel_netlink_ipsec_t *this = user;
3813 enumerator_t *enumerator;
3814 offload_iface_t *iface;
3815
3816 if (this->port_bypass)
3817 {
3818 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_OUT, bypass, NULL);
3819 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, bypass, NULL);
3820 }
3821 if (this->offload_interfaces)
3822 {
3823 enumerator = this->offload_interfaces->create_enumerator(this->offload_interfaces);
3824 while (enumerator->enumerate(enumerator, NULL, &iface))
3825 {
3826 if (iface->offload == IFACE_OFFLOAD_PACKET &&
3827 iface->flags & IFF_UP)
3828 {
3829 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_OUT, bypass,
3830 iface->ifname);
3831 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, bypass,
3832 iface->ifname);
3833 }
3834 }
3835 enumerator->destroy(enumerator);
3836 }
3837}
3838
3839/**
3840 * Bypass socket using a port-based bypass policy, optionally offloaded to a
3841 * given interface
3842 */
3843static bool add_port_bypass(private_kernel_netlink_ipsec_t *this,
3844 bypass_t *bypass, char *ifname)
3845{
3846 if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_IN, bypass, ifname))
3847 {
3848 return FALSE;
3849 }
3850 if (!manage_bypass(this, XFRM_MSG_NEWPOLICY, POLICY_OUT, bypass, ifname))
3851 {
3852 manage_bypass(this, XFRM_MSG_DELPOLICY, POLICY_IN, bypass, ifname);
3853 return FALSE;
3854 }
3855 return TRUE;
3856}
3857
3858/**
3859 * Offload the given port-based bypass policy to the given interface if possible.
3860 *
3861 * offload_mutex is assumed to be locked.
3862 */
3863static bool offload_bypass_iface(private_kernel_netlink_ipsec_t *this,
3864 bypass_t *bypass, offload_iface_t *iface)
3865{
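	/* installing an offloaded bypass policy doubles as the probe for packet
	 * offload support: on success the interface is marked accordingly, on
	 * failure it's downgraded to crypto-only offload */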
3866 if ((iface->offload == IFACE_OFFLOAD_DETECTED ||
3867 iface->offload == IFACE_OFFLOAD_PACKET))
3868 {
3869 if (add_port_bypass(this, bypass, iface->ifname))
3870 {
3871 iface->offload = IFACE_OFFLOAD_PACKET;
3872 return TRUE;
3873 }
3874 else if (iface->offload == IFACE_OFFLOAD_DETECTED)
3875 {
3876 iface->offload = IFACE_OFFLOAD_CRYPTO;
3877 }
3878 }
3879 return FALSE;
3880}
3881
3882/**
3883 * Offload all known port-based bypass policies to the given interface.
3884 *
3885 * offload_mutex is assumed to be locked.
3886 */
3887static void offload_bypasses(private_kernel_netlink_ipsec_t *this,
3888 offload_iface_t *iface)
3889{
3890 enumerator_t *enumerator;
3891 bypass_t *bypass;
3892
3893 enumerator = array_create_enumerator(this->bypass);
3894 while (enumerator->enumerate(enumerator, &bypass))
3895 {
3896 if (!offload_bypass_iface(this, bypass, iface))
3897 { /* could indicate a failure but generally means that the interface
3898 * does not support offloading */
3899 break;
3900 }
3901 }
3902 enumerator->destroy(enumerator);
3903}
3904
3905/**
3906 * Offload a new port-based bypass policy to all known interfaces.
3907 *
3908 * offload_mutex is assumed to be locked.
3909 */
3910static void offload_bypass(private_kernel_netlink_ipsec_t *this,
3911 bypass_t *bypass)
3912{
3913 enumerator_t *enumerator;
3914 offload_iface_t *iface;
3915
3916 enumerator = this->offload_interfaces->create_enumerator(this->offload_interfaces);
3917 while (enumerator->enumerate(enumerator, NULL, &iface))
3918 {
3919 if (iface->flags & IFF_UP)
3920 {
3921 offload_bypass_iface(this, bypass, iface);
3922 }
3923 }
3924 enumerator->destroy(enumerator);
3925}
3926
3927/**
3928 * Offload a bypass policy on supported hardware if the kernel supports it and
3929 * optionally install a port-based bypass policy in software.
3930 */
3931static bool add_and_offload_port_bypass(private_kernel_netlink_ipsec_t *this,
3932 int fd, int family)
3933{
3934 union {
3935 struct sockaddr sa;
3936 struct sockaddr_in in;
3937 struct sockaddr_in6 in6;
3938 } saddr;
3939 socklen_t len;
3940 bypass_t bypass = {
3941 .family = family,
3942 };
3943
3944 len = sizeof(saddr);
3945 if (getsockname(fd, &saddr.sa, &len) != 0)
3946 {
3947 return FALSE;
3948 }
3949#ifdef SO_PROTOCOL /* since 2.6.32 */
3950 len = sizeof(bypass.proto);
3951 if (getsockopt(fd, SOL_SOCKET, SO_PROTOCOL, &bypass.proto, &len) != 0)
3952#endif
3953 { /* assume UDP if SO_PROTOCOL not supported */
3954 bypass.proto = IPPROTO_UDP;
3955 }
3956 switch (family)
3957 {
3958 case AF_INET:
3959 bypass.port = saddr.in.sin_port;
3960 break;
3961 case AF_INET6:
3962 bypass.port = saddr.in6.sin6_port;
3963 break;
3964 default:
3965 return FALSE;
3966 }
3967
3968 if (this->port_bypass &&
3969 !add_port_bypass(this, &bypass, NULL))
3970 {
3971 return FALSE;
3972 }
3973 if (this->offload_interfaces)
3974 {
3975 this->offload_mutex->lock(this->offload_mutex);
3976 offload_bypass(this, &bypass);
3977 /* store it even if no policy was offloaded because an interface that
3978 * supports offloading might get activated later */
3979 array_insert_create_value(&this->bypass, sizeof(bypass_t),
3980 ARRAY_TAIL, &bypass);
3981 this->offload_mutex->unlock(this->offload_mutex);
3982 }
3983 else
3984 {
3985 array_insert_create_value(&this->bypass, sizeof(bypass_t),
3986 ARRAY_TAIL, &bypass);
3987 }
3988 return TRUE;
3989}
3990
3991METHOD(kernel_ipsec_t, bypass_socket, bool,
3992 private_kernel_netlink_ipsec_t *this, int fd, int family)
3993{
3994 if ((this->offload_interfaces || this->port_bypass) &&
3995 !add_and_offload_port_bypass(this, fd, family))
3996 {
3997 return FALSE;
3998 }
3999 return this->port_bypass || add_socket_bypass(this, fd, family);
4000}
4001
4002METHOD(kernel_ipsec_t, enable_udp_decap, bool,
4003 private_kernel_netlink_ipsec_t *this, int fd, int family, uint16_t port)
4004{
4005 int type = UDP_ENCAP_ESPINUDP;
4006
4007 if (setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type)) < 0)
4008 {
4009 DBG1(DBG_KNL, "unable to set UDP_ENCAP: %s", strerror(errno));
4010 return FALSE;
4011 }
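	/* also enable GRO on the encapsulation socket so the kernel can aggregate
	 * received ESP-in-UDP packets (supported by newer kernels) */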
4012 type = 1;
4013 if (setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type)) < 0)
4014 {
4015 DBG1(DBG_KNL, "unable to set UDP_GRO: %s", strerror(errno));
4016 return FALSE;
4017 }
4018 return TRUE;
4019}
4020
4021CALLBACK(receive_link_events, void,
4022 private_kernel_netlink_ipsec_t *this, struct nlmsghdr *hdr)
4023{
4024 struct ifinfomsg *msg = NLMSG_DATA(hdr);
4025 struct rtattr *rta = IFLA_RTA(msg);
4026 size_t rtasize = IFLA_PAYLOAD (hdr);
4027 offload_iface_t *iface = NULL;
4028 char *name = NULL;
4029
4030 if (hdr->nlmsg_type != RTM_NEWLINK &&
4031 hdr->nlmsg_type != RTM_DELLINK)
4032 {
4033 return;
4034 }
4035
4036 while (RTA_OK(rta, rtasize))
4037 {
4038 switch (rta->rta_type)
4039 {
4040 case IFLA_IFNAME:
4041 name = RTA_DATA(rta);
4042 break;
4043 }
4044 rta = RTA_NEXT(rta, rtasize);
4045 }
4046 if (!name)
4047 {
4048 return;
4049 }
4050
4051 this->offload_mutex->lock(this->offload_mutex);
4052 if (hdr->nlmsg_type == RTM_NEWLINK)
4053 {
4054 iface = this->offload_interfaces->get(this->offload_interfaces,
4055 (void*)(uintptr_t)msg->ifi_index);
4056 if (!iface)
4057 {
4058 INIT(iface,
4059 .ifindex = msg->ifi_index
4060 );
4061 this->offload_interfaces->put(this->offload_interfaces,
4062 (void*)(uintptr_t)msg->ifi_index,
4063 iface);
4064 }
4065 /* update name in case an interface is renamed */
4066 strncpy(iface->ifname, name, IFNAMSIZ-1);
4067 iface->ifname[IFNAMSIZ-1] = '\0';
4068
4069 if (iface->offload == IFACE_OFFLOAD_UNKNOWN)
4070 {
4071 if (netlink_detect_offload(iface->ifname))
4072 {
4073 iface->offload = IFACE_OFFLOAD_DETECTED;
4074 }
4075 else
4076 {
4077 iface->offload = IFACE_OFFLOAD_NONE;
4078 }
4079 }
4080
4081 /* if an interface is activated or newly detected, try to offload known
4082 * IKE bypass policies. we don't have to do anything if the interface
4083 * goes down as the kernel automatically removes the state it has for
4084 * offloaded policies */
4085 if (!(iface->flags & IFF_UP) && (msg->ifi_flags & IFF_UP))
4086 {
4087 offload_bypasses(this, iface);
4088 }
4089 iface->flags = msg->ifi_flags;
4090 }
4091 else
4092 {
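		/* on RTM_DELLINK, remove and free the cached entry */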
4093 iface = this->offload_interfaces->remove(this->offload_interfaces,
4094 (void*)(uintptr_t)msg->ifi_index);
4095 free(iface);
4096 }
4097 this->offload_mutex->unlock(this->offload_mutex);
4098}
4099
4100/**
4101 * Enumerate all interfaces and check if they support offloading
4102 */
4103static bool init_offload_interfaces(private_kernel_netlink_ipsec_t *this)
4104{
4105 netlink_buf_t request;
4106 netlink_socket_t *socket;
4107 struct nlmsghdr *out, *current, *in;
4108 struct rtgenmsg *msg;
4109 size_t len;
4110
4111 socket = netlink_socket_create(NETLINK_ROUTE, NULL, FALSE);
4112 if (!socket)
4113 {
4114 return FALSE;
4115 }
4116
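	/* dump all links once and pass them through the link event handler to
	 * populate the interface table */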
4117 memset(&request, 0, sizeof(request));
4118
4119 in = &request.hdr;
4120 in->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
4121 in->nlmsg_type = RTM_GETLINK;
4122 in->nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
4123
4124 msg = NLMSG_DATA(in);
4125 msg->rtgen_family = AF_UNSPEC;
4126
4127 if (socket->send(socket, in, &out, &len) != SUCCESS)
4128 {
4129 socket->destroy(socket);
4130 return FALSE;
4131 }
4132
4133 current = out;
4134 while (NLMSG_OK(current, len))
4135 {
4136 receive_link_events(this, current);
4137 current = NLMSG_NEXT(current, len);
4138 }
4139 free(out);
4140 socket->destroy(socket);
4141 return TRUE;
4142}
4143
4144METHOD(kernel_ipsec_t, destroy, void,
4145 private_kernel_netlink_ipsec_t *this)
4146{
4147 enumerator_t *enumerator;
4148 policy_entry_t *policy;
4149 offload_iface_t *iface;
4150
4151 DESTROY_IF(this->socket_link_events);
4152 DESTROY_IF(this->socket_xfrm_events);
4153 array_destroy_function(this->bypass, remove_port_bypass, this);
4154 if (this->xfrmi)
4155 {
4156 lib->set(lib, KERNEL_NETLINK_XFRMI_MANAGER, NULL);
4157 kernel_netlink_xfrmi_destroy(this->xfrmi);
4158 }
4159 DESTROY_IF(this->socket_xfrm);
4160 enumerator = this->policies->create_enumerator(this->policies);
4161 while (enumerator->enumerate(enumerator, NULL, &policy))
4162 {
4163 policy_entry_destroy(this, policy);
4164 }
4165 enumerator->destroy(enumerator);
4166 this->policies->destroy(this->policies);
4167 this->sas->destroy(this->sas);
4168 if (this->offload_interfaces)
4169 {
4170 enumerator = this->offload_interfaces->create_enumerator(this->offload_interfaces);
4171 while (enumerator->enumerate(enumerator, NULL, &iface))
4172 {
4173 free(iface);
4174 }
4175 enumerator->destroy(enumerator);
4176 this->offload_interfaces->destroy(this->offload_interfaces);
4177 }
4178 this->condvar->destroy(this->condvar);
4179 this->mutex->destroy(this->mutex);
4180 DESTROY_IF(this->offload_mutex);
4181 free(this);
4182}
4183
4184/**
4185 * Get the currently configured SPD hashing thresholds for an address family
4186 */
4187static bool get_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
4188 int type, uint8_t *lbits, uint8_t *rbits)
4189{
4190 netlink_buf_t request;
4191 struct nlmsghdr *hdr, *out;
4192 struct xfrmu_spdhthresh *thresh;
4193 struct rtattr *rta;
4194 size_t len, rtasize;
4195 bool success = FALSE;
4196
4197 memset(&request, 0, sizeof(request));
4198
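	/* both the request and the reply carry a 32-bit value as payload, the
	 * thresholds follow as attributes in the reply */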
4199 hdr = &request.hdr;
4200 hdr->nlmsg_flags = NLM_F_REQUEST;
4201 hdr->nlmsg_type = XFRM_MSG_GETSPDINFO;
4202 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
4203
4204 if (this->socket_xfrm->send(this->socket_xfrm, hdr, &out, &len) == SUCCESS)
4205 {
4206 hdr = out;
4207 while (NLMSG_OK(hdr, len))
4208 {
4209 switch (hdr->nlmsg_type)
4210 {
4211 case XFRM_MSG_NEWSPDINFO:
4212 {
4213 rta = XFRM_RTA(hdr, uint32_t);
4214 rtasize = XFRM_PAYLOAD(hdr, uint32_t);
4215 while (RTA_OK(rta, rtasize))
4216 {
4217 if (rta->rta_type == type &&
4218 RTA_PAYLOAD(rta) == sizeof(*thresh))
4219 {
4220 thresh = RTA_DATA(rta);
4221 *lbits = thresh->lbits;
4222 *rbits = thresh->rbits;
4223 success = TRUE;
4224 break;
4225 }
4226 rta = RTA_NEXT(rta, rtasize);
4227 }
4228 break;
4229 }
4230 case NLMSG_ERROR:
4231 {
4232 netlink_log_error(hdr, "getting SPD hash threshold failed");
4233 break;
4234 }
4235 default:
4236 hdr = NLMSG_NEXT(hdr, len);
4237 continue;
4238 case NLMSG_DONE:
4239 break;
4240 }
4241 break;
4242 }
4243 free(out);
4244 }
4245 return success;
4246}
4247
4248/**
4249 * Configure SPD hashing threshold for an address family
4250 */
4251static void setup_spd_hash_thresh(private_kernel_netlink_ipsec_t *this,
4252 char *key, int type, uint8_t def)
4253{
4254 struct xfrmu_spdhthresh *thresh;
4255 struct nlmsghdr *hdr;
4256 netlink_buf_t request;
4257 uint8_t lbits, rbits;
4258
4259 if (!get_spd_hash_thresh(this, type, &lbits, &rbits))
4260 {
4261 return;
4262 }
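	/* only send an update if the configured values differ from those
	 * currently used by the kernel */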
4263 memset(&request, 0, sizeof(request));
4264
4265 hdr = &request.hdr;
4266 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4267 hdr->nlmsg_type = XFRM_MSG_NEWSPDINFO;
4268 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(uint32_t));
4269
4270 thresh = netlink_reserve(hdr, sizeof(request), type, sizeof(*thresh));
4271 thresh->lbits = lib->settings->get_int(lib->settings,
4272 "%s.plugins.kernel-netlink.spdh_thresh.%s.lbits",
4273 def, lib->ns, key);
4274 thresh->rbits = lib->settings->get_int(lib->settings,
4275 "%s.plugins.kernel-netlink.spdh_thresh.%s.rbits",
4276 def, lib->ns, key);
4277 if (thresh->lbits != lbits || thresh->rbits != rbits)
4278 {
4279 if (this->socket_xfrm->send_ack(this->socket_xfrm, hdr) != SUCCESS)
4280 {
4281 DBG1(DBG_KNL, "setting SPD hash threshold failed");
4282 }
4283 }
4284}
4285
4286/**
4287 * Check for kernel features (currently only via version number)
4288 */
4289static void check_kernel_features(private_kernel_netlink_ipsec_t *this)
4290{
4291 struct utsname utsname;
4292 int a, b, c;
4293
4294 if (uname(&utsname) == 0)
4295 {
4296 switch(sscanf(utsname.release, "%d.%d.%d", &a, &b, &c))
4297 {
4298 case 2:
4299 case 3:
4300 /* before 6.2 the kernel only provided the last used time for
4301 * specific outbound IPv6 SAs */
4302 this->sa_lastused = a > 6 || (a == 6 && b >= 2);
4303 /* 6.10 added support for SA direction and enforces certain
4304			 * flags, e.g. a replay window of 0 for outbound SAs */
4305 this->sa_dir = a > 6 || (a == 6 && b >= 10);
4306 break;
4307 default:
4308 break;
4309 }
4310 }
4311}
4312
4313/*
4314 * Described in header.
4315 */
4316kernel_netlink_ipsec_t *kernel_netlink_ipsec_create()
4317{
4318 private_kernel_netlink_ipsec_t *this;
4319 uint32_t groups;
4320
4321 INIT(this,
4322 .public = {
4323 .interface = {
4324 .get_features = _get_features,
4325 .get_spi = _get_spi,
4326 .get_cpi = _get_cpi,
4327 .add_sa = _add_sa,
4328 .update_sa = _update_sa,
4329 .query_sa = _query_sa,
4330 .del_sa = _del_sa,
4331 .flush_sas = _flush_sas,
4332 .add_policy = _add_policy,
4333 .query_policy = _query_policy,
4334 .del_policy = _del_policy,
4335 .flush_policies = _flush_policies,
4336 .bypass_socket = _bypass_socket,
4337 .enable_udp_decap = _enable_udp_decap,
4338 .destroy = _destroy,
4339 },
4340 },
4341 .policies = hashtable_create((hashtable_hash_t)policy_hash,
4342 (hashtable_equals_t)policy_equals, 32),
4343 .sas = hashtable_create((hashtable_hash_t)ipsec_sa_hash,
4344 (hashtable_equals_t)ipsec_sa_equals, 32),
4345 .mutex = mutex_create(MUTEX_TYPE_DEFAULT),
4346 .condvar = condvar_create(CONDVAR_TYPE_DEFAULT),
4347 .get_priority = dlsym(RTLD_DEFAULT,
4348 "kernel_netlink_get_priority_custom"),
4349 .policy_update = lib->settings->get_bool(lib->settings,
4350 "%s.plugins.kernel-netlink.policy_update",
4351 FALSE, lib->ns),
4352 .install_routes = lib->settings->get_bool(lib->settings,
4353 "%s.install_routes", TRUE, lib->ns),
4354 .install_routes_xfrmi = lib->settings->get_bool(lib->settings,
4355 "%s.plugins.kernel-netlink.install_routes_xfrmi",
4356 FALSE, lib->ns),
4357 .proto_port_transport = lib->settings->get_bool(lib->settings,
4358 "%s.plugins.kernel-netlink.set_proto_port_transport_sa",
4359 FALSE, lib->ns),
4360 .port_bypass = lib->settings->get_bool(lib->settings,
4361 "%s.plugins.kernel-netlink.port_bypass", FALSE, lib->ns),
4362 );
4363
4364 check_kernel_features(this);
4365
4366 this->socket_xfrm = netlink_socket_create(NETLINK_XFRM, xfrm_msg_names,
4367 lib->settings->get_bool(lib->settings,
4368 "%s.plugins.kernel-netlink.parallel_xfrm", FALSE, lib->ns));
4369 if (!this->socket_xfrm)
4370 {
4371 destroy(this);
4372 return NULL;
4373 }
4374
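	/* apply configured SPD hashing thresholds for IPv4 and IPv6 */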
4375 setup_spd_hash_thresh(this, "ipv4", XFRMA_SPD_IPV4_HTHRESH, 32);
4376 setup_spd_hash_thresh(this, "ipv6", XFRMA_SPD_IPV6_HTHRESH, 128);
4377
4378 groups = nl_group(XFRMNLGRP_ACQUIRE) | nl_group(XFRMNLGRP_EXPIRE) |
4379 nl_group(XFRMNLGRP_MIGRATE) | nl_group(XFRMNLGRP_MAPPING);
4380 this->socket_xfrm_events = netlink_event_socket_create(NETLINK_XFRM, groups,
4381 receive_events, this);
4382 if (!this->socket_xfrm_events)
4383 {
4384 destroy(this);
4385 return NULL;
4386 }
4387
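	/* if HW offload is available, track interfaces and their offload support
	 * via RTNETLINK link events */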
4388 if (netlink_find_offload_feature(lib->settings->get_str(lib->settings,
4389 "%s.plugins.kernel-netlink.hw_offload_feature_interface",
4390 "lo", lib->ns)))
4391 {
4392 this->offload_interfaces = hashtable_create(hashtable_hash_ptr,
4393 hashtable_equals_ptr, 8);
4394 this->offload_mutex = mutex_create(MUTEX_TYPE_DEFAULT);
4395 this->socket_link_events = netlink_event_socket_create(NETLINK_ROUTE,
4396 nl_group(RTNLGRP_LINK),
4397 receive_link_events, this);
4398 if (!this->socket_link_events ||
4399 !init_offload_interfaces(this))
4400 {
4401 destroy(this);
4402 return NULL;
4403 }
4404 }
4405
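	/* register the XFRM interface manager, if supported by the kernel */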
4406 this->xfrmi = kernel_netlink_xfrmi_create(TRUE);
4407 if (this->xfrmi)
4408 {
4409 lib->set(lib, KERNEL_NETLINK_XFRMI_MANAGER, this->xfrmi);
4410 }
4411 return &this->public;
4412}