2 * Copyright (C) 2014 Martin Willi
3 * Copyright (C) 2008-2020 Tobias Brunner
5 * Copyright (C) secunet Security Networks AG
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 * Copyright (C) 2016 Thomas Egerer
21 * Permission is hereby granted, free of charge, to any person obtaining a copy
22 * of this software and associated documentation files (the "Software"), to deal
23 * in the Software without restriction, including without limitation the rights
24 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 * copies of the Software, and to permit persons to whom the Software is
26 * furnished to do so, subject to the following conditions:
28 * The above copyright notice and this permission notice shall be included in
29 * all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
40 #include <sys/socket.h>
41 #include <linux/netlink.h>
42 #include <linux/rtnetlink.h>
43 #include <linux/xfrm.h>
47 #include "kernel_netlink_shared.h"
49 #include <utils/debug.h>
50 #include <threading/mutex.h>
51 #include <threading/condvar.h>
52 #include <collections/array.h>
53 #include <collections/hashtable.h>
55 typedef struct private_netlink_socket_t private_netlink_socket_t
;
58 * Private variables and functions of netlink_socket_t class.
60 struct private_netlink_socket_t
{
63 * public part of the netlink_socket_t object.
65 netlink_socket_t
public;
68 * mutex to lock access to entries
73 * Netlink request entries currently active, uintptr_t seq => entry_t
78 * Current sequence number for Netlink requests
93 * Enum names for Netlink messages
98 * Timeout for Netlink replies, in ms
103 * Number of times to repeat timed out queries
108 * Buffer size for received Netlink messages
113 * Use parallel netlink queries
118 * Ignore errors potentially resulting from a retransmission
120 bool ignore_retransmit_errors
;
124 * #definable hook to simulate request message loss
126 #ifdef NETLINK_MSG_LOSS_HOOK
127 bool NETLINK_MSG_LOSS_HOOK(struct nlmsghdr
*msg
);
128 #define msg_loss_hook(msg) NETLINK_MSG_LOSS_HOOK(msg)
130 #define msg_loss_hook(msg) FALSE
134 * Request entry the answer for a waiting thread is collected in
137 /** Condition variable the thread is waiting on */
139 /** Array of hdrs in a multi-message response, as struct nlmsghdr* */
141 /** All response messages received? */
146 * Clean up a thread waiting entry
148 static void destroy_entry(entry_t
*entry
)
150 entry
->condvar
->destroy(entry
->condvar
);
151 array_destroy_function(entry
->hdrs
, (void*)free
, NULL
);
156 * Write a Netlink message to socket
158 static bool write_msg(private_netlink_socket_t
*this, struct nlmsghdr
*msg
)
160 struct sockaddr_nl addr
= {
161 .nl_family
= AF_NETLINK
,
165 if (msg_loss_hook(msg
))
172 len
= sendto(this->socket
, msg
, msg
->nlmsg_len
, 0,
173 (struct sockaddr
*)&addr
, sizeof(addr
));
174 if (len
!= msg
->nlmsg_len
)
180 DBG1(DBG_KNL
, "netlink write error: %s", strerror(errno
));
188 * Read a single Netlink message from socket, return 0 on error, -1 on timeout
190 static ssize_t
read_msg(private_netlink_socket_t
*this,
191 char *buf
, size_t buflen
, bool block
)
201 FD_SET(this->socket
, &set
);
202 timeval_add_ms(&tv
, this->timeout
);
204 if (select(this->socket
+ 1, &set
, NULL
, NULL
,
205 this->timeout
? &tv
: NULL
) <= 0)
210 len
= recv(this->socket
, buf
, buflen
, MSG_TRUNC
|(block
? 0 : MSG_DONTWAIT
));
213 DBG1(DBG_KNL
, "netlink response exceeds buffer size");
218 if (errno
!= EAGAIN
&& errno
!= EWOULDBLOCK
&& errno
!= EINTR
)
220 DBG1(DBG_KNL
, "netlink read error: %s", strerror(errno
));
228 * Queue received response message
230 static bool queue(private_netlink_socket_t
*this, struct nlmsghdr
*buf
)
232 struct nlmsghdr
*hdr
;
236 seq
= (uintptr_t)buf
->nlmsg_seq
;
238 this->mutex
->lock(this->mutex
);
239 entry
= this->entries
->get(this->entries
, (void*)seq
);
242 hdr
= malloc(buf
->nlmsg_len
);
243 memcpy(hdr
, buf
, buf
->nlmsg_len
);
244 array_insert(entry
->hdrs
, ARRAY_TAIL
, hdr
);
245 if (hdr
->nlmsg_type
== NLMSG_DONE
|| !(hdr
->nlmsg_flags
& NLM_F_MULTI
))
247 entry
->complete
= TRUE
;
248 entry
->condvar
->signal(entry
->condvar
);
253 DBG1(DBG_KNL
, "received unknown netlink seq %u, ignored", seq
);
255 this->mutex
->unlock(this->mutex
);
257 return entry
!= NULL
;
261 * Read and queue response message, optionally blocking, returns TRUE on timeout
263 static bool read_and_queue(private_netlink_socket_t
*this, bool block
)
265 struct nlmsghdr
*hdr
;
266 char buf
[this->buflen
];
267 ssize_t len
, read_len
;
270 len
= read_len
= read_msg(this, buf
, sizeof(buf
), block
);
277 hdr
= (struct nlmsghdr
*)buf
;
278 while (NLMSG_OK(hdr
, len
))
280 if (this->protocol
== NETLINK_XFRM
&&
281 hdr
->nlmsg_type
== XFRM_MSG_NEWSA
)
282 { /* wipe potential IPsec SA keys */
285 if (!queue(this, hdr
))
289 hdr
= NLMSG_NEXT(hdr
, len
);
294 memwipe(buf
, read_len
);
299 CALLBACK(watch
, bool,
300 private_netlink_socket_t
*this, int fd
, watcher_event_t event
)
302 if (event
== WATCHER_READ
)
304 read_and_queue(this, FALSE
);
310 * Send a netlink request, try once
312 static status_t
send_once(private_netlink_socket_t
*this, struct nlmsghdr
*in
,
313 uintptr_t seq
, struct nlmsghdr
**out
, size_t *out_len
)
315 struct nlmsghdr
*hdr
;
321 in
->nlmsg_pid
= getpid();
325 DBG3(DBG_KNL
, "sending %N %u: %b", this->names
, in
->nlmsg_type
,
326 (u_int
)seq
, in
, in
->nlmsg_len
);
329 this->mutex
->lock(this->mutex
);
330 if (!write_msg(this, in
))
332 this->mutex
->unlock(this->mutex
);
337 .condvar
= condvar_create(CONDVAR_TYPE_DEFAULT
),
338 .hdrs
= array_create(0, 0),
340 this->entries
->put(this->entries
, (void*)seq
, entry
);
342 while (!entry
->complete
)
344 if (this->parallel
&&
345 lib
->watcher
->get_state(lib
->watcher
) != WATCHER_STOPPED
&&
346 lib
->processor
->get_total_threads(lib
->processor
))
350 if (entry
->condvar
->timed_wait(entry
->condvar
, this->mutex
,
358 entry
->condvar
->wait(entry
->condvar
, this->mutex
);
362 { /* During (de-)initialization, no watcher thread is active.
363 * collect responses ourselves. */
364 if (read_and_queue(this, TRUE
))
370 this->entries
->remove(this->entries
, (void*)seq
);
372 this->mutex
->unlock(this->mutex
);
374 if (!entry
->complete
)
376 destroy_entry(entry
);
380 for (i
= 0, *out_len
= 0; i
< array_count(entry
->hdrs
); i
++)
382 array_get(entry
->hdrs
, i
, &hdr
);
383 *out_len
+= NLMSG_ALIGN(hdr
->nlmsg_len
);
385 ptr
= malloc(*out_len
);
386 *out
= (struct nlmsghdr
*)ptr
;
388 while (array_remove(entry
->hdrs
, ARRAY_HEAD
, &hdr
))
392 DBG3(DBG_KNL
, "received %N %u: %b", this->names
, hdr
->nlmsg_type
,
393 hdr
->nlmsg_seq
, hdr
, hdr
->nlmsg_len
);
395 memcpy(ptr
, hdr
, hdr
->nlmsg_len
);
396 ptr
+= NLMSG_ALIGN(hdr
->nlmsg_len
);
399 destroy_entry(entry
);
404 * Ignore errors for message types that might have completed previously
406 static void ignore_retransmit_error(private_netlink_socket_t
*this,
407 struct nlmsgerr
*err
, int type
)
412 switch (this->protocol
)
417 case XFRM_MSG_NEWPOLICY
:
438 switch (this->protocol
)
443 case XFRM_MSG_DELPOLICY
:
466 METHOD(netlink_socket_t
, netlink_send
, status_t
,
467 private_netlink_socket_t
*this, struct nlmsghdr
*in
, struct nlmsghdr
**out
,
473 seq
= ref_get(&this->seq
);
475 for (try = 0; try <= this->retries
; ++try)
477 struct nlmsghdr
*hdr
;
483 DBG1(DBG_KNL
, "retransmitting Netlink request (%u/%u)",
486 status
= send_once(this, in
, seq
, &hdr
, &len
);
496 if (hdr
->nlmsg_type
== NLMSG_ERROR
)
498 struct nlmsgerr
* err
;
500 err
= NLMSG_DATA(hdr
);
501 if (err
->error
== -EBUSY
)
507 if (this->ignore_retransmit_errors
&& try > 0)
509 ignore_retransmit_error(this, err
, in
->nlmsg_type
);
516 DBG1(DBG_KNL
, "Netlink request timed out after %u retransmits",
521 METHOD(netlink_socket_t
, netlink_send_ack
, status_t
,
522 private_netlink_socket_t
*this, struct nlmsghdr
*in
)
524 struct nlmsghdr
*out
, *hdr
;
527 if (netlink_send(this, in
, &out
, &len
) != SUCCESS
)
532 while (NLMSG_OK(hdr
, len
))
534 switch (hdr
->nlmsg_type
)
538 struct nlmsgerr
* err
= NLMSG_DATA(hdr
);
542 if (-err
->error
== EEXIST
)
543 { /* do not report existing routes */
547 if (-err
->error
== ESRCH
)
548 { /* do not report missing entries */
552 DBG1(DBG_KNL
, "received netlink error: %s (%d)",
553 strerror(-err
->error
), -err
->error
);
561 hdr
= NLMSG_NEXT(hdr
, len
);
568 DBG1(DBG_KNL
, "netlink request not acknowledged");
573 METHOD(netlink_socket_t
, destroy
, void,
574 private_netlink_socket_t
*this)
576 if (this->socket
!= -1)
580 lib
->watcher
->remove(lib
->watcher
, this->socket
);
584 this->entries
->destroy(this->entries
);
585 this->mutex
->destroy(this->mutex
);
590 * Described in header
592 u_int
netlink_get_buflen()
596 buflen
= lib
->settings
->get_int(lib
->settings
,
597 "%s.plugins.kernel-netlink.buflen", 0, lib
->ns
);
600 long pagesize
= sysconf(_SC_PAGESIZE
);
606 /* base this on NLMSG_GOODSIZE */
607 buflen
= min(pagesize
, 8192);
613 * Described in header
615 netlink_socket_t
*netlink_socket_create(int protocol
, enum_name_t
*names
,
618 private_netlink_socket_t
*this;
619 struct sockaddr_nl addr
= {
620 .nl_family
= AF_NETLINK
,
622 bool force_buf
= FALSE
;
627 .send
= _netlink_send
,
628 .send_ack
= _netlink_send_ack
,
632 .mutex
= mutex_create(MUTEX_TYPE_RECURSIVE
),
633 .socket
= socket(AF_NETLINK
, SOCK_RAW
, protocol
),
634 .entries
= hashtable_create(hashtable_hash_ptr
, hashtable_equals_ptr
, 4),
635 .protocol
= protocol
,
637 .buflen
= netlink_get_buflen(),
638 .timeout
= lib
->settings
->get_int(lib
->settings
,
639 "%s.plugins.kernel-netlink.timeout", 0, lib
->ns
),
640 .retries
= lib
->settings
->get_int(lib
->settings
,
641 "%s.plugins.kernel-netlink.retries", 0, lib
->ns
),
642 .ignore_retransmit_errors
= lib
->settings
->get_bool(lib
->settings
,
643 "%s.plugins.kernel-netlink.ignore_retransmit_errors",
645 .parallel
= parallel
,
648 if (this->socket
== -1)
650 DBG1(DBG_KNL
, "unable to create netlink socket: %s (%d)",
651 strerror(errno
), errno
);
655 if (bind(this->socket
, (struct sockaddr
*)&addr
, sizeof(addr
)))
657 DBG1(DBG_KNL
, "unable to bind netlink socket: %s (%d)",
658 strerror(errno
), errno
);
662 rcvbuf_size
= lib
->settings
->get_int(lib
->settings
,
663 "%s.plugins.kernel-netlink.receive_buffer_size",
664 rcvbuf_size
, lib
->ns
);
669 force_buf
= lib
->settings
->get_bool(lib
->settings
,
670 "%s.plugins.kernel-netlink.force_receive_buffer_size",
672 optname
= force_buf
? SO_RCVBUFFORCE
: SO_RCVBUF
;
674 if (setsockopt(this->socket
, SOL_SOCKET
, optname
, &rcvbuf_size
,
675 sizeof(rcvbuf_size
)) == -1)
677 DBG1(DBG_KNL
, "failed to %supdate receive buffer size to %d: %s",
678 force_buf
? "forcibly " : "", rcvbuf_size
, strerror(errno
));
683 lib
->watcher
->add(lib
->watcher
, this->socket
, WATCHER_READ
, watch
, this);
686 return &this->public;
690 * Described in header
692 void netlink_add_attribute(struct nlmsghdr
*hdr
, int rta_type
, chunk_t data
,
697 if (NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_LENGTH(data
.len
) > buflen
)
699 DBG1(DBG_KNL
, "unable to add attribute, buffer too small");
703 rta
= (struct rtattr
*)(((char*)hdr
) + NLMSG_ALIGN(hdr
->nlmsg_len
));
704 rta
->rta_type
= rta_type
;
705 rta
->rta_len
= RTA_LENGTH(data
.len
);
706 memcpy(RTA_DATA(rta
), data
.ptr
, data
.len
);
707 hdr
->nlmsg_len
= NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_ALIGN(rta
->rta_len
);
711 * Add an attribute to the given Netlink message
713 static struct rtattr
*add_rtattr(struct nlmsghdr
*hdr
, int buflen
, int type
,
718 if (NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_LENGTH(len
) > buflen
)
720 DBG1(DBG_KNL
, "unable to add attribute, buffer too small");
724 rta
= ((void*)hdr
) + NLMSG_ALIGN(hdr
->nlmsg_len
);
725 rta
->rta_type
= type
;
726 rta
->rta_len
= RTA_LENGTH(len
);
727 hdr
->nlmsg_len
= NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_ALIGN(rta
->rta_len
);
/*
 * Described in header
 */
void *netlink_nested_start(struct nlmsghdr *hdr, size_t buflen, int type)
{
	struct rtattr *nested;

	/* a nested attribute is opened as an attribute without any payload, its
	 * final length gets fixed up once the nested attributes were added */
	nested = add_rtattr(hdr, buflen, type, 0);

	return nested;
}
/*
 * Described in header
 *
 * Closes a nested attribute by setting its length so that it spans everything
 * appended to the message since the attribute was started.
 *
 * hdr:  Netlink message the nested attribute is part of
 * attr: handle of the nested attribute, as returned when it was started;
 *       NULL is accepted and ignored
 */
void netlink_nested_end(struct nlmsghdr *hdr, void *attr)
{
	struct rtattr *rta = attr;
	char *end;

	if (!rta)
	{	/* nothing to close, e.g. because starting the attribute failed */
		return;
	}
	/* the current (aligned) end of the message is also the end of the nested
	 * attribute; use char* arithmetic, as arithmetic on void* is not ISO C */
	end = (char*)hdr + NLMSG_ALIGN(hdr->nlmsg_len);
	rta->rta_len = end - (char*)attr;
}
/*
 * Described in header
 *
 * Appends an attribute of the given type with room for len bytes of payload
 * to the message and returns a pointer to that payload.
 *
 * hdr:    Netlink message to extend
 * buflen: size of the buffer the message is stored in
 * type:   attribute type
 * len:    number of payload bytes to reserve
 *
 * Returns a pointer to the reserved payload, NULL if adding the attribute
 * failed.
 */
void *netlink_reserve(struct nlmsghdr *hdr, int buflen, int type, int len)
{
	struct rtattr *rta;

	rta = add_rtattr(hdr, buflen, type, len);
	if (!rta)
	{	/* NOTE(review): assumes add_rtattr() yields NULL if the attribute does
		 * not fit into the buffer - confirm. RTA_DATA() on NULL would produce
		 * a bogus non-NULL pointer callers can't recognize as failure */
		return NULL;
	}
	return RTA_DATA(rta);
}
770 * Described in header
772 void route_entry_destroy(route_entry_t
*this)
775 DESTROY_IF(this->src_ip
);
776 DESTROY_IF(this->gateway
);
777 chunk_free(&this->dst_net
);
782 * Described in header
784 route_entry_t
*route_entry_clone(const route_entry_t
*this)
786 route_entry_t
*route
;
789 .if_name
= strdupnull(this->if_name
),
790 .src_ip
= this->src_ip
? this->src_ip
->clone(this->src_ip
) : NULL
,
791 .gateway
= this->gateway
? this->gateway
->clone(this->gateway
) : NULL
,
792 .dst_net
= chunk_clone(this->dst_net
),
793 .prefixlen
= this->prefixlen
,
800 * Described in header
802 u_int
route_entry_hash(const route_entry_t
*this)
804 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
805 chunk_hash(this->dst_net
));
809 * Compare two IP addresses, also accept it if both are NULL
811 static bool addrs_null_or_equal(host_t
*a
, host_t
*b
)
813 return (!a
&& !b
) || (a
&& b
&& a
->ip_equals(a
, b
));
817 * Described in header
819 bool route_entry_equals(const route_entry_t
*a
, const route_entry_t
*b
)
821 return streq(a
->if_name
, b
->if_name
) &&
822 a
->pass
== b
->pass
&&
823 a
->prefixlen
== b
->prefixlen
&&
824 chunk_equals(a
->dst_net
, b
->dst_net
) &&
825 addrs_null_or_equal(a
->src_ip
, b
->src_ip
) &&
826 addrs_null_or_equal(a
->gateway
, b
->gateway
);