2 * Copyright (C) 2014 Martin Willi
3 * Copyright (C) 2014 revosec AG
5 * Copyright (C) 2008-2020 Tobias Brunner
6 * HSR Hochschule fuer Technik Rapperswil
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2 of the License, or (at your
11 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * Copyright (C) 2016 secunet Security Networks AG
21 * Copyright (C) 2016 Thomas Egerer
23 * Permission is hereby granted, free of charge, to any person obtaining a copy
24 * of this software and associated documentation files (the "Software"), to deal
25 * in the Software without restriction, including without limitation the rights
26 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27 * copies of the Software, and to permit persons to whom the Software is
28 * furnished to do so, subject to the following conditions:
30 * The above copyright notice and this permission notice shall be included in
31 * all copies or substantial portions of the Software.
33 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
42 #include <sys/socket.h>
43 #include <linux/netlink.h>
44 #include <linux/rtnetlink.h>
45 #include <linux/xfrm.h>
49 #include "kernel_netlink_shared.h"
51 #include <utils/debug.h>
52 #include <threading/mutex.h>
53 #include <threading/condvar.h>
54 #include <collections/array.h>
55 #include <collections/hashtable.h>
57 typedef struct private_netlink_socket_t private_netlink_socket_t
;
60 * Private variables and functions of netlink_socket_t class.
62 struct private_netlink_socket_t
{
65 * public part of the netlink_socket_t object.
67 netlink_socket_t
public;
70 * mutex to lock access entries
75 * Netlink request entries currently active, uintptr_t seq => entry_t
80 * Current sequence number for Netlink requests
95 * Enum names for Netlink messages
100 * Timeout for Netlink replies, in ms
105 * Number of times to repeat timed out queries
110 * Buffer size for received Netlink messages
115 * Use parallel netlink queries
120 * Ignore errors potentially resulting from a retransmission
122 bool ignore_retransmit_errors
;
126 * #definable hook to simulate request message loss
128 #ifdef NETLINK_MSG_LOSS_HOOK
129 bool NETLINK_MSG_LOSS_HOOK(struct nlmsghdr
*msg
);
130 #define msg_loss_hook(msg) NETLINK_MSG_LOSS_HOOK(msg)
132 #define msg_loss_hook(msg) FALSE
136 * Request entry the answer for a waiting thread is collected in
139 /** Condition variable thread is waiting */
141 /** Array of hdrs in a multi-message response, as struct nlmsghdr* */
143 /** All response messages received? */
148 * Clean up a thread waiting entry
150 static void destroy_entry(entry_t
*entry
)
152 entry
->condvar
->destroy(entry
->condvar
);
153 array_destroy_function(entry
->hdrs
, (void*)free
, NULL
);
158 * Write a Netlink message to socket
160 static bool write_msg(private_netlink_socket_t
*this, struct nlmsghdr
*msg
)
162 struct sockaddr_nl addr
= {
163 .nl_family
= AF_NETLINK
,
167 if (msg_loss_hook(msg
))
174 len
= sendto(this->socket
, msg
, msg
->nlmsg_len
, 0,
175 (struct sockaddr
*)&addr
, sizeof(addr
));
176 if (len
!= msg
->nlmsg_len
)
182 DBG1(DBG_KNL
, "netlink write error: %s", strerror(errno
));
190 * Read a single Netlink message from socket, return 0 on error, -1 on timeout
192 static ssize_t
read_msg(private_netlink_socket_t
*this,
193 char *buf
, size_t buflen
, bool block
)
203 FD_SET(this->socket
, &set
);
204 timeval_add_ms(&tv
, this->timeout
);
206 if (select(this->socket
+ 1, &set
, NULL
, NULL
,
207 this->timeout
? &tv
: NULL
) <= 0)
212 len
= recv(this->socket
, buf
, buflen
, MSG_TRUNC
|(block
? 0 : MSG_DONTWAIT
));
215 DBG1(DBG_KNL
, "netlink response exceeds buffer size");
220 if (errno
!= EAGAIN
&& errno
!= EWOULDBLOCK
&& errno
!= EINTR
)
222 DBG1(DBG_KNL
, "netlink read error: %s", strerror(errno
));
230 * Queue received response message
232 static bool queue(private_netlink_socket_t
*this, struct nlmsghdr
*buf
)
234 struct nlmsghdr
*hdr
;
238 seq
= (uintptr_t)buf
->nlmsg_seq
;
240 this->mutex
->lock(this->mutex
);
241 entry
= this->entries
->get(this->entries
, (void*)seq
);
244 hdr
= malloc(buf
->nlmsg_len
);
245 memcpy(hdr
, buf
, buf
->nlmsg_len
);
246 array_insert(entry
->hdrs
, ARRAY_TAIL
, hdr
);
247 if (hdr
->nlmsg_type
== NLMSG_DONE
|| !(hdr
->nlmsg_flags
& NLM_F_MULTI
))
249 entry
->complete
= TRUE
;
250 entry
->condvar
->signal(entry
->condvar
);
255 DBG1(DBG_KNL
, "received unknown netlink seq %u, ignored", seq
);
257 this->mutex
->unlock(this->mutex
);
259 return entry
!= NULL
;
263 * Read and queue response message, optionally blocking, returns TRUE on timeout
265 static bool read_and_queue(private_netlink_socket_t
*this, bool block
)
267 struct nlmsghdr
*hdr
;
268 char buf
[this->buflen
];
269 ssize_t len
, read_len
;
272 len
= read_len
= read_msg(this, buf
, sizeof(buf
), block
);
279 hdr
= (struct nlmsghdr
*)buf
;
280 while (NLMSG_OK(hdr
, len
))
282 if (this->protocol
== NETLINK_XFRM
&&
283 hdr
->nlmsg_type
== XFRM_MSG_NEWSA
)
284 { /* wipe potential IPsec SA keys */
287 if (!queue(this, hdr
))
291 hdr
= NLMSG_NEXT(hdr
, len
);
296 memwipe(buf
, read_len
);
301 CALLBACK(watch
, bool,
302 private_netlink_socket_t
*this, int fd
, watcher_event_t event
)
304 if (event
== WATCHER_READ
)
306 read_and_queue(this, FALSE
);
312 * Send a netlink request, try once
314 static status_t
send_once(private_netlink_socket_t
*this, struct nlmsghdr
*in
,
315 uintptr_t seq
, struct nlmsghdr
**out
, size_t *out_len
)
317 struct nlmsghdr
*hdr
;
323 in
->nlmsg_pid
= getpid();
327 DBG3(DBG_KNL
, "sending %N %u: %b", this->names
, in
->nlmsg_type
,
328 (u_int
)seq
, in
, in
->nlmsg_len
);
331 this->mutex
->lock(this->mutex
);
332 if (!write_msg(this, in
))
334 this->mutex
->unlock(this->mutex
);
339 .condvar
= condvar_create(CONDVAR_TYPE_DEFAULT
),
340 .hdrs
= array_create(0, 0),
342 this->entries
->put(this->entries
, (void*)seq
, entry
);
344 while (!entry
->complete
)
346 if (this->parallel
&&
347 lib
->watcher
->get_state(lib
->watcher
) != WATCHER_STOPPED
&&
348 lib
->processor
->get_total_threads(lib
->processor
))
352 if (entry
->condvar
->timed_wait(entry
->condvar
, this->mutex
,
360 entry
->condvar
->wait(entry
->condvar
, this->mutex
);
364 { /* During (de-)initialization, no watcher thread is active.
365 * collect responses ourselves. */
366 if (read_and_queue(this, TRUE
))
372 this->entries
->remove(this->entries
, (void*)seq
);
374 this->mutex
->unlock(this->mutex
);
376 if (!entry
->complete
)
378 destroy_entry(entry
);
382 for (i
= 0, *out_len
= 0; i
< array_count(entry
->hdrs
); i
++)
384 array_get(entry
->hdrs
, i
, &hdr
);
385 *out_len
+= NLMSG_ALIGN(hdr
->nlmsg_len
);
387 ptr
= malloc(*out_len
);
388 *out
= (struct nlmsghdr
*)ptr
;
390 while (array_remove(entry
->hdrs
, ARRAY_HEAD
, &hdr
))
394 DBG3(DBG_KNL
, "received %N %u: %b", this->names
, hdr
->nlmsg_type
,
395 hdr
->nlmsg_seq
, hdr
, hdr
->nlmsg_len
);
397 memcpy(ptr
, hdr
, hdr
->nlmsg_len
);
398 ptr
+= NLMSG_ALIGN(hdr
->nlmsg_len
);
401 destroy_entry(entry
);
406 * Ignore errors for message types that might have completed previously
408 static void ignore_retransmit_error(private_netlink_socket_t
*this,
409 struct nlmsgerr
*err
, int type
)
414 switch (this->protocol
)
419 case XFRM_MSG_NEWPOLICY
:
440 switch (this->protocol
)
445 case XFRM_MSG_DELPOLICY
:
468 METHOD(netlink_socket_t
, netlink_send
, status_t
,
469 private_netlink_socket_t
*this, struct nlmsghdr
*in
, struct nlmsghdr
**out
,
475 seq
= ref_get(&this->seq
);
477 for (try = 0; try <= this->retries
; ++try)
479 struct nlmsghdr
*hdr
;
485 DBG1(DBG_KNL
, "retransmitting Netlink request (%u/%u)",
488 status
= send_once(this, in
, seq
, &hdr
, &len
);
498 if (hdr
->nlmsg_type
== NLMSG_ERROR
)
500 struct nlmsgerr
* err
;
502 err
= NLMSG_DATA(hdr
);
503 if (err
->error
== -EBUSY
)
509 if (this->ignore_retransmit_errors
&& try > 0)
511 ignore_retransmit_error(this, err
, in
->nlmsg_type
);
518 DBG1(DBG_KNL
, "Netlink request timed out after %u retransmits",
523 METHOD(netlink_socket_t
, netlink_send_ack
, status_t
,
524 private_netlink_socket_t
*this, struct nlmsghdr
*in
)
526 struct nlmsghdr
*out
, *hdr
;
529 if (netlink_send(this, in
, &out
, &len
) != SUCCESS
)
534 while (NLMSG_OK(hdr
, len
))
536 switch (hdr
->nlmsg_type
)
540 struct nlmsgerr
* err
= NLMSG_DATA(hdr
);
544 if (-err
->error
== EEXIST
)
545 { /* do not report existing routes */
549 if (-err
->error
== ESRCH
)
550 { /* do not report missing entries */
554 DBG1(DBG_KNL
, "received netlink error: %s (%d)",
555 strerror(-err
->error
), -err
->error
);
563 hdr
= NLMSG_NEXT(hdr
, len
);
570 DBG1(DBG_KNL
, "netlink request not acknowledged");
575 METHOD(netlink_socket_t
, destroy
, void,
576 private_netlink_socket_t
*this)
578 if (this->socket
!= -1)
582 lib
->watcher
->remove(lib
->watcher
, this->socket
);
586 this->entries
->destroy(this->entries
);
587 this->mutex
->destroy(this->mutex
);
592 * Described in header
594 u_int
netlink_get_buflen()
598 buflen
= lib
->settings
->get_int(lib
->settings
,
599 "%s.plugins.kernel-netlink.buflen", 0, lib
->ns
);
602 long pagesize
= sysconf(_SC_PAGESIZE
);
608 /* base this on NLMSG_GOODSIZE */
609 buflen
= min(pagesize
, 8192);
615 * Described in header
617 netlink_socket_t
*netlink_socket_create(int protocol
, enum_name_t
*names
,
620 private_netlink_socket_t
*this;
621 struct sockaddr_nl addr
= {
622 .nl_family
= AF_NETLINK
,
624 bool force_buf
= FALSE
;
629 .send
= _netlink_send
,
630 .send_ack
= _netlink_send_ack
,
634 .mutex
= mutex_create(MUTEX_TYPE_RECURSIVE
),
635 .socket
= socket(AF_NETLINK
, SOCK_RAW
, protocol
),
636 .entries
= hashtable_create(hashtable_hash_ptr
, hashtable_equals_ptr
, 4),
637 .protocol
= protocol
,
639 .buflen
= netlink_get_buflen(),
640 .timeout
= lib
->settings
->get_int(lib
->settings
,
641 "%s.plugins.kernel-netlink.timeout", 0, lib
->ns
),
642 .retries
= lib
->settings
->get_int(lib
->settings
,
643 "%s.plugins.kernel-netlink.retries", 0, lib
->ns
),
644 .ignore_retransmit_errors
= lib
->settings
->get_bool(lib
->settings
,
645 "%s.plugins.kernel-netlink.ignore_retransmit_errors",
647 .parallel
= parallel
,
650 if (this->socket
== -1)
652 DBG1(DBG_KNL
, "unable to create netlink socket: %s (%d)",
653 strerror(errno
), errno
);
657 if (bind(this->socket
, (struct sockaddr
*)&addr
, sizeof(addr
)))
659 DBG1(DBG_KNL
, "unable to bind netlink socket: %s (%d)",
660 strerror(errno
), errno
);
664 rcvbuf_size
= lib
->settings
->get_int(lib
->settings
,
665 "%s.plugins.kernel-netlink.receive_buffer_size",
666 rcvbuf_size
, lib
->ns
);
671 force_buf
= lib
->settings
->get_bool(lib
->settings
,
672 "%s.plugins.kernel-netlink.force_receive_buffer_size",
674 optname
= force_buf
? SO_RCVBUFFORCE
: SO_RCVBUF
;
676 if (setsockopt(this->socket
, SOL_SOCKET
, optname
, &rcvbuf_size
,
677 sizeof(rcvbuf_size
)) == -1)
679 DBG1(DBG_KNL
, "failed to %supdate receive buffer size to %d: %s",
680 force_buf
? "forcibly " : "", rcvbuf_size
, strerror(errno
));
685 lib
->watcher
->add(lib
->watcher
, this->socket
, WATCHER_READ
, watch
, this);
688 return &this->public;
692 * Described in header
694 void netlink_add_attribute(struct nlmsghdr
*hdr
, int rta_type
, chunk_t data
,
699 if (NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_LENGTH(data
.len
) > buflen
)
701 DBG1(DBG_KNL
, "unable to add attribute, buffer too small");
705 rta
= (struct rtattr
*)(((char*)hdr
) + NLMSG_ALIGN(hdr
->nlmsg_len
));
706 rta
->rta_type
= rta_type
;
707 rta
->rta_len
= RTA_LENGTH(data
.len
);
708 memcpy(RTA_DATA(rta
), data
.ptr
, data
.len
);
709 hdr
->nlmsg_len
= NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_ALIGN(rta
->rta_len
);
713 * Add an attribute to the given Netlink message
715 static struct rtattr
*add_rtattr(struct nlmsghdr
*hdr
, int buflen
, int type
,
720 if (NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_LENGTH(len
) > buflen
)
722 DBG1(DBG_KNL
, "unable to add attribute, buffer too small");
726 rta
= ((void*)hdr
) + NLMSG_ALIGN(hdr
->nlmsg_len
);
727 rta
->rta_type
= type
;
728 rta
->rta_len
= RTA_LENGTH(len
);
729 hdr
->nlmsg_len
= NLMSG_ALIGN(hdr
->nlmsg_len
) + RTA_ALIGN(rta
->rta_len
);
/**
 * Described in header
 *
 * Opens a nested attribute by appending an rtattr of the given type with an
 * empty payload; the result of add_rtattr() is handed back unchanged.
 */
void *netlink_nested_start(struct nlmsghdr *hdr, size_t buflen, int type)
{
	struct rtattr *nested;

	/* zero payload length: only the attribute header is added here */
	nested = add_rtattr(hdr, buflen, type, 0);
	return nested;
}
742 * Described in header
744 void netlink_nested_end(struct nlmsghdr
*hdr
, void *attr
)
746 struct rtattr
*rta
= attr
;
751 end
= (char*)hdr
+ NLMSG_ALIGN(hdr
->nlmsg_len
);
752 rta
->rta_len
= end
- attr
;
757 * Described in header
759 void *netlink_reserve(struct nlmsghdr
*hdr
, int buflen
, int type
, int len
)
763 rta
= add_rtattr(hdr
, buflen
, type
, len
);
768 return RTA_DATA(rta
);
772 * Described in header
774 void route_entry_destroy(route_entry_t
*this)
777 DESTROY_IF(this->src_ip
);
778 DESTROY_IF(this->gateway
);
779 chunk_free(&this->dst_net
);
784 * Described in header
786 route_entry_t
*route_entry_clone(const route_entry_t
*this)
788 route_entry_t
*route
;
791 .if_name
= strdupnull(this->if_name
),
792 .src_ip
= this->src_ip
? this->src_ip
->clone(this->src_ip
) : NULL
,
793 .gateway
= this->gateway
? this->gateway
->clone(this->gateway
) : NULL
,
794 .dst_net
= chunk_clone(this->dst_net
),
795 .prefixlen
= this->prefixlen
,
802 * Described in header
804 u_int
route_entry_hash(const route_entry_t
*this)
806 return chunk_hash_inc(chunk_from_thing(this->prefixlen
),
807 chunk_hash(this->dst_net
));
811 * Compare two IP addresses, also accept it if both are NULL
813 static bool addrs_null_or_equal(host_t
*a
, host_t
*b
)
815 return (!a
&& !b
) || (a
&& b
&& a
->ip_equals(a
, b
));
819 * Described in header
821 bool route_entry_equals(const route_entry_t
*a
, const route_entry_t
*b
)
823 return streq(a
->if_name
, b
->if_name
) &&
824 a
->pass
== b
->pass
&&
825 a
->prefixlen
== b
->prefixlen
&&
826 chunk_equals(a
->dst_net
, b
->dst_net
) &&
827 addrs_null_or_equal(a
->src_ip
, b
->src_ip
) &&
828 addrs_null_or_equal(a
->gateway
, b
->gateway
);