1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Tom Gundersen <teg@jklm.no>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/socket.h>
25 #include "sd-netlink.h"
31 #include "netlink-internal.h"
32 #include "netlink-util.h"
33 #include "socket-util.h"
/* Allocates a zero-initialized sd_netlink (held in a cleanup-scoped pointer)
 * and sets up its basic state: reference count (REFCNT_INIT), AF_NETLINK as
 * the socket address family, the PID of the creating process (compared later
 * by rtnl_pid_changed()), an empty match-callback list, and a read buffer
 * grown through greedy_realloc() to hold at least sizeof(struct nlmsghdr)
 * bytes.
 *
 * ret: output pointer; a NULL value makes assert_return yield -EINVAL.
 *
 * NOTE(review): this listing has gaps in its embedded numbering. The
 * out-of-memory handling, the statement the trailing sequence-number comment
 * refers to, and the hand-off of the object to *ret are not visible here --
 * confirm against the complete source. */
36 static int sd_netlink_new(sd_netlink
**ret
) {
37 _cleanup_netlink_unref_ sd_netlink
*rtnl
= NULL
;
39 assert_return(ret
, -EINVAL
);
41 rtnl
= new0(sd_netlink
, 1);
45 rtnl
->n_ref
= REFCNT_INIT
;
49 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
51 rtnl
->original_pid
= getpid();
53 LIST_HEAD_INIT(rtnl
->match_callbacks
);
55 /* We guarantee that the read buffer has at least space for
57 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
58 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
61 /* Change notification responses have sequence 0, so we must
62 * start our request sequence numbers at 1, or we may confuse our
63 * responses with notifications from the kernel */
/* Builds an sd_netlink object around an existing socket descriptor: creates
 * the object with sd_netlink_new() and then fills rtnl->sockaddr with the
 * local address of fd as reported by getsockname().
 *
 * ret: output pointer; NULL yields -EINVAL through assert_return.
 * fd:  socket descriptor to query.
 *
 * NOTE(review): the declarations of r and addrlen, the error checks after
 * each call, and the stores of fd and *ret are not visible in this listing. */
72 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
73 _cleanup_netlink_unref_ sd_netlink
*rtnl
= NULL
;
77 assert_return(ret
, -EINVAL
);
79 r
= sd_netlink_new(&rtnl
);
83 addrlen
= sizeof(rtnl
->sockaddr
);
85 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
/* Reports whether the calling process differs from the one recorded in
 * rtnl->original_pid: returns true when getpid() no longer matches it. */
97 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
100 /* We don't support people creating an rtnl connection and
101 * keeping it around over a fork(). Let's complain. */
103 return rtnl
->original_pid
!= getpid();
/* Creates a new sd_netlink object for the descriptor fd and binds it with
 * socket_bind().
 *
 * ret: output pointer; NULL yields -EINVAL.
 * fd:  must be >= 0, otherwise -EBADF.
 *
 * NOTE(review): the lines that store fd in the object, check r, and publish
 * the result through *ret are not visible in this listing. */
106 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
107 _cleanup_netlink_unref_ sd_netlink
*rtnl
= NULL
;
110 assert_return(ret
, -EINVAL
);
111 assert_return(fd
>= 0, -EBADF
);
113 r
= sd_netlink_new(&rtnl
);
119 r
= socket_bind(rtnl
);
/* Opens a NETLINK_ROUTE socket with socket_open() and wraps it in a new
 * sd_netlink object through sd_netlink_open_fd().
 *
 * The descriptor is declared _cleanup_close_ and starts out as -1.
 * NOTE(review): presumably fd is reset after a successful hand-off so the
 * cleanup handler does not close a descriptor the object now owns -- that
 * line is not visible here; verify. */
129 int sd_netlink_open(sd_netlink
**ret
) {
130 _cleanup_close_
int fd
= -1;
133 fd
= socket_open(NETLINK_ROUTE
);
137 r
= sd_netlink_open_fd(ret
, fd
);
/* Forwards to fd_inc_rcvbuf() with the object's socket descriptor and the
 * requested size, and returns that call's result unchanged.
 * No NULL check on rtnl is visible before it is dereferenced. */
146 int sd_netlink_inc_rcvbuf(const sd_netlink
*const rtnl
, const int size
) {
147 return fd_inc_rcvbuf(rtnl
->fd
, size
);
/* Takes an additional reference on rtnl.
 *
 * Returns NULL through assert_return when rtnl is NULL or when the object is
 * used from a different process than the one recorded at creation.
 * The assert_se() requires the counter to be at least 2 after the increment,
 * i.e. the caller must already have held a reference.
 *
 * NOTE(review): the return statement is not visible in this listing. */
150 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
151 assert_return(rtnl
, NULL
);
152 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
155 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
/* Drops one reference on rtnl. When REFCNT_DEC() reports 0 the visible
 * teardown sequence runs:
 *   - unref every message in rqueue and in rqueue_partial, free rqueue_partial
 *   - release reply_callbacks via hashmap_free_free() and the matching prioq
 *   - unref both event sources and the event loop reference
 *   - remove every remaining match callback through sd_netlink_remove_match()
 *     (the loop re-reads the list head each round, so it ends once the list
 *     is empty)
 *   - free broadcast_group_refs and close the socket with safe_close()
 *
 * Returns NULL through assert_return when called from a different process.
 *
 * NOTE(review): the NULL check on rtnl, the declaration of i, the release of
 * rqueue/rbuffer and of the object itself, and the final return are not
 * visible in this listing. */
160 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
164 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
166 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
167 struct match_callback
*f
;
170 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
171 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
174 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
175 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
176 free(rtnl
->rqueue_partial
);
180 hashmap_free_free(rtnl
->reply_callbacks
);
181 prioq_free(rtnl
->reply_callbacks_prioq
);
183 sd_event_source_unref(rtnl
->io_event_source
);
184 sd_event_source_unref(rtnl
->time_event_source
);
185 sd_event_unref(rtnl
->event
);
187 while ((f
= rtnl
->match_callbacks
)) {
188 sd_netlink_remove_match(rtnl
, f
->type
, f
->callback
, f
->userdata
);
191 hashmap_free(rtnl
->broadcast_group_refs
);
193 safe_close(rtnl
->fd
);
/* Stamps message m with the next request sequence number taken from
 * rtnl->serial and then seals it with rtnl_message_seal().
 * Asserts that the object is still used from the process that created it. */
200 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
202 assert(!rtnl_pid_changed(rtnl
));
206 /* don't use seq == 0, as that is used for broadcasts, so we
207 would get confused by replies to such messages */
/* GNU "?:" shorthand: use the post-incremented serial unless it was 0, in
 * which case the counter is post-incremented once more and that value is
 * used instead. */
208 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
210 rtnl_message_seal(m
);
/* Seals message (assigning its sequence number) and writes it to the socket
 * with socket_write_message(); the message's serial is reported through
 * *serial.
 *
 * Preconditions enforced with assert_return:
 *   nl == NULL        -> -EINVAL
 *   other process     -> -ECHILD
 *   message == NULL   -> -EINVAL
 *   message sealed    -> -EPERM  (a message can only be sent once)
 *
 * NOTE(review): the line declaring the serial parameter, the check of r, and
 * whether the store to *serial is guarded by a NULL test are not visible in
 * this listing. */
215 int sd_netlink_send(sd_netlink
*nl
,
216 sd_netlink_message
*message
,
220 assert_return(nl
, -EINVAL
);
221 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
222 assert_return(message
, -EINVAL
);
223 assert_return(!message
->sealed
, -EPERM
);
225 rtnl_seal_message(nl
, message
);
227 r
= socket_write_message(nl
, message
);
232 *serial
= rtnl_message_get_serial(message
);
/* Ensures the read queue can take one more message.
 * When rqueue_size has reached RTNL_RQUEUE_MAX a debug message is logged;
 * otherwise the rqueue array is grown with GREEDY_REALLOC to hold
 * rqueue_size + 1 entries.
 *
 * NOTE(review): the values returned on the limit, on allocation failure and
 * on success are not visible in this listing. */
237 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
240 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
241 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
245 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
/* Same as rtnl_rqueue_make_room(), but for the queue of partially received
 * messages: logs at debug level once rqueue_partial_size has reached
 * RTNL_RQUEUE_MAX, otherwise grows rqueue_partial with GREEDY_REALLOC to
 * hold rqueue_partial_size + 1 entries.
 *
 * NOTE(review): the return values are not visible in this listing. */
251 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
254 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
255 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
259 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
260 rtnl
->rqueue_partial_size
+ 1))
/* Hands out the message at the head of the read queue through *message.
 * If the queue is empty, socket_read_message() is called first to try to
 * fetch data from the socket.
 *
 * NOTE(review): the handling of the read result (error / nothing read) is
 * not visible in this listing. */
266 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
272 if (rtnl
->rqueue_size
<= 0) {
273 /* Try to read a new message */
274 r
= socket_read_message(rtnl
);
279 /* Dispatch a queued message */
280 *message
= rtnl
->rqueue
[0];
281 rtnl
->rqueue_size
--;
/* Shift the remaining entries down by one slot; source and destination
 * overlap, hence memmove rather than memcpy. */
282 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
/* Handles the reply callback at the front of reply_callbacks_prioq:
 * a synthetic error message carrying -ETIMEDOUT and the callback's serial is
 * created, the entry is popped from the priority queue (asserting it is the
 * entry that was peeked) and removed from the reply_callbacks hashmap, and
 * the callback is invoked with the synthetic message. A failing callback is
 * only logged at debug level.
 *
 * The synthetic message is held in a cleanup-scoped pointer.
 *
 * NOTE(review): the comparison of c->timeout against the CLOCK_MONOTONIC
 * time read into n, the early returns, and the release of c are not visible
 * in this listing. */
287 static int process_timeout(sd_netlink
*rtnl
) {
288 _cleanup_netlink_message_unref_ sd_netlink_message
*m
= NULL
;
289 struct reply_callback
*c
;
295 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
299 n
= now(CLOCK_MONOTONIC
);
303 r
= rtnl_message_new_synthetic_error(-ETIMEDOUT
, c
->serial
, &m
);
307 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
308 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
310 r
= c
->callback(rtnl
, m
, c
->userdata
);
312 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
/* Delivers message m to the callback registered for its serial.
 * The entry is removed from the reply_callbacks hashmap (keyed by serial)
 * and from the timeout priority queue before the callback runs; the entry is
 * declared _cleanup_free_. A failing callback is only logged at debug level.
 *
 * NOTE(review): what happens when no entry is found, whether the prioq
 * removal is conditional, and the branch taken for NLMSG_DONE are not
 * visible in this listing. */
319 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
320 _cleanup_free_
struct reply_callback
*c
= NULL
;
328 serial
= rtnl_message_get_serial(m
);
329 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
334 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
336 r
= sd_netlink_message_get_type(m
, &type
);
340 if (type
== NLMSG_DONE
)
343 r
= c
->callback(rtnl
, m
, c
->userdata
);
345 log_debug_errno(r
, "sd-netlink: callback failed: %m");
/* Offers message m to the registered match callbacks: the message type is
 * read with sd_netlink_message_get_type() and every entry of
 * rtnl->match_callbacks whose type equals it has its callback invoked with
 * the message and the entry's userdata. A failing callback is logged at
 * debug level.
 *
 * NOTE(review): whether iteration stops after a callback returns non-zero,
 * and the function's return value, are not visible in this listing. */
350 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
351 struct match_callback
*c
;
358 r
= sd_netlink_message_get_type(m
, &type
);
362 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
363 if (type
== c
->type
) {
364 r
= c
->callback(rtnl
, m
, c
->userdata
);
367 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
/* Performs one processing step: first process_timeout(), then
 * dispatch_rqueue() to obtain a message, which is routed to process_match()
 * when sd_netlink_message_is_broadcast() is true and to process_reply()
 * otherwise (NOTE(review): the else is presumed -- it is not visible here).
 *
 * The message is held in a cleanup-scoped pointer.
 *
 * NOTE(review): the early returns between the steps and the use of the ret
 * parameter are not visible in this listing. */
377 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
378 _cleanup_netlink_message_unref_ sd_netlink_message
*m
= NULL
;
383 r
= process_timeout(rtnl
);
387 r
= dispatch_rqueue(rtnl
, &m
);
393 if (sd_netlink_message_is_broadcast(m
)) {
394 r
= process_match(rtnl
, m
);
398 r
= process_reply(rtnl
, m
);
/* Public entry point that runs one processing step via process_running().
 *
 * The rtnl->processing flag is set for the duration of the step and a call
 * made while it is set fails with -EBUSY, so the function cannot be
 * re-entered from a callback. Other assert_return failures: rtnl == NULL
 * -> -EINVAL, use from another process -> -ECHILD.
 *
 * NOTE(review): RTNL_DONT_DESTROY is defined elsewhere; it presumably keeps
 * the object alive until the function returns -- confirm. It is applied
 * before the NULL check on rtnl. The return statement is not visible here. */
419 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
420 RTNL_DONT_DESTROY(rtnl
);
423 assert_return(rtnl
, -EINVAL
);
424 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
425 assert_return(!rtnl
->processing
, -EBUSY
);
427 rtnl
->processing
= true;
428 r
= process_running(rtnl
, ret
);
429 rtnl
->processing
= false;
/* Turns a relative timeout in microseconds into an absolute deadline on
 * CLOCK_MONOTONIC (now + usec).
 *
 * The value (uint64_t) -1 is tested for explicitly, and RTNL_DEFAULT_TIMEOUT
 * is substituted under a condition.
 * NOTE(review): the result for (uint64_t) -1 and the condition guarding the
 * default substitution are not visible in this listing. */
434 static usec_t
calc_elapse(uint64_t usec
) {
435 if (usec
== (uint64_t) -1)
439 usec
= RTNL_DEFAULT_TIMEOUT
;
441 return now(CLOCK_MONOTONIC
) + usec
;
/* Waits on a single pollfd with ppoll().
 *
 * The wait time m starts as USEC_INFINITY. It can be derived from the
 * deadline reported by sd_netlink_get_timeout() (remaining time, clamped to
 * 0 when the deadline has passed) and is further limited by timeout_usec
 * when that is not (uint64_t) -1. ppoll() receives a NULL timespec when m is
 * (uint64_t) -1, otherwise m converted with timespec_store().
 *
 * Returns 1 when ppoll() reported at least one ready descriptor, else 0.
 *
 * NOTE(review): the use of need_more to choose between the two commented
 * cases, the filling of p[0], and the error paths are not visible in this
 * listing. */
444 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
445 struct pollfd p
[1] = {};
447 usec_t m
= USEC_INFINITY
;
452 e
= sd_netlink_get_events(rtnl
);
457 /* Caller wants more data, and doesn't care about
458 * what's been read or any other timeouts. */
462 /* Caller wants to process if there is something to
463 * process, but doesn't care otherwise */
465 r
= sd_netlink_get_timeout(rtnl
, &until
);
470 nw
= now(CLOCK_MONOTONIC
);
471 m
= until
> nw
? until
- nw
: 0;
475 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
481 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
485 return r
> 0 ? 1 : 0;
/* Waits until there is something to process or timeout_usec has passed.
 * A non-empty read queue is handled without polling; otherwise the call is
 * delegated to rtnl_poll() with need_more == false.
 *
 * assert_return failures: nl == NULL -> -EINVAL, other process -> -ECHILD.
 *
 * NOTE(review): the value returned when the queue is non-empty is not
 * visible in this listing. */
488 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
489 assert_return(nl
, -EINVAL
);
490 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
492 if (nl
->rqueue_size
> 0)
495 return rtnl_poll(nl
, false, timeout_usec
);
/* Ordering function over struct reply_callback entries, based on their
 * timeout field; passed to prioq_ensure_allocated() for
 * reply_callbacks_prioq. A timeout of 0 is compared separately from
 * non-zero timeouts before the plain less-than / greater-than checks.
 *
 * NOTE(review): the value returned by each branch is not visible in this
 * listing, so the resulting order (including where timeout == 0 sorts) must
 * be confirmed against the complete source. */
498 static int timeout_compare(const void *a
, const void *b
) {
499 const struct reply_callback
*x
= a
, *y
= b
;
501 if (x
->timeout
!= 0 && y
->timeout
== 0)
504 if (x
->timeout
== 0 && y
->timeout
!= 0)
507 if (x
->timeout
< y
->timeout
)
510 if (x
->timeout
> y
->timeout
)
/* Sends message m and registers callback to be invoked with the reply.
 *
 * Visible steps: make sure the reply_callbacks hashmap exists (and, when
 * usec is not (uint64_t) -1, the timeout priority queue too), allocate a
 * reply_callback entry, fill in callback, userdata and the absolute deadline
 * from calc_elapse(usec), send the message with sd_netlink_send(), store the
 * entry in the hashmap under its serial, and -- when the deadline is
 * non-zero -- add it to the priority queue. sd_netlink_call_async_cancel()
 * is called for the entry's serial after the priority-queue insertion.
 * NOTE(review): presumably only when that insertion failed -- the guard is
 * not visible here.
 *
 * assert_return failures: nl, m or callback NULL -> -EINVAL; other process
 * -> -ECHILD.
 *
 * NOTE(review): the remaining parameters (userdata, usec, serial), the
 * error checks, the assignment of c->serial, and the clean-up of c on
 * failure are not visible in this listing. */
516 int sd_netlink_call_async(sd_netlink
*nl
,
517 sd_netlink_message
*m
,
518 sd_netlink_message_handler_t callback
,
522 struct reply_callback
*c
;
526 assert_return(nl
, -EINVAL
);
527 assert_return(m
, -EINVAL
);
528 assert_return(callback
, -EINVAL
);
529 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
531 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
535 if (usec
!= (uint64_t) -1) {
536 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
541 c
= new0(struct reply_callback
, 1);
545 c
->callback
= callback
;
546 c
->userdata
= userdata
;
547 c
->timeout
= calc_elapse(usec
);
549 k
= sd_netlink_send(nl
, m
, &s
);
557 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
563 if (c
->timeout
!= 0) {
564 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
567 sd_netlink_call_async_cancel(nl
, c
->serial
);
/* Cancels a pending asynchronous call: the entry is removed from the
 * reply_callbacks hashmap and from the timeout priority queue.
 *
 * assert_return failures: nl == NULL -> -EINVAL, serial == 0 -> -EINVAL,
 * other process -> -ECHILD.
 *
 * NOTE(review): the hashmap is looked up through a local named s whose
 * declaration is not visible; presumably a copy of serial in the key width
 * the hashmap uses -- confirm. The not-found case, any guard around the
 * prioq removal, the release of c and the return value are not visible
 * either. */
578 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
579 struct reply_callback
*c
;
582 assert_return(nl
, -EINVAL
);
583 assert_return(serial
!= 0, -EINVAL
);
584 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
586 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
591 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
/* Synchronous request/response: sends message, then looks for the reply
 * carrying the same serial.
 *
 * Visible steps: send with sd_netlink_send() (capturing the serial), compute
 * an absolute deadline with calc_elapse(usec), then scan the read queue for
 * an entry whose serial matches. A match is taken out of the queue (the
 * tail is shifted down with memmove) and inspected with
 * sd_netlink_message_get_errno() and sd_netlink_message_get_type(), with
 * NLMSG_DONE tested separately. Without a match, more data is read with
 * socket_read_message(), or the call blocks in rtnl_poll() with
 * need_more == true.
 *
 * assert_return failures: rtnl or message NULL -> -EINVAL; other process
 * -> -ECHILD.
 *
 * NOTE(review): the usec parameter line, the enclosing retry loop, the
 * decrement of rqueue_size, the deadline check that sets left, and every
 * return statement (including what is stored in *ret) are not visible in
 * this listing. */
597 int sd_netlink_call(sd_netlink
*rtnl
,
598 sd_netlink_message
*message
,
600 sd_netlink_message
**ret
) {
605 assert_return(rtnl
, -EINVAL
);
606 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
607 assert_return(message
, -EINVAL
);
609 r
= sd_netlink_send(rtnl
, message
, &serial
);
613 timeout
= calc_elapse(usec
);
619 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
620 uint32_t received_serial
;
622 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
624 if (received_serial
== serial
) {
625 _cleanup_netlink_message_unref_ sd_netlink_message
*incoming
= NULL
;
628 incoming
= rtnl
->rqueue
[i
];
630 /* found a match, remove from rqueue and return it */
631 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
632 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
635 r
= sd_netlink_message_get_errno(incoming
);
639 r
= sd_netlink_message_get_type(incoming
, &type
);
643 if (type
== NLMSG_DONE
) {
657 r
= socket_read_message(rtnl
);
661 /* received message, so try to process straight away */
667 n
= now(CLOCK_MONOTONIC
);
673 left
= (uint64_t) -1;
675 r
= rtnl_poll(rtnl
, true, left
);
/* Reports which poll events the caller should wait for; the answer depends
 * on whether the read queue is empty.
 *
 * assert_return failures: rtnl == NULL -> -EINVAL, other process -> -ECHILD.
 *
 * NOTE(review): the values returned for the empty and non-empty cases are
 * not visible in this listing. */
683 int sd_netlink_get_events(sd_netlink
*rtnl
) {
684 assert_return(rtnl
, -EINVAL
);
685 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
687 if (rtnl
->rqueue_size
== 0)
/* Reports through *timeout_usec when the caller should next wake up.
 *
 * A non-empty read queue is handled first. Otherwise the front entry of
 * reply_callbacks_prioq is peeked: *timeout_usec is set to (uint64_t) -1 in
 * one branch and to that entry's timeout in the other.
 * NOTE(review): presumably (uint64_t) -1 is used when no callback is
 * pending -- the condition is not visible here.
 *
 * assert_return failures: rtnl or timeout_usec NULL -> -EINVAL; other
 * process -> -ECHILD.
 *
 * NOTE(review): the value stored when the queue is non-empty and the return
 * values are not visible in this listing. */
693 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
694 struct reply_callback
*c
;
696 assert_return(rtnl
, -EINVAL
);
697 assert_return(timeout_usec
, -EINVAL
);
698 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
700 if (rtnl
->rqueue_size
> 0) {
705 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
707 *timeout_usec
= (uint64_t) -1;
711 *timeout_usec
= c
->timeout
;
/* sd_event I/O handler registered by sd_netlink_attach_event(): userdata is
 * the sd_netlink object, on which one sd_netlink_process() step is run
 * (no message is requested back, ret is NULL).
 *
 * NOTE(review): the return value is not visible in this listing. */
716 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
717 sd_netlink
*rtnl
= userdata
;
722 r
= sd_netlink_process(rtnl
, NULL
);
/* sd_event timer handler registered by sd_netlink_attach_event(): userdata
 * is the sd_netlink object, on which one sd_netlink_process() step is run
 * (no message is requested back, ret is NULL).
 *
 * NOTE(review): the return value is not visible in this listing. */
729 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
730 sd_netlink
*rtnl
= userdata
;
735 r
= sd_netlink_process(rtnl
, NULL
);
/* Prepare hook installed on the I/O event source by
 * sd_netlink_attach_event(). It refreshes both event sources from the
 * object's current state: the I/O source gets the event mask from
 * sd_netlink_get_events(), and the timer source gets the deadline from
 * sd_netlink_get_timeout() and is enabled only when r > 0 at that point.
 *
 * NOTE(review): the error checks, the condition around
 * sd_event_source_set_time() (its result is kept in j, separate from r),
 * and the return value are not visible in this listing. */
742 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
743 sd_netlink
*rtnl
= userdata
;
750 e
= sd_netlink_get_events(rtnl
);
754 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
758 r
= sd_netlink_get_timeout(rtnl
, &until
);
764 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
769 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
/* Hooks the netlink object into an sd_event loop.
 *
 * Visible steps: obtain the loop (a reference to the passed event, or the
 * default loop via sd_event_default()), add an I/O source on rtnl->fd with
 * io_callback, give it the requested priority, the description
 * "rtnl-receive-message" and prepare_callback as prepare hook; then add a
 * CLOCK_MONOTONIC timer source with time_callback, the same priority and
 * the description "rtnl-timer". sd_netlink_detach_event() is called at the
 * end of the visible body. NOTE(review): presumably as the failure path --
 * the label/guard is not visible here.
 *
 * assert_return failures: rtnl == NULL -> -EINVAL, already attached
 * (rtnl->event set) -> -EBUSY. Both event-source fields are asserted to be
 * unset.
 *
 * NOTE(review): the condition choosing between the passed and the default
 * loop, the error checks, and the return values are not visible in this
 * listing. */
776 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int priority
) {
779 assert_return(rtnl
, -EINVAL
);
780 assert_return(!rtnl
->event
, -EBUSY
);
782 assert(!rtnl
->io_event_source
);
783 assert(!rtnl
->time_event_source
);
786 rtnl
->event
= sd_event_ref(event
);
788 r
= sd_event_default(&rtnl
->event
);
793 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
797 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
801 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
805 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
809 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
813 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
817 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
824 sd_netlink_detach_event(rtnl
);
/* Detaches the netlink object from its event loop: the I/O source, the
 * timer source and the loop reference are each unreferenced, and each field
 * is overwritten with the value returned by the unref call.
 *
 * assert_return failures: rtnl == NULL -> -EINVAL, not attached
 * (rtnl->event unset) -> -ENXIO.
 *
 * NOTE(review): the return value is not visible in this listing. */
828 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
829 assert_return(rtnl
, -EINVAL
);
830 assert_return(rtnl
->event
, -ENXIO
);
832 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
834 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
836 rtnl
->event
= sd_event_unref(rtnl
->event
);
/* Registers callback to be invoked for broadcast messages.
 *
 * Visible steps: allocate a match_callback entry (declared _cleanup_free_),
 * store callback and userdata in it, take references on multicast groups
 * through socket_broadcast_group_ref() -- RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR,
 * RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV4_ROUTE and RTNLGRP_IPV6_ROUTE appear --
 * and prepend the entry to rtnl->match_callbacks.
 *
 * assert_return failures: rtnl or callback NULL -> -EINVAL; other process
 * -> -ECHILD.
 *
 * NOTE(review): the type and userdata parameter lines, the assignment of
 * c->type, the selection of which groups are joined for which type, the
 * error checks, and the step that keeps the cleanup handler from freeing c
 * after it has been linked into the list are not visible in this listing. */
841 int sd_netlink_add_match(sd_netlink
*rtnl
,
843 sd_netlink_message_handler_t callback
,
845 _cleanup_free_
struct match_callback
*c
= NULL
;
848 assert_return(rtnl
, -EINVAL
);
849 assert_return(callback
, -EINVAL
);
850 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
852 c
= new0(struct match_callback
, 1);
856 c
->callback
= callback
;
858 c
->userdata
= userdata
;
863 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_LINK
);
870 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_IFADDR
);
874 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_IFADDR
);
881 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_ROUTE
);
885 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_ROUTE
);
893 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
900 int sd_netlink_remove_match(sd_netlink
*rtnl
,
902 sd_netlink_message_handler_t callback
,
904 struct match_callback
*c
;
907 assert_return(rtnl
, -EINVAL
);
908 assert_return(callback
, -EINVAL
);
909 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
911 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
912 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
913 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);
919 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_LINK
);
926 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_IFADDR
);
930 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_IFADDR
);
937 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_ROUTE
);
941 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_ROUTE
);