1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Tom Gundersen <teg@jklm.no>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/socket.h>
25 #include "sd-netlink.h"
27 #include "alloc-util.h"
32 #include "netlink-internal.h"
33 #include "netlink-util.h"
34 #include "socket-util.h"
37 static int sd_netlink_new(sd_netlink
**ret
) {
38 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
40 assert_return(ret
, -EINVAL
);
42 rtnl
= new0(sd_netlink
, 1);
46 rtnl
->n_ref
= REFCNT_INIT
;
50 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
52 rtnl
->original_pid
= getpid();
54 LIST_HEAD_INIT(rtnl
->match_callbacks
);
56 /* We guarantee that the read buffer has at least space for
58 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
59 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
62 /* Change notification responses have sequence 0, so we must
63 * start our request sequence numbers at 1, or we may confuse our
64 * responses with notifications from the kernel */
73 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
74 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
78 assert_return(ret
, -EINVAL
);
80 r
= sd_netlink_new(&rtnl
);
84 addrlen
= sizeof(rtnl
->sockaddr
);
86 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
98 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
101 /* We don't support people creating an rtnl connection and
102 * keeping it around over a fork(). Let's complain. */
104 return rtnl
->original_pid
!= getpid();
107 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
108 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
111 assert_return(ret
, -EINVAL
);
112 assert_return(fd
>= 0, -EBADF
);
114 r
= sd_netlink_new(&rtnl
);
120 r
= socket_bind(rtnl
);
130 int sd_netlink_open(sd_netlink
**ret
) {
131 _cleanup_close_
int fd
= -1;
134 fd
= socket_open(NETLINK_ROUTE
);
138 r
= sd_netlink_open_fd(ret
, fd
);
147 int sd_netlink_inc_rcvbuf(const sd_netlink
*const rtnl
, const int size
) {
148 return fd_inc_rcvbuf(rtnl
->fd
, size
);
151 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
152 assert_return(rtnl
, NULL
);
153 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
156 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
161 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
165 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
167 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
168 struct match_callback
*f
;
171 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
172 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
175 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
176 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
177 free(rtnl
->rqueue_partial
);
181 hashmap_free_free(rtnl
->reply_callbacks
);
182 prioq_free(rtnl
->reply_callbacks_prioq
);
184 sd_event_source_unref(rtnl
->io_event_source
);
185 sd_event_source_unref(rtnl
->time_event_source
);
186 sd_event_unref(rtnl
->event
);
188 while ((f
= rtnl
->match_callbacks
)) {
189 sd_netlink_remove_match(rtnl
, f
->type
, f
->callback
, f
->userdata
);
192 hashmap_free(rtnl
->broadcast_group_refs
);
194 safe_close(rtnl
->fd
);
201 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
203 assert(!rtnl_pid_changed(rtnl
));
207 /* don't use seq == 0, as that is used for broadcasts, so we
208 would get confused by replies to such messages */
209 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
211 rtnl_message_seal(m
);
216 int sd_netlink_send(sd_netlink
*nl
,
217 sd_netlink_message
*message
,
221 assert_return(nl
, -EINVAL
);
222 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
223 assert_return(message
, -EINVAL
);
224 assert_return(!message
->sealed
, -EPERM
);
226 rtnl_seal_message(nl
, message
);
228 r
= socket_write_message(nl
, message
);
233 *serial
= rtnl_message_get_serial(message
);
238 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
241 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
242 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
246 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
252 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
255 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
256 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
260 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
261 rtnl
->rqueue_partial_size
+ 1))
267 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
273 if (rtnl
->rqueue_size
<= 0) {
274 /* Try to read a new message */
275 r
= socket_read_message(rtnl
);
280 /* Dispatch a queued message */
281 *message
= rtnl
->rqueue
[0];
282 rtnl
->rqueue_size
--;
283 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
288 static int process_timeout(sd_netlink
*rtnl
) {
289 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
290 struct reply_callback
*c
;
296 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
300 n
= now(CLOCK_MONOTONIC
);
304 r
= rtnl_message_new_synthetic_error(-ETIMEDOUT
, c
->serial
, &m
);
308 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
309 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
311 r
= c
->callback(rtnl
, m
, c
->userdata
);
313 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
320 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
321 _cleanup_free_
struct reply_callback
*c
= NULL
;
329 serial
= rtnl_message_get_serial(m
);
330 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
335 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
337 r
= sd_netlink_message_get_type(m
, &type
);
341 if (type
== NLMSG_DONE
)
344 r
= c
->callback(rtnl
, m
, c
->userdata
);
346 log_debug_errno(r
, "sd-netlink: callback failed: %m");
351 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
352 struct match_callback
*c
;
359 r
= sd_netlink_message_get_type(m
, &type
);
363 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
364 if (type
== c
->type
) {
365 r
= c
->callback(rtnl
, m
, c
->userdata
);
368 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
378 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
379 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
384 r
= process_timeout(rtnl
);
388 r
= dispatch_rqueue(rtnl
, &m
);
394 if (sd_netlink_message_is_broadcast(m
)) {
395 r
= process_match(rtnl
, m
);
399 r
= process_reply(rtnl
, m
);
420 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
421 NETLINK_DONT_DESTROY(rtnl
);
424 assert_return(rtnl
, -EINVAL
);
425 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
426 assert_return(!rtnl
->processing
, -EBUSY
);
428 rtnl
->processing
= true;
429 r
= process_running(rtnl
, ret
);
430 rtnl
->processing
= false;
435 static usec_t
calc_elapse(uint64_t usec
) {
436 if (usec
== (uint64_t) -1)
440 usec
= RTNL_DEFAULT_TIMEOUT
;
442 return now(CLOCK_MONOTONIC
) + usec
;
445 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
446 struct pollfd p
[1] = {};
448 usec_t m
= USEC_INFINITY
;
453 e
= sd_netlink_get_events(rtnl
);
458 /* Caller wants more data, and doesn't care about
459 * what's been read or any other timeouts. */
463 /* Caller wants to process if there is something to
464 * process, but doesn't care otherwise */
466 r
= sd_netlink_get_timeout(rtnl
, &until
);
471 nw
= now(CLOCK_MONOTONIC
);
472 m
= until
> nw
? until
- nw
: 0;
476 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
482 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
486 return r
> 0 ? 1 : 0;
489 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
490 assert_return(nl
, -EINVAL
);
491 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
493 if (nl
->rqueue_size
> 0)
496 return rtnl_poll(nl
, false, timeout_usec
);
499 static int timeout_compare(const void *a
, const void *b
) {
500 const struct reply_callback
*x
= a
, *y
= b
;
502 if (x
->timeout
!= 0 && y
->timeout
== 0)
505 if (x
->timeout
== 0 && y
->timeout
!= 0)
508 if (x
->timeout
< y
->timeout
)
511 if (x
->timeout
> y
->timeout
)
517 int sd_netlink_call_async(sd_netlink
*nl
,
518 sd_netlink_message
*m
,
519 sd_netlink_message_handler_t callback
,
523 struct reply_callback
*c
;
527 assert_return(nl
, -EINVAL
);
528 assert_return(m
, -EINVAL
);
529 assert_return(callback
, -EINVAL
);
530 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
532 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
536 if (usec
!= (uint64_t) -1) {
537 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
542 c
= new0(struct reply_callback
, 1);
546 c
->callback
= callback
;
547 c
->userdata
= userdata
;
548 c
->timeout
= calc_elapse(usec
);
550 k
= sd_netlink_send(nl
, m
, &s
);
558 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
564 if (c
->timeout
!= 0) {
565 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
568 sd_netlink_call_async_cancel(nl
, c
->serial
);
579 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
580 struct reply_callback
*c
;
583 assert_return(nl
, -EINVAL
);
584 assert_return(serial
!= 0, -EINVAL
);
585 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
587 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
592 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
598 int sd_netlink_call(sd_netlink
*rtnl
,
599 sd_netlink_message
*message
,
601 sd_netlink_message
**ret
) {
606 assert_return(rtnl
, -EINVAL
);
607 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
608 assert_return(message
, -EINVAL
);
610 r
= sd_netlink_send(rtnl
, message
, &serial
);
614 timeout
= calc_elapse(usec
);
620 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
621 uint32_t received_serial
;
623 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
625 if (received_serial
== serial
) {
626 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*incoming
= NULL
;
629 incoming
= rtnl
->rqueue
[i
];
631 /* found a match, remove from rqueue and return it */
632 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
633 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
636 r
= sd_netlink_message_get_errno(incoming
);
640 r
= sd_netlink_message_get_type(incoming
, &type
);
644 if (type
== NLMSG_DONE
) {
658 r
= socket_read_message(rtnl
);
662 /* received message, so try to process straight away */
668 n
= now(CLOCK_MONOTONIC
);
674 left
= (uint64_t) -1;
676 r
= rtnl_poll(rtnl
, true, left
);
684 int sd_netlink_get_events(sd_netlink
*rtnl
) {
685 assert_return(rtnl
, -EINVAL
);
686 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
688 if (rtnl
->rqueue_size
== 0)
694 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
695 struct reply_callback
*c
;
697 assert_return(rtnl
, -EINVAL
);
698 assert_return(timeout_usec
, -EINVAL
);
699 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
701 if (rtnl
->rqueue_size
> 0) {
706 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
708 *timeout_usec
= (uint64_t) -1;
712 *timeout_usec
= c
->timeout
;
717 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
718 sd_netlink
*rtnl
= userdata
;
723 r
= sd_netlink_process(rtnl
, NULL
);
730 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
731 sd_netlink
*rtnl
= userdata
;
736 r
= sd_netlink_process(rtnl
, NULL
);
743 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
744 sd_netlink
*rtnl
= userdata
;
751 e
= sd_netlink_get_events(rtnl
);
755 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
759 r
= sd_netlink_get_timeout(rtnl
, &until
);
765 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
770 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
777 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int priority
) {
780 assert_return(rtnl
, -EINVAL
);
781 assert_return(!rtnl
->event
, -EBUSY
);
783 assert(!rtnl
->io_event_source
);
784 assert(!rtnl
->time_event_source
);
787 rtnl
->event
= sd_event_ref(event
);
789 r
= sd_event_default(&rtnl
->event
);
794 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
798 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
802 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
806 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
810 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
814 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
818 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
825 sd_netlink_detach_event(rtnl
);
829 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
830 assert_return(rtnl
, -EINVAL
);
831 assert_return(rtnl
->event
, -ENXIO
);
833 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
835 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
837 rtnl
->event
= sd_event_unref(rtnl
->event
);
842 int sd_netlink_add_match(sd_netlink
*rtnl
,
844 sd_netlink_message_handler_t callback
,
846 _cleanup_free_
struct match_callback
*c
= NULL
;
849 assert_return(rtnl
, -EINVAL
);
850 assert_return(callback
, -EINVAL
);
851 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
853 c
= new0(struct match_callback
, 1);
857 c
->callback
= callback
;
859 c
->userdata
= userdata
;
864 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_LINK
);
871 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_IFADDR
);
875 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_IFADDR
);
882 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_ROUTE
);
886 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_ROUTE
);
894 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
901 int sd_netlink_remove_match(sd_netlink
*rtnl
,
903 sd_netlink_message_handler_t callback
,
905 struct match_callback
*c
;
908 assert_return(rtnl
, -EINVAL
);
909 assert_return(callback
, -EINVAL
);
910 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
912 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
913 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
914 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);
920 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_LINK
);
927 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_IFADDR
);
931 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_IFADDR
);
938 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_ROUTE
);
942 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_ROUTE
);