1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Tom Gundersen <teg@jklm.no>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/socket.h>
30 #include "sd-netlink.h"
31 #include "netlink-internal.h"
32 #include "netlink-util.h"
/* Allocate and initialise a new sd_netlink object.
 *
 * ret is checked with assert_return(ret, -EINVAL). The object is
 * zero-allocated with new0(); the visible initialisation sets the reference
 * count to REFCNT_INIT, the socket address family to AF_NETLINK, records the
 * creating process, initialises the match_callbacks list head and sizes the
 * read buffer with greedy_realloc() for at least sizeof(struct nlmsghdr)
 * bytes.
 * NOTE(review): allocation-failure handling, the initial serial value and
 * the hand-over through ret are not visible in this excerpt. */
34 static int sd_netlink_new(sd_netlink
**ret
) {
35 _cleanup_netlink_unref_ sd_netlink
*rtnl
= NULL
;
37 assert_return(ret
, -EINVAL
);
39 rtnl
= new0(sd_netlink
, 1);
43 rtnl
->n_ref
= REFCNT_INIT
;
47 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
/* Remember the creating process; rtnl_pid_changed() compares against it. */
49 rtnl
->original_pid
= getpid();
51 LIST_HEAD_INIT(rtnl
->match_callbacks
);
53 /* We guarantee that the read buffer has at least space for
55 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
56 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
59 /* Change notification responses have sequence 0, so we must
60 * start our request sequence numbers at 1, or we may confuse our
61 * responses with notifications from the kernel */
/* Create an sd_netlink object for an already existing netlink socket fd.
 *
 * ret is checked with assert_return(ret, -EINVAL). A fresh object is obtained
 * from sd_netlink_new() and the local address of fd is read into
 * rtnl->sockaddr with getsockname().
 * NOTE(review): the error checks after each call and the hand-over of fd and
 * the object to the caller are not visible in this excerpt. */
70 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
71 _cleanup_netlink_unref_ sd_netlink
*rtnl
= NULL
;
75 assert_return(ret
, -EINVAL
);
77 r
= sd_netlink_new(&rtnl
);
/* addrlen is an in/out argument of getsockname(): pass the buffer size in. */
81 addrlen
= sizeof(rtnl
->sockaddr
);
83 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
/* Tell whether the calling process is a different one than the process that
 * created rtnl, by comparing the pid stored in original_pid (set in
 * sd_netlink_new()) with the current getpid(). Returns true on mismatch. */
95 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
98 /* We don't support people creating an rtnl connection and
99 * keeping it around over a fork(). Let's complain. */
101 return rtnl
->original_pid
!= getpid();
/* Create an sd_netlink object on top of the caller-supplied socket fd.
 *
 * ret is checked with assert_return(ret, -EINVAL) and fd with
 * assert_return(fd >= 0, -EBADF). A new object is created with
 * sd_netlink_new() and then bound with socket_bind().
 * NOTE(review): where fd is stored in the object, the error checks and the
 * assignment to ret are not visible in this excerpt. */
104 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
105 _cleanup_netlink_unref_ sd_netlink
*rtnl
= NULL
;
108 assert_return(ret
, -EINVAL
);
109 assert_return(fd
>= 0, -EBADF
);
111 r
= sd_netlink_new(&rtnl
);
117 r
= socket_bind(rtnl
);
/* Open a new NETLINK_ROUTE socket with socket_open() and wrap it in an
 * sd_netlink object through sd_netlink_open_fd().
 *
 * fd is declared with _cleanup_close_ and starts out as -1.
 * NOTE(review): the error checks and what happens to fd after a successful
 * sd_netlink_open_fd() are not visible in this excerpt. */
127 int sd_netlink_open(sd_netlink
**ret
) {
128 _cleanup_close_
int fd
= -1;
131 fd
= socket_open(NETLINK_ROUTE
);
135 r
= sd_netlink_open_fd(ret
, fd
);
144 int sd_netlink_inc_rcvbuf(const sd_netlink
*const rtnl
, const int size
) {
145 return fd_inc_rcvbuf(rtnl
->fd
, size
);
/* Take an additional reference on rtnl.
 *
 * rtnl is checked with assert_return(rtnl, NULL) and the call is refused in
 * the same way when rtnl_pid_changed() reports a different process. The
 * counter n_ref is incremented with REFCNT_INC() and the result is asserted
 * to be at least 2, i.e. the object must already have been referenced.
 * NOTE(review): the return statement is not visible in this excerpt. */
148 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
149 assert_return(rtnl
, NULL
);
150 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
153 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
/* Drop one reference on rtnl and destroy the object when the count reaches
 * zero.
 *
 * The call is guarded by assert_return(!rtnl_pid_changed(rtnl), NULL). When
 * REFCNT_DEC() yields 0 the visible teardown releases, in this order: every
 * message in rqueue and rqueue_partial, the rqueue_partial array, the
 * reply_callbacks hashmap (with its values), the reply_callbacks_prioq, both
 * event sources and the event loop reference, every entry of the
 * match_callbacks list, and finally the socket via safe_close().
 * NOTE(review): a NULL check on rtnl, the freeing of the remaining buffers
 * and of the object itself, and the return value are not visible in this
 * excerpt. */
158 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
162 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
164 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
165 struct match_callback
*f
;
/* Release complete messages that were queued but never dispatched. */
168 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
169 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
/* Same for messages still waiting in the partial queue. */
172 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
173 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
174 free(rtnl
->rqueue_partial
);
178 hashmap_free_free(rtnl
->reply_callbacks
);
179 prioq_free(rtnl
->reply_callbacks_prioq
);
181 sd_event_source_unref(rtnl
->io_event_source
);
182 sd_event_source_unref(rtnl
->time_event_source
);
183 sd_event_unref(rtnl
->event
);
/* Pop match callbacks off the list head until the list is empty. */
185 while ((f
= rtnl
->match_callbacks
)) {
186 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, f
);
190 safe_close(rtnl
->fd
);
/* Assign the next request sequence number of rtnl to message m and seal it
 * with rtnl_message_seal().
 *
 * Asserts that the connection is still used by the process that created
 * it. The connection's serial counter is advanced as a side effect. */
197 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
199 assert(!rtnl_pid_changed(rtnl
));
203 /* don't use seq == 0, as that is used for broadcasts, so we
204 would get confused by replies to such messages */
/* "a ?: b" is the GNU conditional with omitted middle operand: use the
 * post-incremented serial unless it was 0, in which case take the next
 * value instead. */
205 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
207 rtnl_message_seal(m
);
/* Seal message and write it to the netlink socket of nl.
 *
 * Argument checks, all through assert_return():
 *   nl NULL                          -EINVAL
 *   used from a different process    -ECHILD
 *   message NULL                     -EINVAL
 *   message already sealed           -EPERM
 *
 * The message gets its sequence number in rtnl_seal_message() and is sent
 * with socket_write_message(). The sequence number is reported back through
 * serial.
 * NOTE(review): the declaration of the serial parameter, the check of r and
 * any guard around the store to serial are not visible in this excerpt. */
212 int sd_netlink_send(sd_netlink
*nl
,
213 sd_netlink_message
*message
,
217 assert_return(nl
, -EINVAL
);
218 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
219 assert_return(message
, -EINVAL
);
220 assert_return(!message
->sealed
, -EPERM
);
222 rtnl_seal_message(nl
, message
);
224 r
= socket_write_message(nl
, message
);
229 *serial
= rtnl_message_get_serial(message
);
/* Make sure the read queue of rtnl can take one more message.
 *
 * When rqueue_size has already reached RTNL_RQUEUE_MAX a debug message is
 * logged; otherwise the rqueue array is grown with GREEDY_REALLOC() so that
 * it holds at least rqueue_size + 1 entries.
 * NOTE(review): the values returned on the limit, on allocation failure and
 * on success are not visible in this excerpt. */
234 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
237 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
238 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
242 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
/* Make sure the partial read queue of rtnl can take one more message.
 *
 * Mirrors rtnl_rqueue_make_room() for rqueue_partial: the same
 * RTNL_RQUEUE_MAX limit is applied to rqueue_partial_size, a debug message
 * is logged when it is reached, and otherwise the array is grown with
 * GREEDY_REALLOC() to hold at least rqueue_partial_size + 1 entries.
 * NOTE(review): the returned values are not visible in this excerpt. */
248 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
251 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
252 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
256 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
257 rtnl
->rqueue_partial_size
+ 1))
/* Hand out the oldest message of the read queue through message.
 *
 * If the queue is empty, socket_read_message() is called first to try to
 * fetch new data. The head entry rqueue[0] is then stored in message,
 * rqueue_size is decremented and the remaining pointers are moved one slot
 * towards the front.
 * NOTE(review): the handling of a failed or empty read and the return value
 * are not visible in this excerpt. */
263 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
269 if (rtnl
->rqueue_size
<= 0) {
270 /* Try to read a new message */
271 r
= socket_read_message(rtnl
);
276 /* Dispatch a queued message */
277 *message
= rtnl
->rqueue
[0];
278 rtnl
->rqueue_size
--;
/* rqueue_size was already decremented, so this moves exactly the entries
 * that remain behind the removed head. */
279 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
/* Expire the reply callback at the head of the timeout priority queue.
 *
 * The head entry is looked up with prioq_peek() and the current
 * CLOCK_MONOTONIC time is sampled. For the expiring entry a synthetic error
 * message carrying -ETIMEDOUT and the entry's serial is created, the entry
 * is removed from both the priority queue and the reply_callbacks hashmap,
 * and its callback is invoked with that message. A failing callback is only
 * logged at debug level.
 * NOTE(review): the NULL check on the peeked entry, the comparison of its
 * timeout against the sampled time, the release of the entry and the return
 * value are not visible in this excerpt. */
284 static int process_timeout(sd_netlink
*rtnl
) {
285 _cleanup_netlink_message_unref_ sd_netlink_message
*m
= NULL
;
286 struct reply_callback
*c
;
292 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
296 n
= now(CLOCK_MONOTONIC
);
300 r
= rtnl_message_new_synthetic_error(-ETIMEDOUT
, c
->serial
, &m
);
/* The entry popped here must be the one that was peeked above. */
304 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
305 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
307 r
= c
->callback(rtnl
, m
, c
->userdata
);
309 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
/* Deliver reply message m to the callback registered for its serial.
 *
 * The serial of m is used as key to remove the matching entry from the
 * reply_callbacks hashmap; the entry is also taken out of the timeout
 * priority queue. The message type is fetched and compared against
 * NLMSG_DONE, then the callback is invoked with rtnl, m and the stored
 * userdata. A failing callback is only logged at debug level. The entry c
 * is declared _cleanup_free_.
 * NOTE(review): the handling of an unknown serial, the condition guarding
 * prioq_remove(), what is done for NLMSG_DONE and the return value are not
 * visible in this excerpt. */
316 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
317 _cleanup_free_
struct reply_callback
*c
= NULL
;
325 serial
= rtnl_message_get_serial(m
);
326 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
331 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
333 r
= sd_netlink_message_get_type(m
, &type
);
337 if (type
== NLMSG_DONE
)
340 r
= c
->callback(rtnl
, m
, c
->userdata
);
342 log_debug_errno(r
, "sd-netlink: callback failed: %m");
/* Deliver message m to the match callbacks registered for its type.
 *
 * The message type is fetched with sd_netlink_message_get_type() and the
 * match_callbacks list is walked; each entry whose type equals the message
 * type has its callback invoked with rtnl, m and the stored userdata. A
 * failing callback is logged at debug level.
 * NOTE(review): whether the walk stops after a callback and the return
 * value are not visible in this excerpt. */
347 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
348 struct match_callback
*c
;
355 r
= sd_netlink_message_get_type(m
, &type
);
359 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
360 if (type
== c
->type
) {
361 r
= c
->callback(rtnl
, m
, c
->userdata
);
364 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
/* Perform one processing step on rtnl.
 *
 * Order of the visible steps: expire a timed-out reply callback with
 * process_timeout(), fetch one message with dispatch_rqueue(), then route it
 * to process_match() when sd_netlink_message_is_broadcast() says it is a
 * broadcast, and to process_reply() otherwise.
 * NOTE(review): the checks between the steps and the use of ret are not
 * visible in this excerpt. */
374 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
375 _cleanup_netlink_message_unref_ sd_netlink_message
*m
= NULL
;
380 r
= process_timeout(rtnl
);
384 r
= dispatch_rqueue(rtnl
, &m
);
390 if (sd_netlink_message_is_broadcast(m
)) {
391 r
= process_match(rtnl
, m
);
395 r
= process_reply(rtnl
, m
);
/* Public entry point for running one processing step on rtnl.
 *
 * Argument checks, all through assert_return():
 *   rtnl NULL                        -EINVAL
 *   used from a different process    -ECHILD
 *   already inside this function     -EBUSY
 *
 * The processing flag is set around the call to process_running() so that a
 * nested call, e.g. from a callback, is rejected by the -EBUSY check.
 * NOTE(review): the return statement is not visible in this excerpt. */
416 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
417 RTNL_DONT_DESTROY(rtnl
);
420 assert_return(rtnl
, -EINVAL
);
421 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
422 assert_return(!rtnl
->processing
, -EBUSY
);
424 rtnl
->processing
= true;
425 r
= process_running(rtnl
, ret
);
426 rtnl
->processing
= false;
/* Turn a relative timeout in microseconds into an absolute CLOCK_MONOTONIC
 * timestamp (current time plus usec).
 *
 * (uint64_t) -1 is treated as a special value, and RTNL_DEFAULT_TIMEOUT is
 * substituted for usec in one case.
 * NOTE(review): what is returned for (uint64_t) -1 and the condition under
 * which the default is substituted are not visible in this excerpt;
 * presumably "no timeout" and usec == 0 respectively; confirm. */
431 static usec_t
calc_elapse(uint64_t usec
) {
432 if (usec
== (uint64_t) -1)
436 usec
= RTNL_DEFAULT_TIMEOUT
;
438 return now(CLOCK_MONOTONIC
) + usec
;
/* Wait on the netlink socket of rtnl with ppoll().
 *
 * need_more selects between two modes described by the comments in the
 * body. The wait time m starts as USEC_INFINITY; the connection's own
 * deadline from sd_netlink_get_timeout() is converted to a relative time
 * (clamped at 0 when already past), and the caller's timeout_usec is taken
 * into account when it is not (uint64_t) -1 and is shorter than m or m is
 * still infinite. ppoll() gets a NULL timeout when m is (uint64_t) -1.
 *
 * The visible return yields 1 when ppoll() reported a ready descriptor and
 * 0 otherwise.
 * NOTE(review): the setup of the pollfd entry, the branch on need_more and
 * the error returns are not visible in this excerpt. */
441 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
442 struct pollfd p
[1] = {};
444 usec_t m
= USEC_INFINITY
;
449 e
= sd_netlink_get_events(rtnl
);
454 /* Caller wants more data, and doesn't care about
455 * what's been read or any other timeouts. */
459 /* Caller wants to process if there is something to
460 * process, but doesn't care otherwise */
462 r
= sd_netlink_get_timeout(rtnl
, &until
);
467 nw
= now(CLOCK_MONOTONIC
);
/* until is an absolute timestamp; convert it to a relative wait. */
468 m
= until
> nw
? until
- nw
: 0;
472 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
478 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
482 return r
> 0 ? 1 : 0;
/* Wait until nl has something to process or timeout_usec has passed.
 *
 * nl is checked with assert_return(nl, -EINVAL) and use from a different
 * process is rejected with -ECHILD. A non-empty read queue is handled
 * without polling; otherwise the call is forwarded to rtnl_poll() with
 * need_more set to false.
 * NOTE(review): the value returned for a non-empty queue is not visible in
 * this excerpt. */
485 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
486 assert_return(nl
, -EINVAL
);
487 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
489 if (nl
->rqueue_size
> 0)
492 return rtnl_poll(nl
, false, timeout_usec
);
/* Ordering function for struct reply_callback entries; it is the comparator
 * handed to prioq_ensure_allocated() in sd_netlink_call_async().
 *
 * a and b point to struct reply_callback objects. The first two tests
 * separate entries with a zero timeout from entries with a non-zero one;
 * the last two order entries by the timeout value itself.
 * NOTE(review): the value returned by each branch is not visible in this
 * excerpt. */
495 static int timeout_compare(const void *a
, const void *b
) {
496 const struct reply_callback
*x
= a
, *y
= b
;
498 if (x
->timeout
!= 0 && y
->timeout
== 0)
501 if (x
->timeout
== 0 && y
->timeout
!= 0)
504 if (x
->timeout
< y
->timeout
)
507 if (x
->timeout
> y
->timeout
)
/* Send message m on nl and register callback to be invoked with the reply.
 *
 * Argument checks, all through assert_return(): nl, m and callback must be
 * non-NULL (-EINVAL) and nl must be used by the creating process (-ECHILD).
 *
 * Visible steps: make sure the reply_callbacks hashmap exists; when usec is
 * not (uint64_t) -1 also make sure the timeout priority queue exists;
 * allocate a reply_callback entry holding callback, userdata and the
 * absolute deadline from calc_elapse(); send the message; insert the entry
 * into the hashmap keyed by its serial; when the entry has a non-zero
 * timeout also insert it into the priority queue, calling
 * sd_netlink_call_async_cancel() for the serial on that path.
 * NOTE(review): the remaining parameters, the error checks, the assignment
 * of the serial to the entry and the return value are not visible in this
 * excerpt. */
513 int sd_netlink_call_async(sd_netlink
*nl
,
514 sd_netlink_message
*m
,
515 sd_netlink_message_handler_t callback
,
519 struct reply_callback
*c
;
523 assert_return(nl
, -EINVAL
);
524 assert_return(m
, -EINVAL
);
525 assert_return(callback
, -EINVAL
);
526 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
528 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
532 if (usec
!= (uint64_t) -1) {
533 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
538 c
= new0(struct reply_callback
, 1);
542 c
->callback
= callback
;
543 c
->userdata
= userdata
;
544 c
->timeout
= calc_elapse(usec
);
546 k
= sd_netlink_send(nl
, m
, &s
);
554 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
560 if (c
->timeout
!= 0) {
561 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
564 sd_netlink_call_async_cancel(nl
, c
->serial
);
/* Cancel the pending asynchronous call identified by serial.
 *
 * Argument checks, all through assert_return(): nl must be non-NULL
 * (-EINVAL), serial must be non-zero (-EINVAL) and nl must be used by the
 * creating process (-ECHILD).
 *
 * The entry is removed from the reply_callbacks hashmap and from the
 * timeout priority queue.
 * NOTE(review): the declaration of the lookup key s, the handling of an
 * unknown serial, the condition guarding prioq_remove(), the release of the
 * entry and the return value are not visible in this excerpt. */
575 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
576 struct reply_callback
*c
;
579 assert_return(nl
, -EINVAL
);
580 assert_return(serial
!= 0, -EINVAL
);
581 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
583 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
588 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
/* Send message on rtnl and wait synchronously for the matching reply.
 *
 * Argument checks, all through assert_return(): rtnl and message must be
 * non-NULL (-EINVAL) and rtnl must be used by the creating process
 * (-ECHILD).
 *
 * After sending, the absolute deadline is computed with calc_elapse(). The
 * read queue is scanned for a message whose serial equals the one just
 * sent; a match is taken out of the queue and its errno and type are
 * inspected. When nothing matches, more data is read with
 * socket_read_message(), and rtnl_poll() is used with need_more set to true
 * to wait for further input.
 * NOTE(review): the usec parameter, the surrounding retry loop, the
 * decrement of rqueue_size after the memmove(), the timeout arithmetic and
 * all return statements are not visible in this excerpt. */
594 int sd_netlink_call(sd_netlink
*rtnl
,
595 sd_netlink_message
*message
,
597 sd_netlink_message
**ret
) {
602 assert_return(rtnl
, -EINVAL
);
603 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
604 assert_return(message
, -EINVAL
);
606 r
= sd_netlink_send(rtnl
, message
, &serial
);
610 timeout
= calc_elapse(usec
);
616 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
617 uint32_t received_serial
;
619 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
621 if (received_serial
== serial
) {
622 _cleanup_netlink_message_unref_ sd_netlink_message
*incoming
= NULL
;
625 incoming
= rtnl
->rqueue
[i
];
627 /* found a match, remove from rqueue and return it */
628 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
629 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
632 r
= sd_netlink_message_get_errno(incoming
);
636 r
= sd_netlink_message_get_type(incoming
, &type
);
640 if (type
== NLMSG_DONE
) {
654 r
= socket_read_message(rtnl
);
658 /* received message, so try to process straight away */
664 n
= now(CLOCK_MONOTONIC
);
670 left
= (uint64_t) -1;
672 r
= rtnl_poll(rtnl
, true, left
);
/* Report which poll events the caller should wait for on the socket of
 * rtnl.
 *
 * rtnl is checked with assert_return(rtnl, -EINVAL) and use from a
 * different process is rejected with -ECHILD. The result depends on whether
 * the read queue is empty.
 * NOTE(review): the values returned for an empty and a non-empty queue are
 * not visible in this excerpt. */
680 int sd_netlink_get_events(sd_netlink
*rtnl
) {
681 assert_return(rtnl
, -EINVAL
);
682 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
684 if (rtnl
->rqueue_size
== 0)
/* Report through timeout_usec when rtnl next needs to be processed.
 *
 * Argument checks, all through assert_return(): rtnl and timeout_usec must
 * be non-NULL (-EINVAL) and rtnl must be used by the creating process
 * (-ECHILD).
 *
 * A non-empty read queue is handled as a special case. Otherwise the head
 * of the reply-callback priority queue is consulted: timeout_usec is set
 * either to (uint64_t) -1 or to the timeout of that head entry.
 * NOTE(review): the value stored for a non-empty queue, the condition that
 * selects (uint64_t) -1 (presumably an empty priority queue) and the return
 * values are not visible in this excerpt. */
690 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
691 struct reply_callback
*c
;
693 assert_return(rtnl
, -EINVAL
);
694 assert_return(timeout_usec
, -EINVAL
);
695 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
697 if (rtnl
->rqueue_size
> 0) {
702 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
704 *timeout_usec
= (uint64_t) -1;
708 *timeout_usec
= c
->timeout
;
/* I/O event handler installed by sd_netlink_attach_event().
 *
 * userdata is the sd_netlink object; one processing step is run on it with
 * sd_netlink_process(), discarding any returned message (NULL is passed).
 * The parameters s, fd and revents are not used by the visible code.
 * NOTE(review): the return value is not visible in this excerpt. */
713 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
714 sd_netlink
*rtnl
= userdata
;
719 r
= sd_netlink_process(rtnl
, NULL
);
/* Timer event handler installed by sd_netlink_attach_event().
 *
 * userdata is the sd_netlink object; one processing step is run on it with
 * sd_netlink_process(), discarding any returned message (NULL is passed).
 * The parameters s and usec are not used by the visible code.
 * NOTE(review): the return value is not visible in this excerpt. */
726 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
727 sd_netlink
*rtnl
= userdata
;
732 r
= sd_netlink_process(rtnl
, NULL
);
/* Prepare handler installed on the I/O event source by
 * sd_netlink_attach_event().
 *
 * userdata is the sd_netlink object. The handler refreshes both event
 * sources from the connection state: the I/O source gets the event mask
 * from sd_netlink_get_events(), and the timer source gets the deadline from
 * sd_netlink_get_timeout() and is enabled or disabled depending on r.
 * NOTE(review): the error checks, the condition around
 * sd_event_source_set_time() and the return value are not visible in this
 * excerpt. */
739 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
740 sd_netlink
*rtnl
= userdata
;
747 e
= sd_netlink_get_events(rtnl
);
751 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
755 r
= sd_netlink_get_timeout(rtnl
, &until
);
761 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
766 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
/* Attach rtnl to an sd_event loop so that it is processed automatically.
 *
 * rtnl is checked with assert_return(rtnl, -EINVAL); a connection that
 * already has an event loop is rejected with -EBUSY. Both event sources
 * are asserted to be unset.
 *
 * The loop is either a new reference to event or the default loop from
 * sd_event_default(). Two sources are then created with the given priority:
 *   - an I/O source on rtnl->fd ("rtnl-receive-message") handled by
 *     io_callback, with prepare_callback as its prepare handler;
 *   - a CLOCK_MONOTONIC timer source ("rtnl-timer") handled by
 *     time_callback.
 * sd_netlink_detach_event() is called at the end of the visible code.
 * NOTE(review): the condition choosing between event and the default loop,
 * the error checks, the label that presumably makes the final detach a
 * failure path and the return values are not visible in this excerpt. */
773 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int priority
) {
776 assert_return(rtnl
, -EINVAL
);
777 assert_return(!rtnl
->event
, -EBUSY
);
779 assert(!rtnl
->io_event_source
);
780 assert(!rtnl
->time_event_source
);
783 rtnl
->event
= sd_event_ref(event
);
785 r
= sd_event_default(&rtnl
->event
);
790 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
794 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
798 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
802 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
806 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
810 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
814 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
821 sd_netlink_detach_event(rtnl
);
/* Detach rtnl from the event loop it was attached to.
 *
 * rtnl is checked with assert_return(rtnl, -EINVAL); a connection without
 * an event loop is rejected with -ENXIO. The I/O source, the timer source
 * and the loop reference are each released, and the corresponding field is
 * overwritten with the value returned by the unref call.
 * NOTE(review): the return statement is not visible in this excerpt. */
825 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
826 assert_return(rtnl
, -EINVAL
);
827 assert_return(rtnl
->event
, -ENXIO
);
829 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
831 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
833 rtnl
->event
= sd_event_unref(rtnl
->event
);
/* Register callback to be invoked for broadcast messages.
 *
 * Argument checks, all through assert_return(): rtnl and callback must be
 * non-NULL (-EINVAL) and rtnl must be used by the creating process
 * (-ECHILD).
 *
 * A match_callback entry is allocated and filled with callback and
 * userdata. The socket joins the broadcast groups RTNLGRP_LINK,
 * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR as needed, and the entry is
 * prepended to the match_callbacks list. The entry c is declared
 * _cleanup_free_.
 * NOTE(review): the type and userdata parameters, the selection of the
 * group(s) to join (presumably by message type), the error checks, the
 * release of ownership of c and the return value are not visible in this
 * excerpt. */
838 int sd_netlink_add_match(sd_netlink
*rtnl
,
840 sd_netlink_message_handler_t callback
,
842 _cleanup_free_
struct match_callback
*c
= NULL
;
845 assert_return(rtnl
, -EINVAL
);
846 assert_return(callback
, -EINVAL
);
847 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
849 c
= new0(struct match_callback
, 1);
853 c
->callback
= callback
;
855 c
->userdata
= userdata
;
862 r
= socket_join_broadcast_group(rtnl
, RTNLGRP_LINK
);
870 r
= socket_join_broadcast_group(rtnl
, RTNLGRP_IPV4_IFADDR
);
874 r
= socket_join_broadcast_group(rtnl
, RTNLGRP_IPV6_IFADDR
);
883 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
890 int sd_netlink_remove_match(sd_netlink
*rtnl
,
892 sd_netlink_message_handler_t callback
,
894 struct match_callback
*c
;
896 assert_return(rtnl
, -EINVAL
);
897 assert_return(callback
, -EINVAL
);
898 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
900 /* we should unsubscribe from the broadcast groups at this point, but it is not so
901 trivial for a few reasons: the refcounting is a bit of a mess and not obvious
902 how it will look like after we add genetlink support, and it is also not possible
903 to query what broadcast groups were subscribed to when we inherit the socket to get
904 the initial refcount. The latter could indeed be done for the first 32 broadcast
905 groups (which incidentally is all we currently support in .socket units anyway),
906 but we better not rely on only ever using 32 groups. */
907 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
908 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
909 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);