1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Tom Gundersen <teg@jklm.no>
9 #include <sys/socket.h>
11 #include "sd-netlink.h"
13 #include "alloc-util.h"
18 #include "netlink-internal.h"
19 #include "netlink-util.h"
20 #include "process-util.h"
21 #include "socket-util.h"
/* Allocate and initialise a fresh sd_netlink object and hand it to the caller via *ret.
 *
 * Visible steps: ret is validated with assert_return() (-EINVAL), the object is
 * zero-allocated with new0(), its reference count is set to REFCNT_INIT, the socket
 * address family is set to AF_NETLINK, the PID reported by getpid_cached() is stored in
 * original_pid, the match_callbacks list head is initialised, and rbuffer is grown with
 * greedy_realloc() so it can hold at least sizeof(struct nlmsghdr) bytes. Ownership is
 * finally moved into *ret with TAKE_PTR().
 *
 * The local handle carries _cleanup_(sd_netlink_unrefp), so presumably it is released
 * automatically on any early exit before TAKE_PTR() runs.
 *
 * NOTE(review): in this view the allocation-failure checks, the serial initialisation
 * that the "sequence numbers start at 1" comment describes, and the return statement are
 * not visible; the comment opened on the "41" line is also not closed here. Confirm
 * against the complete file. */
24 static int sd_netlink_new(sd_netlink
**ret
) {
25 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
27 assert_return(ret
, -EINVAL
);
29 rtnl
= new0(sd_netlink
, 1);
33 rtnl
->n_ref
= REFCNT_INIT
;
35 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
36 rtnl
->original_pid
= getpid_cached();
39 LIST_HEAD_INIT(rtnl
->match_callbacks
);
41 /* We guarantee that the read buffer has at least space for
43 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
44 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
47 /* Change notification responses have sequence 0, so we must
48 * start our request sequence numbers at 1, or we may confuse our
49 * responses with notifications from the kernel */
52 *ret
= TAKE_PTR(rtnl
);
/* Build an sd_netlink object around an already existing socket fd.
 *
 * Visible steps: ret is validated (-EINVAL), a new object is created with
 * sd_netlink_new(), the socket's local address is read into rtnl->sockaddr with
 * getsockname(), the resulting nl_family is compared against AF_NETLINK, and the object
 * is handed over through *ret with TAKE_PTR().
 *
 * NOTE(review): the declarations of r and addrlen, the error handling after each call,
 * what happens when the family is not AF_NETLINK, and whether fd is stored in the object
 * are not visible in this view. */
57 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
58 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
62 assert_return(ret
, -EINVAL
);
64 r
= sd_netlink_new(&rtnl
);
/* addrlen is an in/out argument for getsockname(): pass in the buffer size. */
68 addrlen
= sizeof(rtnl
->sockaddr
);
70 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
/* Check that the socket we were given really is a netlink socket. */
74 if (rtnl
->sockaddr
.nl
.nl_family
!= AF_NETLINK
)
79 *ret
= TAKE_PTR(rtnl
);
/* Report whether the current process differs from the one that created the connection:
 * returns true when rtnl->original_pid is not equal to getpid_cached(). */
84 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
87 /* We don't support people creating an rtnl connection and
88 * keeping it around over a fork(). Let's complain. */
90 return rtnl
->original_pid
!= getpid_cached();
/* Create an sd_netlink object on top of a caller-supplied socket fd.
 *
 * Visible steps: ret must be non-NULL (-EINVAL) and fd must be >= 0 (-EBADF); a new
 * object is created with sd_netlink_new(); the socket protocol is queried with
 * getsockopt(SOL_SOCKET, SO_PROTOCOL) and stored in rtnl->protocol; socket_bind() is
 * called on the object; and the object is returned through *ret with TAKE_PTR().
 *
 * NOTE(review): the local declarations (r, l, protocol), the error checks, and the
 * statement that stores fd in the object are not visible in this view. The "fd = -1"
 * reset below is, per its own comment, the failure path. */
93 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
94 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
99 assert_return(ret
, -EINVAL
);
100 assert_return(fd
>= 0, -EBADF
);
102 r
= sd_netlink_new(&rtnl
);
/* l is the in/out length argument for getsockopt(). */
106 l
= sizeof(protocol
);
107 r
= getsockopt(fd
, SOL_SOCKET
, SO_PROTOCOL
, &protocol
, &l
);
112 rtnl
->protocol
= protocol
;
114 r
= socket_bind(rtnl
);
116 rtnl
->fd
= -1; /* on failure, the caller remains owner of the fd, hence don't close it here */
121 *ret
= TAKE_PTR(rtnl
);
/* Open a new socket for the given netlink family with socket_open() and wrap it in an
 * sd_netlink object by calling sd_netlink_open_fd(ret, fd).
 *
 * The fd local is declared _cleanup_close_, so presumably it is closed automatically
 * when the function exits unless ownership is released first.
 * NOTE(review): the error checks and the success-path handling of fd (and the return
 * statement) are not visible in this view. */
126 int netlink_open_family(sd_netlink
**ret
, int family
) {
127 _cleanup_close_
int fd
= -1;
130 fd
= socket_open(family
);
134 r
= sd_netlink_open_fd(ret
, fd
);
/* Convenience wrapper: open a netlink connection for the NETLINK_ROUTE family by
 * delegating to netlink_open_family(); its result is returned unchanged. */
143 int sd_netlink_open(sd_netlink
**ret
) {
144 return netlink_open_family(ret
, NETLINK_ROUTE
);
/* Ask for a larger receive buffer on the connection's socket.
 *
 * rtnl must be non-NULL (-EINVAL) and must not be used from a different process than
 * the one that created it (-ECHILD). The result of fd_inc_rcvbuf(rtnl->fd, size) is
 * returned as is. */
147 int sd_netlink_inc_rcvbuf(sd_netlink
*rtnl
, size_t size
) {
148 assert_return(rtnl
, -EINVAL
);
149 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
151 return fd_inc_rcvbuf(rtnl
->fd
, size
);
/* Take an additional reference on rtnl.
 *
 * Both guards yield NULL: when rtnl is NULL and when the calling process is not the
 * one that created the connection. The reference count is bumped with REFCNT_INC() and
 * the new value is asserted to be at least 2, i.e. the object must already have been
 * referenced before this call.
 * NOTE(review): the return statement is not visible in this view. */
154 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
155 assert_return(rtnl
, NULL
);
156 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
159 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
/* Drop one reference on rtnl and tear the object down when the count reaches zero.
 *
 * When REFCNT_DEC() returns 0 the visible teardown: unrefs every message in rqueue and
 * rqueue_partial, frees the rqueue_partial array, frees the reply-callback hashmap
 * (including its values, via hashmap_free_free()) and the reply-callback priority queue,
 * unrefs both event sources and the event loop, removes every remaining match callback
 * through sd_netlink_remove_match(), frees broadcast_group_refs and closes the socket.
 *
 * The pid guard yields NULL, so nothing is released when called from a process other
 * than the creator.
 * NOTE(review): the NULL-argument handling, the declaration of i, the freeing of the
 * rqueue array / read buffer / object itself, and the return statement are not visible
 * in this view. */
164 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
168 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
170 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
171 struct match_callback
*f
;
/* Release all fully received messages that are still queued. */
174 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
175 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
/* Release all partially received (multi-part) messages. */
178 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
179 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
180 free(rtnl
->rqueue_partial
);
184 hashmap_free_free(rtnl
->reply_callbacks
);
185 prioq_free(rtnl
->reply_callbacks_prioq
);
187 sd_event_source_unref(rtnl
->io_event_source
);
188 sd_event_source_unref(rtnl
->time_event_source
);
189 sd_event_unref(rtnl
->event
);
/* Pop match callbacks from the list head until the list is empty. */
191 while ((f
= rtnl
->match_callbacks
)) {
192 sd_netlink_remove_match(rtnl
, f
->type
, f
->callback
, f
->userdata
);
195 hashmap_free(rtnl
->broadcast_group_refs
);
197 safe_close(rtnl
->fd
);
/* Assign the next request sequence number to message m and seal it.
 *
 * The sequence number is taken from rtnl->serial with a post-increment. The GNU
 * "a ?: b" form is used so that, if the value just taken is 0, the counter is
 * post-incremented a second time and that value is used instead; this is how serial 0
 * is skipped after wrap-around. The message is then passed to rtnl_message_seal().
 * Calling this from a process other than the creator trips the assert(). */
204 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
206 assert(!rtnl_pid_changed(rtnl
));
210 /* don't use seq == 0, as that is used for broadcasts, so we
211 would get confused by replies to such messages */
212 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
214 rtnl_message_seal(m
);
/* Seal a message and write it to the netlink socket.
 *
 * Guards: nl and message must be non-NULL (-EINVAL), the connection must belong to the
 * calling process (-ECHILD), and the message must not already be sealed (-EPERM).
 * The message gets a serial via rtnl_seal_message(), is written with
 * socket_write_message(), and its serial is stored through the serial out-pointer.
 *
 * NOTE(review): the last line of the parameter list (the serial out-parameter), the
 * error check after the write, any NULL check guarding the *serial store, and the
 * return statement are not visible in this view. */
219 int sd_netlink_send(sd_netlink
*nl
,
220 sd_netlink_message
*message
,
224 assert_return(nl
, -EINVAL
);
225 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
226 assert_return(message
, -EINVAL
);
227 assert_return(!message
->sealed
, -EPERM
);
229 rtnl_seal_message(nl
, message
);
231 r
= socket_write_message(nl
, message
);
236 *serial
= rtnl_message_get_serial(message
);
/* Make sure the read queue has room for one more message.
 *
 * If rqueue_size has already reached RTNL_RQUEUE_MAX a debug message is logged;
 * otherwise the rqueue array is grown with GREEDY_REALLOC() to hold rqueue_size + 1
 * entries.
 * NOTE(review): the values returned on the "queue full", allocation-failure and success
 * paths are not visible in this view. */
241 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
244 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
245 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
249 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
/* Make sure the partial-message read queue has room for one more entry.
 *
 * Mirrors rtnl_rqueue_make_room() for rqueue_partial: logs a debug message when
 * rqueue_partial_size has reached RTNL_RQUEUE_MAX, otherwise grows the array with
 * GREEDY_REALLOC() to hold rqueue_partial_size + 1 entries.
 * NOTE(review): the return values of the individual paths are not visible in this
 * view. */
255 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
258 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
259 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
263 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
264 rtnl
->rqueue_partial_size
+ 1))
/* Hand the oldest queued message to the caller through *message.
 *
 * When the queue is empty a read is attempted first with socket_read_message(); an
 * -ENOBUFS result from that read is only logged at debug level (see the FIXME). The
 * first queue entry is then stored in *message and the remaining entries are shifted
 * down by one slot with memmove().
 *
 * NOTE(review): the memmove() length uses rqueue_size as it is at that point; this is
 * only in bounds if rqueue_size was decremented beforehand. That decrement, the other
 * error checks and the return statements are not visible in this view — confirm. */
270 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
276 if (rtnl
->rqueue_size
<= 0) {
277 /* Try to read a new message */
278 r
= socket_read_message(rtnl
);
279 if (r
== -ENOBUFS
) { /* FIXME: ignore buffer overruns for now */
280 log_debug_errno(r
, "Got ENOBUFS from netlink socket, ignoring.");
287 /* Dispatch a queued message */
288 *message
= rtnl
->rqueue
[0];
290 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
/* Handle the reply callback at the head of the timeout priority queue.
 *
 * Visible steps: the head entry is peeked from reply_callbacks_prioq, the current
 * CLOCK_MONOTONIC time is read, a synthetic -ETIMEDOUT error message carrying the
 * callback's serial is created, the entry is popped from the priority queue (asserting
 * it is the one that was peeked) and removed from the reply_callbacks hashmap, and the
 * callback is invoked with the synthetic message. A callback failure is logged at
 * debug level.
 *
 * NOTE(review): the check that the entry has actually expired, the handling of an empty
 * queue, the release of c and the return values are not visible in this view. */
295 static int process_timeout(sd_netlink
*rtnl
) {
296 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
297 struct reply_callback
*c
;
303 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
307 n
= now(CLOCK_MONOTONIC
);
311 r
= rtnl_message_new_synthetic_error(rtnl
, -ETIMEDOUT
, c
->serial
, &m
);
/* Unregister the callback from both containers before invoking it. */
315 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
316 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
318 r
= c
->callback(rtnl
, m
, c
->userdata
);
320 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
/* Deliver message m to the reply callback registered for its serial.
 *
 * Visible steps: the message serial is read with rtnl_message_get_serial(), the
 * matching entry is removed from the reply_callbacks hashmap and from the timeout
 * priority queue, the message type is fetched (NLMSG_DONE is special-cased), and the
 * callback is invoked. A callback failure is logged at debug level. The entry c is
 * declared _cleanup_free_, so presumably it is freed when the function returns.
 *
 * NOTE(review): the handling of "no callback registered", the condition guarding the
 * prioq_remove() call, what is done for NLMSG_DONE, and the return values are not
 * visible in this view. */
327 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
328 _cleanup_free_
struct reply_callback
*c
= NULL
;
336 serial
= rtnl_message_get_serial(m
);
337 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
342 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
344 r
= sd_netlink_message_get_type(m
, &type
);
348 if (type
== NLMSG_DONE
)
351 r
= c
->callback(rtnl
, m
, c
->userdata
);
353 log_debug_errno(r
, "sd-netlink: callback failed: %m");
/* Dispatch message m to every registered match callback whose type equals the
 * message type.
 *
 * The message type is read with sd_netlink_message_get_type(); the match_callbacks
 * list is then walked with LIST_FOREACH and each entry with an equal type has its
 * callback invoked with (rtnl, m, userdata). A callback failure is logged at debug
 * level.
 * NOTE(review): whether iteration stops after a callback returns non-zero, and the
 * function's return values, are not visible in this view. */
358 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
359 struct match_callback
*c
;
366 r
= sd_netlink_message_get_type(m
, &type
);
370 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
371 if (type
== c
->type
) {
372 r
= c
->callback(rtnl
, m
, c
->userdata
);
375 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
/* Run one processing step on the connection.
 *
 * Visible order of operations: process_timeout() is called first, then one message is
 * obtained with dispatch_rqueue(). A message for which
 * sd_netlink_message_is_broadcast() is true is given to process_match(); the call to
 * process_reply() follows.
 *
 * NOTE(review): the checks on each intermediate result, the branch structure around
 * process_reply(), how m is handed to *ret and the return values are not visible in
 * this view. */
385 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
386 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
391 r
= process_timeout(rtnl
);
395 r
= dispatch_rqueue(rtnl
, &m
);
401 if (sd_netlink_message_is_broadcast(m
)) {
402 r
= process_match(rtnl
, m
);
406 r
= process_reply(rtnl
, m
);
/* Public entry point for one processing step.
 *
 * Guards: rtnl must be non-NULL (-EINVAL), must belong to the calling process
 * (-ECHILD) and must not already be inside a processing step (-EBUSY). The processing
 * flag is set around the call to process_running() so that re-entrant calls are
 * rejected by the -EBUSY guard.
 *
 * NETLINK_DONT_DESTROY(rtnl) presumably keeps the object alive for the duration of the
 * call — TODO confirm against the macro definition.
 * NOTE(review): the return statement is not visible in this view. */
426 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
427 NETLINK_DONT_DESTROY(rtnl
);
430 assert_return(rtnl
, -EINVAL
);
431 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
432 assert_return(!rtnl
->processing
, -EBUSY
);
434 rtnl
->processing
= true;
435 r
= process_running(rtnl
, ret
);
436 rtnl
->processing
= false;
/* Convert a relative timeout in microseconds into an absolute CLOCK_MONOTONIC
 * deadline: the visible return is now(CLOCK_MONOTONIC) + usec.
 *
 * (uint64_t) -1 is treated specially, and RTNL_DEFAULT_TIMEOUT is substituted for usec
 * in one case.
 * NOTE(review): what is returned for (uint64_t) -1 and the condition under which the
 * default timeout is applied are not visible in this view. */
441 static usec_t
calc_elapse(uint64_t usec
) {
442 if (usec
== (uint64_t) -1)
446 usec
= RTNL_DEFAULT_TIMEOUT
;
448 return now(CLOCK_MONOTONIC
) + usec
;
/* Wait on the netlink socket with ppoll().
 *
 * Visible steps: the event mask is obtained from sd_netlink_get_events(); the next
 * deadline is obtained from sd_netlink_get_timeout() and converted into a remaining
 * duration m (clamped to 0 when the deadline has already passed); when timeout_usec is
 * not (uint64_t) -1 and m is either (uint64_t) -1 or larger than timeout_usec, the
 * caller's timeout is considered; ppoll() is called on a single pollfd with no timeout
 * when m is (uint64_t) -1 and a timespec built by timespec_store() otherwise.
 * Returns 1 when ppoll() reported at least one ready descriptor and 0 otherwise on the
 * visible return path.
 *
 * NOTE(review): the branches on need_more, the pollfd setup, the statement executed
 * when the caller's timeout wins, and the error returns are not visible in this view. */
451 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
452 struct pollfd p
[1] = {};
454 usec_t m
= USEC_INFINITY
;
459 e
= sd_netlink_get_events(rtnl
);
464 /* Caller wants more data, and doesn't care about
465 * what's been read or any other timeouts. */
469 /* Caller wants to process if there is something to
470 * process, but doesn't care otherwise */
472 r
= sd_netlink_get_timeout(rtnl
, &until
);
/* Turn the absolute deadline into a remaining duration; never go negative. */
477 nw
= now(CLOCK_MONOTONIC
);
478 m
= until
> nw
? until
- nw
: 0;
482 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
488 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
492 return r
> 0 ? 1 : 0;
/* Wait until there is something to process on the connection or the timeout expires.
 *
 * Guards: nl must be non-NULL (-EINVAL) and belong to the calling process (-ECHILD).
 * A non-empty read queue is special-cased before polling; otherwise the result of
 * rtnl_poll(nl, false, timeout_usec) is returned.
 * NOTE(review): the value returned when the read queue is non-empty is not visible in
 * this view. */
495 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
496 assert_return(nl
, -EINVAL
);
497 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
499 if (nl
->rqueue_size
> 0)
502 return rtnl_poll(nl
, false, timeout_usec
);
/* Ordering function for the reply-callback priority queue (it is passed to
 * prioq_ensure_allocated() elsewhere in this file).
 *
 * Compares two struct reply_callback entries by their timeout field. Four cases are
 * distinguished in this order: only x has a non-zero timeout, only y has a non-zero
 * timeout, x's timeout is smaller, x's timeout is larger.
 * NOTE(review): the value returned in each case (and for equal timeouts) is not
 * visible in this view. */
505 static int timeout_compare(const void *a
, const void *b
) {
506 const struct reply_callback
*x
= a
, *y
= b
;
508 if (x
->timeout
!= 0 && y
->timeout
== 0)
511 if (x
->timeout
== 0 && y
->timeout
!= 0)
514 if (x
->timeout
< y
->timeout
)
517 if (x
->timeout
> y
->timeout
)
/* Send message m and register callback to be invoked when its reply arrives.
 *
 * Guards: nl, m and callback must be non-NULL (-EINVAL) and the connection must belong
 * to the calling process (-ECHILD).
 * Visible steps: the reply_callbacks hashmap is allocated on demand (keyed with
 * uint64_hash_ops); when usec is not (uint64_t) -1 the timeout priority queue is
 * allocated on demand as well; a zeroed struct reply_callback is filled with the
 * callback, userdata and the absolute deadline from calc_elapse(usec); the message is
 * sent with sd_netlink_send(); the entry is inserted into the hashmap under its serial;
 * and, when it has a non-zero timeout, into the priority queue. If that last insertion
 * fails the registration is rolled back through sd_netlink_call_async_cancel().
 *
 * NOTE(review): the remaining parameters (userdata, usec, serial out-parameter), the
 * local declarations, the assignment of c->serial, the error paths that free c and the
 * return values are not visible in this view. */
523 int sd_netlink_call_async(sd_netlink
*nl
,
524 sd_netlink_message
*m
,
525 sd_netlink_message_handler_t callback
,
529 struct reply_callback
*c
;
533 assert_return(nl
, -EINVAL
);
534 assert_return(m
, -EINVAL
);
535 assert_return(callback
, -EINVAL
);
536 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
538 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
/* The priority queue is only needed when the call can time out. */
542 if (usec
!= (uint64_t) -1) {
543 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
548 c
= new0(struct reply_callback
, 1);
552 c
->callback
= callback
;
553 c
->userdata
= userdata
;
554 c
->timeout
= calc_elapse(usec
);
556 k
= sd_netlink_send(nl
, m
, &s
);
564 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
570 if (c
->timeout
!= 0) {
571 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
574 sd_netlink_call_async_cancel(nl
, c
->serial
);
/* Cancel a pending asynchronous call identified by its serial.
 *
 * Guards: nl must be non-NULL (-EINVAL), serial must not be 0 (-EINVAL) and the
 * connection must belong to the calling process (-ECHILD). The entry is removed from
 * the reply_callbacks hashmap and from the timeout priority queue.
 *
 * NOTE(review): the hashmap lookup key is a local named s whose declaration is not
 * visible here (presumably a copy of serial in the hashmap's key width — confirm). The
 * handling of "not found", the condition guarding prioq_remove(), the freeing of c and
 * the return values are not visible either. */
585 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
586 struct reply_callback
*c
;
589 assert_return(nl
, -EINVAL
);
590 assert_return(serial
!= 0, -EINVAL
);
591 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
593 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
598 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
/* Send a message and wait synchronously for the reply carrying the same serial.
 *
 * Guards: rtnl and message must be non-NULL (-EINVAL) and the connection must belong
 * to the calling process (-ECHILD).
 * Visible steps: the message is sent with sd_netlink_send(), which yields its serial;
 * an absolute deadline is computed with calc_elapse(usec); the read queue is scanned
 * for an entry whose serial matches; a match is taken out of the queue by shifting the
 * following entries down with memmove(), its errno and type are inspected (NLMSG_DONE
 * is special-cased) and it is handed to the caller through *ret with TAKE_PTR().
 * When nothing matches, more data is read with socket_read_message() and, failing that,
 * the socket is polled with rtnl_poll(rtnl, true, left).
 *
 * NOTE(review): the usec parameter line, the enclosing retry loop, the rqueue_size
 * decrement, the deadline arithmetic that computes left, the NULL check on ret, and all
 * return statements are not visible in this view. */
604 int sd_netlink_call(sd_netlink
*rtnl
,
605 sd_netlink_message
*message
,
607 sd_netlink_message
**ret
) {
612 assert_return(rtnl
, -EINVAL
);
613 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
614 assert_return(message
, -EINVAL
);
616 r
= sd_netlink_send(rtnl
, message
, &serial
);
620 timeout
= calc_elapse(usec
);
/* Look through the already queued messages for our reply. */
626 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
627 uint32_t received_serial
;
629 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
631 if (received_serial
== serial
) {
632 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*incoming
= NULL
;
635 incoming
= rtnl
->rqueue
[i
];
637 /* found a match, remove from rqueue and return it */
638 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
639 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
642 r
= sd_netlink_message_get_errno(incoming
);
646 r
= sd_netlink_message_get_type(incoming
, &type
);
650 if (type
== NLMSG_DONE
) {
656 *ret
= TAKE_PTR(incoming
);
662 r
= socket_read_message(rtnl
);
666 /* received message, so try to process straight away */
672 n
= now(CLOCK_MONOTONIC
);
678 left
= (uint64_t) -1;
680 r
= rtnl_poll(rtnl
, true, left
);
/* Report which poll events the caller should wait for on the netlink socket.
 *
 * Guards: rtnl must be non-NULL (-EINVAL) and belong to the calling process (-ECHILD).
 * The answer depends on whether the read queue is empty.
 * NOTE(review): the event masks returned for the empty and non-empty cases are not
 * visible in this view. */
688 int sd_netlink_get_events(sd_netlink
*rtnl
) {
689 assert_return(rtnl
, -EINVAL
);
690 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
692 if (rtnl
->rqueue_size
== 0)
/* Report the absolute deadline at which the connection next needs attention.
 *
 * Guards: rtnl and timeout_usec must be non-NULL (-EINVAL) and the connection must
 * belong to the calling process (-ECHILD). A non-empty read queue is special-cased
 * first. Otherwise the head of the reply-callback priority queue is peeked:
 * *timeout_usec is set to (uint64_t) -1 on one path and to that entry's timeout on the
 * other.
 * NOTE(review): the value stored when the read queue is non-empty, the condition
 * selecting between the two assignments (presumably whether the queue is empty), and
 * the return values are not visible in this view. */
698 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
699 struct reply_callback
*c
;
701 assert_return(rtnl
, -EINVAL
);
702 assert_return(timeout_usec
, -EINVAL
);
703 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
705 if (rtnl
->rqueue_size
> 0) {
710 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
712 *timeout_usec
= (uint64_t) -1;
716 *timeout_usec
= c
->timeout
;
/* I/O event-source handler (registered through sd_event_add_io() in
 * sd_netlink_attach_event()). userdata is the sd_netlink object; one processing step is
 * run with sd_netlink_process(rtnl, NULL).
 * NOTE(review): how the result of sd_netlink_process() maps to this handler's return
 * value is not visible in this view. */
721 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
722 sd_netlink
*rtnl
= userdata
;
727 r
= sd_netlink_process(rtnl
, NULL
);
/* Timer event-source handler (registered through sd_event_add_time() in
 * sd_netlink_attach_event()). userdata is the sd_netlink object; one processing step is
 * run with sd_netlink_process(rtnl, NULL).
 * NOTE(review): how the result of sd_netlink_process() maps to this handler's return
 * value is not visible in this view. */
734 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
735 sd_netlink
*rtnl
= userdata
;
740 r
= sd_netlink_process(rtnl
, NULL
);
/* Prepare handler for the I/O event source (installed with
 * sd_event_source_set_prepare() in sd_netlink_attach_event()).
 *
 * Visible steps: the wanted poll events are fetched with sd_netlink_get_events() and
 * applied to io_event_source; the next deadline is fetched with
 * sd_netlink_get_timeout() and applied to time_event_source with
 * sd_event_source_set_time(); the timer source is then enabled when r > 0 and disabled
 * otherwise.
 *
 * NOTE(review): the error checks, the condition under which the timer deadline is set,
 * and the return values are not visible in this view. */
747 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
748 sd_netlink
*rtnl
= userdata
;
755 e
= sd_netlink_get_events(rtnl
);
759 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
763 r
= sd_netlink_get_timeout(rtnl
, &until
);
769 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
774 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
/* Attach the connection to an sd_event loop.
 *
 * Guards: rtnl must be non-NULL (-EINVAL) and must not already be attached (-EBUSY);
 * both event sources are asserted to be unset.
 * Visible steps: the event loop is either referenced from the event argument or
 * obtained with sd_event_default(); an I/O source on rtnl->fd is added with
 * io_callback, given the requested priority, the description "rtnl-receive-message" and
 * prepare_callback as its prepare handler; a CLOCK_MONOTONIC timer source is added with
 * time_callback, given the same priority and the description "rtnl-timer".
 * sd_netlink_detach_event() is called to undo the setup (presumably on the failure
 * path).
 *
 * NOTE(review): the condition choosing between the supplied and the default event loop,
 * the error checks after each call and the return values are not visible in this
 * view. */
781 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int64_t priority
) {
784 assert_return(rtnl
, -EINVAL
);
785 assert_return(!rtnl
->event
, -EBUSY
);
787 assert(!rtnl
->io_event_source
);
788 assert(!rtnl
->time_event_source
);
791 rtnl
->event
= sd_event_ref(event
);
793 r
= sd_event_default(&rtnl
->event
);
/* I/O source: fires when the socket becomes ready. */
798 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
802 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
806 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
810 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
/* Timer source: fires when a pending reply times out. */
814 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
818 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
822 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
829 sd_netlink_detach_event(rtnl
);
/* Detach the connection from its event loop.
 *
 * Guards: rtnl must be non-NULL (-EINVAL) and must currently be attached (-ENXIO).
 * The I/O source, the timer source and the event loop reference are each dropped, and
 * the corresponding field is overwritten with the value returned by the unref call.
 * NOTE(review): the return statement is not visible in this view. */
833 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
834 assert_return(rtnl
, -EINVAL
);
835 assert_return(rtnl
->event
, -ENXIO
);
837 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
839 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
841 rtnl
->event
= sd_event_unref(rtnl
->event
);
/* Register a callback for broadcast messages.
 *
 * Guards: rtnl and callback must be non-NULL (-EINVAL) and the connection must belong
 * to the calling process (-ECHILD).
 * Visible steps: a zeroed struct match_callback is allocated and filled with callback
 * and userdata; broadcast-group references are taken with socket_broadcast_group_ref()
 * for the link, IPv4/IPv6 address, IPv4/IPv6 route and IPv4/IPv6 rule groups; the entry
 * is then prepended to the match_callbacks list. The entry is declared _cleanup_free_,
 * so presumably it is freed on any exit that does not release ownership first.
 *
 * NOTE(review): the type/userdata parameter lines, the assignment of c->type, the
 * switch that selects which groups are joined for which message type, the error
 * checks, the ownership release after LIST_PREPEND and the return values are not
 * visible in this view. */
846 int sd_netlink_add_match(sd_netlink
*rtnl
,
848 sd_netlink_message_handler_t callback
,
850 _cleanup_free_
struct match_callback
*c
= NULL
;
853 assert_return(rtnl
, -EINVAL
);
854 assert_return(callback
, -EINVAL
);
855 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
857 c
= new0(struct match_callback
, 1);
861 c
->callback
= callback
;
863 c
->userdata
= userdata
;
868 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_LINK
);
875 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_IFADDR
);
879 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_IFADDR
);
886 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_ROUTE
);
890 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_ROUTE
);
896 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_RULE
);
900 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_RULE
);
908 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
915 int sd_netlink_remove_match(sd_netlink
*rtnl
,
917 sd_netlink_message_handler_t callback
,
919 struct match_callback
*c
;
922 assert_return(rtnl
, -EINVAL
);
923 assert_return(callback
, -EINVAL
);
924 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
926 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
927 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
928 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);
934 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_LINK
);
941 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_IFADDR
);
945 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_IFADDR
);
952 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_ROUTE
);
956 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_ROUTE
);