1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Tom Gundersen <teg@jklm.no>
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/socket.h>
24 #include "sd-netlink.h"
26 #include "alloc-util.h"
31 #include "netlink-internal.h"
32 #include "netlink-util.h"
33 #include "socket-util.h"
/* Allocates and initializes a new sd_netlink object.
 *
 * ret: location that is validated with assert_return(ret, -EINVAL).
 *
 * Steps visible in this view: the object is allocated zeroed with new0(),
 * its reference count is set to REFCNT_INIT, the socket address family is
 * set to AF_NETLINK, the creating process is recorded via getpid_cached(),
 * the match_callbacks list head is initialized, and greedy_realloc() is
 * asked to size rbuffer for sizeof(struct nlmsghdr) items of
 * sizeof(uint8_t) each.
 *
 * NOTE(review): no check of the new0() result, no assignment to *ret and
 * no return statement are visible in this view; the comment that starts
 * with "We guarantee that the read buffer" also has no visible closing
 * delimiter of its own. Confirm all of these against the complete file.
 */
36 static int sd_netlink_new(sd_netlink
**ret
) {
/* Declared with a cleanup handler that calls sd_netlink_unrefp. */
37 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
39 assert_return(ret
, -EINVAL
);
41 rtnl
= new0(sd_netlink
, 1);
45 rtnl
->n_ref
= REFCNT_INIT
;
47 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
/* Remember the creating PID; rtnl_pid_changed() compares against it. */
48 rtnl
->original_pid
= getpid_cached();
50 LIST_HEAD_INIT(rtnl
->match_callbacks
);
52 /* We guarantee that the read buffer has at least space for
54 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
55 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
58 /* Change notification responses have sequence 0, so we must
59 * start our request sequence numbers at 1, or we may confuse our
60 * responses with notifications from the kernel */
/* Creates an sd_netlink object around an existing socket descriptor.
 *
 * ret: validated with assert_return(ret, -EINVAL).
 * fd:  descriptor passed to getsockname().
 *
 * Visible steps: a new object is obtained from sd_netlink_new(), the
 * socket's local address is read into rtnl->sockaddr with getsockname(),
 * and the resulting address family is compared against AF_NETLINK.
 *
 * NOTE(review): the declarations of r and addrlen, the error checks, the
 * action taken when the family is not AF_NETLINK and the hand-over to *ret
 * are not visible in this view.
 */
69 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
70 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
74 assert_return(ret
, -EINVAL
);
76 r
= sd_netlink_new(&rtnl
);
/* getsockname() takes the buffer size as an in/out argument. */
80 addrlen
= sizeof(rtnl
->sockaddr
);
82 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
/* Check that the descriptor really is a netlink socket. */
86 if (rtnl
->sockaddr
.nl
.nl_family
!= AF_NETLINK
)
/* Reports whether the calling process differs from the one that created
 * the connection: returns true when rtnl->original_pid is not equal to
 * getpid_cached(). */
97 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
100 /* We don't support people creating an rtnl connection and
101 * keeping it around over a fork(). Let's complain. */
103 return rtnl
->original_pid
!= getpid_cached();
/* Creates an sd_netlink object for a caller-supplied descriptor and binds
 * it.
 *
 * ret: validated with assert_return(ret, -EINVAL).
 * fd:  validated with assert_return(fd >= 0, -EBADF).
 *
 * Visible steps: sd_netlink_new() creates the object and socket_bind() is
 * called on it. rtnl->fd is reset to -1 so that, as the inline comment
 * says, the caller keeps ownership of the descriptor on failure.
 *
 * NOTE(review): the assignment of fd to rtnl->fd, the error checks and the
 * success path are not visible in this view.
 */
106 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
107 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
110 assert_return(ret
, -EINVAL
);
111 assert_return(fd
>= 0, -EBADF
);
113 r
= sd_netlink_new(&rtnl
);
119 r
= socket_bind(rtnl
);
121 rtnl
->fd
= -1; /* on failure, the caller remains owner of the fd, hence don't close it here */
/* Opens a NETLINK_ROUTE socket with socket_open() and passes the resulting
 * descriptor to sd_netlink_open_fd() together with ret.
 *
 * The local descriptor is declared _cleanup_close_.
 *
 * NOTE(review): error handling, the return statement and how ownership of
 * fd is released on success are not visible in this view.
 */
131 int sd_netlink_open(sd_netlink
**ret
) {
132 _cleanup_close_
int fd
= -1;
135 fd
= socket_open(NETLINK_ROUTE
);
139 r
= sd_netlink_open_fd(ret
, fd
);
/* Forwards a receive-buffer size request to fd_inc_rcvbuf() for the
 * connection's descriptor and returns that call's result.
 *
 * rtnl: validated with assert_return(rtnl, -EINVAL); a PID change since
 *       creation is rejected with -ECHILD.
 * size: passed through unchanged to fd_inc_rcvbuf().
 */
148 int sd_netlink_inc_rcvbuf(sd_netlink
*rtnl
, size_t size
) {
149 assert_return(rtnl
, -EINVAL
);
150 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
152 return fd_inc_rcvbuf(rtnl
->fd
, size
);
/* Takes an additional reference on rtnl.
 *
 * Both guards use NULL as their assert_return() value: one for a NULL rtnl
 * and one for a PID change since creation. The reference count is bumped
 * with REFCNT_INC(), and assert_se() requires the result to be at least 2.
 *
 * NOTE(review): the return statement is not visible in this view.
 */
155 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
156 assert_return(rtnl
, NULL
);
157 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
160 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
/* Drops one reference on rtnl and tears the object down when REFCNT_DEC()
 * reports zero.
 *
 * Teardown steps visible in this view, in order:
 *  - unref every message in rqueue and in rqueue_partial, then free the
 *    rqueue_partial array;
 *  - free the reply_callbacks hashmap (hashmap_free_free) and the
 *    reply_callbacks_prioq;
 *  - unref the I/O and time event sources and the event loop;
 *  - remove each remaining match callback through
 *    sd_netlink_remove_match();
 *  - free broadcast_group_refs and close the descriptor with safe_close().
 *
 * NOTE(review): a NULL check on rtnl, the declaration of i, the freeing of
 * the rqueue array, rbuffer and the object itself, and the return
 * statement are not visible in this view.
 */
165 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
169 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
171 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
172 struct match_callback
*f
;
/* Release fully received messages that were never dispatched. */
175 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
176 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
/* Release the messages held in the partial queue as well. */
179 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
180 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
181 free(rtnl
->rqueue_partial
);
185 hashmap_free_free(rtnl
->reply_callbacks
);
186 prioq_free(rtnl
->reply_callbacks_prioq
);
188 sd_event_source_unref(rtnl
->io_event_source
);
189 sd_event_source_unref(rtnl
->time_event_source
);
190 sd_event_unref(rtnl
->event
);
/* Keep removing the list head until the match list is empty. */
192 while ((f
= rtnl
->match_callbacks
)) {
193 sd_netlink_remove_match(rtnl
, f
->type
, f
->callback
, f
->userdata
);
196 hashmap_free(rtnl
->broadcast_group_refs
);
198 safe_close(rtnl
->fd
);
/* Assigns a sequence number to message m and seals it.
 *
 * The sequence number is taken from rtnl->serial with a post-increment.
 * The GNU "?:" form falls back to a second post-increment when the first
 * value read is 0, so 0 is never used as a request sequence number (see
 * the inline comment about broadcasts). rtnl_message_seal() is called
 * afterwards.
 */
205 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
207 assert(!rtnl_pid_changed(rtnl
));
211 /* don't use seq == 0, as that is used for broadcasts, so we
212 would get confused by replies to such messages */
213 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
215 rtnl_message_seal(m
);
/* Seals a message and writes it to the netlink socket.
 *
 * nl:      validated with assert_return(nl, -EINVAL); a PID change since
 *          creation is rejected with -ECHILD.
 * message: validated with assert_return(message, -EINVAL); a message that
 *          is already sealed is rejected with -EPERM.
 *
 * Visible steps: rtnl_seal_message() assigns the sequence number,
 * socket_write_message() sends the message, and the message's serial is
 * stored through the serial pointer.
 *
 * NOTE(review): the rest of the parameter list (including serial), the
 * error check after the write and any NULL check on serial are not visible
 * in this view.
 */
220 int sd_netlink_send(sd_netlink
*nl
,
221 sd_netlink_message
*message
,
225 assert_return(nl
, -EINVAL
);
226 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
227 assert_return(message
, -EINVAL
);
228 assert_return(!message
->sealed
, -EPERM
);
230 rtnl_seal_message(nl
, message
);
232 r
= socket_write_message(nl
, message
);
237 *serial
= rtnl_message_get_serial(message
);
/* Ensures the read queue can hold one more message.
 *
 * When rqueue_size has reached RTNL_RQUEUE_MAX a debug message is logged.
 * GREEDY_REALLOC() is then asked to make rqueue large enough for
 * rqueue_size + 1 entries.
 *
 * NOTE(review): the return values for the "queue exhausted", allocation
 * failure and success cases are not visible in this view.
 */
242 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
245 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
246 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
250 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
/* Ensures the partial read queue can hold one more message.
 *
 * Mirrors rtnl_rqueue_make_room() for rqueue_partial: a debug message is
 * logged when rqueue_partial_size has reached RTNL_RQUEUE_MAX, and
 * GREEDY_REALLOC() is asked for room for rqueue_partial_size + 1 entries.
 *
 * NOTE(review): the return values of the individual paths are not visible
 * in this view.
 */
256 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
259 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
260 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
264 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
265 rtnl
->rqueue_partial_size
+ 1))
/* Hands the oldest queued message to the caller, reading from the socket
 * first when the queue is empty.
 *
 * rtnl:    connection whose rqueue is consumed.
 * message: receives rqueue[0].
 *
 * When rqueue_size is not positive, socket_read_message() is called; an
 * -ENOBUFS result is logged at debug level as ignored (see the FIXME).
 * Afterwards the first queue entry is stored in *message and the remaining
 * entries are shifted down by one slot with memmove().
 *
 * NOTE(review): no decrement of rqueue_size is visible before the
 * memmove(), whose length is computed from rqueue_size. If the counter
 * really is not decremented first, the copy would read one element past
 * the live entries. Confirm against the complete file. The handling of
 * other read errors and the return values are also not visible here.
 */
271 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
277 if (rtnl
->rqueue_size
<= 0) {
278 /* Try to read a new message */
279 r
= socket_read_message(rtnl
);
280 if (r
== -ENOBUFS
) { /* FIXME: ignore buffer overruns for now */
281 log_debug_errno(r
, "Got ENOBUFS from netlink socket, ignoring.");
288 /* Dispatch a queued message */
289 *message
= rtnl
->rqueue
[0];
291 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
/* Handles the reply callback at the front of the timeout priority queue.
 *
 * Visible steps: the front entry is fetched with prioq_peek(), the current
 * CLOCK_MONOTONIC time is read, a synthetic error message carrying
 * -ETIMEDOUT and the callback's serial is created, the entry is popped
 * from the priority queue (asserting it is the peeked one) and removed
 * from the reply_callbacks hashmap, and the callback is invoked with the
 * synthetic message. A debug message is logged for a failed callback.
 *
 * NOTE(review): the checks for an empty queue and for "deadline not yet
 * reached", the condition guarding the log call, the release of c and the
 * return values are not visible in this view.
 */
296 static int process_timeout(sd_netlink
*rtnl
) {
297 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
298 struct reply_callback
*c
;
304 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
308 n
= now(CLOCK_MONOTONIC
);
/* Build the error message that is delivered in place of a real reply. */
312 r
= rtnl_message_new_synthetic_error(-ETIMEDOUT
, c
->serial
, &m
);
/* Unregister the callback from both containers before invoking it. */
316 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
317 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
319 r
= c
->callback(rtnl
, m
, c
->userdata
);
321 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
/* Delivers message m to the reply callback registered for its serial.
 *
 * Visible steps: the serial is read with rtnl_message_get_serial(), the
 * matching entry is removed from the reply_callbacks hashmap and from the
 * timeout priority queue, the message type is fetched and compared with
 * NLMSG_DONE, and the callback is invoked. A debug message is logged for a
 * failed callback. The callback record is declared _cleanup_free_.
 *
 * NOTE(review): the handling of "no callback registered", the condition
 * around prioq_remove(), what happens for NLMSG_DONE, the condition
 * guarding the log call and the return values are not visible in this
 * view.
 */
328 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
329 _cleanup_free_
struct reply_callback
*c
= NULL
;
337 serial
= rtnl_message_get_serial(m
);
338 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
343 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
345 r
= sd_netlink_message_get_type(m
, &type
);
349 if (type
== NLMSG_DONE
)
352 r
= c
->callback(rtnl
, m
, c
->userdata
);
354 log_debug_errno(r
, "sd-netlink: callback failed: %m");
/* Offers message m to the registered match callbacks.
 *
 * The message type is fetched with sd_netlink_message_get_type(). The
 * match_callbacks list is then walked, and each entry whose type equals
 * the message type has its callback invoked with the message and the
 * entry's userdata. A debug message is logged for a failed callback.
 *
 * NOTE(review): the handling of the callback's return value (including
 * whether iteration stops early) and the function's return values are not
 * visible in this view.
 */
359 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
360 struct match_callback
*c
;
367 r
= sd_netlink_message_get_type(m
, &type
);
371 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
372 if (type
== c
->type
) {
373 r
= c
->callback(rtnl
, m
, c
->userdata
);
376 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
/* Runs one processing step for the connection.
 *
 * Visible steps: process_timeout() is called first, then dispatch_rqueue()
 * fetches a message into m. A message for which
 * sd_netlink_message_is_broadcast() is true is passed to process_match();
 * process_reply() is also called with the message.
 *
 * NOTE(review): the early returns between the steps, the else-branch
 * structure around process_reply(), and how the message is handed back
 * through ret are not visible in this view.
 */
386 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
387 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
392 r
= process_timeout(rtnl
);
396 r
= dispatch_rqueue(rtnl
, &m
);
402 if (sd_netlink_message_is_broadcast(m
)) {
403 r
= process_match(rtnl
, m
);
407 r
= process_reply(rtnl
, m
);
/* Public entry point that performs one processing step on the connection.
 *
 * rtnl: validated with assert_return(rtnl, -EINVAL); a PID change since
 *       creation is rejected with -ECHILD, and a call made while the
 *       processing flag is already set is rejected with -EBUSY.
 * ret:  forwarded unchanged to process_running().
 *
 * The processing flag is set for the duration of process_running() and
 * cleared afterwards.
 *
 * NOTE(review): NETLINK_DONT_DESTROY(rtnl) appears before the NULL check
 * on rtnl; its definition is not visible here, so confirm that it
 * tolerates a NULL argument. The return statement is also not visible.
 */
428 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
429 NETLINK_DONT_DESTROY(rtnl
);
432 assert_return(rtnl
, -EINVAL
);
433 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
434 assert_return(!rtnl
->processing
, -EBUSY
);
436 rtnl
->processing
= true;
437 r
= process_running(rtnl
, ret
);
438 rtnl
->processing
= false;
/* Converts a relative timeout into an absolute CLOCK_MONOTONIC deadline.
 *
 * usec: relative timeout; the value (uint64_t) -1 is tested for
 *       explicitly.
 *
 * Visible steps: usec may be replaced by RTNL_DEFAULT_TIMEOUT, and the
 * result is now(CLOCK_MONOTONIC) + usec.
 *
 * NOTE(review): what is returned for (uint64_t) -1 and the condition under
 * which the default timeout is substituted are not visible in this view.
 */
443 static usec_t
calc_elapse(uint64_t usec
) {
444 if (usec
== (uint64_t) -1)
448 usec
= RTNL_DEFAULT_TIMEOUT
;
450 return now(CLOCK_MONOTONIC
) + usec
;
/* Waits on the connection with ppoll().
 *
 * rtnl:         connection to wait on.
 * need_more:    see the two inline comments for the intended meaning; the
 *               branch that tests it is not visible in this view.
 * timeout_usec: caller-supplied limit; (uint64_t) -1 is treated as "no
 *               limit" by the comparisons below.
 *
 * Visible steps: the events are obtained from sd_netlink_get_events() and
 * a deadline from sd_netlink_get_timeout(). The deadline is converted to a
 * relative wait m, clamped at 0 when it has already passed. m is then
 * compared with timeout_usec. ppoll() is called on a single pollfd, with a
 * NULL timeout when m is (uint64_t) -1.
 *
 * Returns 1 when ppoll() reports at least one ready descriptor and 0
 * otherwise, as far as the final return statement shows.
 *
 * NOTE(review): the setup of the pollfd entry, the assignment that follows
 * the timeout_usec comparison and the error paths are not visible in this
 * view.
 */
453 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
454 struct pollfd p
[1] = {};
456 usec_t m
= USEC_INFINITY
;
461 e
= sd_netlink_get_events(rtnl
);
466 /* Caller wants more data, and doesn't care about
467 * what's been read or any other timeouts. */
471 /* Caller wants to process if there is something to
472 * process, but doesn't care otherwise */
474 r
= sd_netlink_get_timeout(rtnl
, &until
);
479 nw
= now(CLOCK_MONOTONIC
);
/* Remaining time until the deadline, or 0 if it has already passed. */
480 m
= until
> nw
? until
- nw
: 0;
484 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
490 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
494 return r
> 0 ? 1 : 0;
/* Waits until the connection has something to process or the timeout
 * passes.
 *
 * nl:           validated with assert_return(nl, -EINVAL); a PID change
 *               since creation is rejected with -ECHILD.
 * timeout_usec: forwarded to rtnl_poll().
 *
 * The read queue is checked first; otherwise the result of
 * rtnl_poll(nl, false, timeout_usec) is returned.
 *
 * NOTE(review): the value returned when rqueue_size is greater than 0 is
 * not visible in this view.
 */
497 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
498 assert_return(nl
, -EINVAL
);
499 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
501 if (nl
->rqueue_size
> 0)
504 return rtnl_poll(nl
, false, timeout_usec
);
/* Comparison function over two struct reply_callback records, keyed on
 * their timeout field. It is passed to prioq_ensure_allocated() in
 * sd_netlink_call_async().
 *
 * Four cases are distinguished, in this order: only y has a zero timeout,
 * only x has a zero timeout, x's timeout is smaller, x's timeout is
 * larger.
 *
 * NOTE(review): the value returned for each case, and for equal timeouts,
 * is not visible in this view.
 */
507 static int timeout_compare(const void *a
, const void *b
) {
508 const struct reply_callback
*x
= a
, *y
= b
;
510 if (x
->timeout
!= 0 && y
->timeout
== 0)
513 if (x
->timeout
== 0 && y
->timeout
!= 0)
516 if (x
->timeout
< y
->timeout
)
519 if (x
->timeout
> y
->timeout
)
/* Sends message m and registers a callback for its reply.
 *
 * nl:       validated with assert_return(nl, -EINVAL); a PID change since
 *           creation is rejected with -ECHILD.
 * m:        validated with assert_return(m, -EINVAL).
 * callback: validated with assert_return(callback, -EINVAL).
 *
 * Visible steps: the reply_callbacks hashmap is created on demand with
 * uint64_hash_ops. When usec is not (uint64_t) -1, the timeout priority
 * queue is created on demand with timeout_compare. A zeroed
 * reply_callback record is allocated and filled with the callback, the
 * userdata and the deadline from calc_elapse(usec). The message is sent
 * with sd_netlink_send() and the record is stored in the hashmap keyed by
 * &c->serial. Records with a non-zero timeout are also put into the
 * priority queue. A call to sd_netlink_call_async_cancel() for the
 * record's serial is present, presumably on the prioq_put() failure path
 * (confirm).
 *
 * NOTE(review): the remaining parameters (userdata, usec, serial output),
 * the allocation check, the assignment of c->serial, the error checks and
 * the return values are not visible in this view.
 */
525 int sd_netlink_call_async(sd_netlink
*nl
,
526 sd_netlink_message
*m
,
527 sd_netlink_message_handler_t callback
,
531 struct reply_callback
*c
;
535 assert_return(nl
, -EINVAL
);
536 assert_return(m
, -EINVAL
);
537 assert_return(callback
, -EINVAL
);
538 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
540 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
/* The priority queue is only ensured when a finite timeout was given. */
544 if (usec
!= (uint64_t) -1) {
545 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
550 c
= new0(struct reply_callback
, 1);
554 c
->callback
= callback
;
555 c
->userdata
= userdata
;
556 c
->timeout
= calc_elapse(usec
);
558 k
= sd_netlink_send(nl
, m
, &s
);
566 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
572 if (c
->timeout
!= 0) {
573 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
576 sd_netlink_call_async_cancel(nl
, c
->serial
);
/* Cancels the reply callback registered for the given serial.
 *
 * nl:     validated with assert_return(nl, -EINVAL); a PID change since
 *         creation is rejected with -ECHILD.
 * serial: must be non-zero (-EINVAL otherwise).
 *
 * The entry is removed from the reply_callbacks hashmap and from the
 * timeout priority queue.
 *
 * NOTE(review): the hashmap lookup uses &s, not &serial. The declaration
 * of s is not visible here; presumably it is a copy of serial in the key
 * width expected by the hashmap, which sd_netlink_call_async() creates
 * with uint64_hash_ops (confirm). The handling of a missing entry, the
 * condition around prioq_remove(), the release of c and the return values
 * are also not visible in this view.
 */
587 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
588 struct reply_callback
*c
;
591 assert_return(nl
, -EINVAL
);
592 assert_return(serial
!= 0, -EINVAL
);
593 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
595 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
600 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
/* Sends a message and waits synchronously for the reply with the same
 * serial.
 *
 * rtnl:    validated with assert_return(rtnl, -EINVAL); a PID change since
 *          creation is rejected with -ECHILD.
 * message: validated with assert_return(message, -EINVAL).
 * ret:     output location for the reply; how it is filled is not visible
 *          in this view.
 *
 * Visible steps: the message is sent with sd_netlink_send(), which yields
 * its serial, and a deadline is computed with calc_elapse(usec). The read
 * queue is scanned for a message whose serial matches. A match is taken
 * out of the queue by shifting the later entries down with memmove(), and
 * is then inspected with sd_netlink_message_get_errno() and
 * sd_netlink_message_get_type(), with NLMSG_DONE tested explicitly. When
 * nothing matches, socket_read_message() is called, the current time is
 * read, and rtnl_poll(rtnl, true, left) waits for more data.
 *
 * NOTE(review): the usec parameter, the enclosing retry loop, the
 * rqueue_size update after the memmove(), the computation of left from the
 * deadline, the timeout error and every return statement are not visible
 * in this view.
 */
606 int sd_netlink_call(sd_netlink
*rtnl
,
607 sd_netlink_message
*message
,
609 sd_netlink_message
**ret
) {
614 assert_return(rtnl
, -EINVAL
);
615 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
616 assert_return(message
, -EINVAL
);
618 r
= sd_netlink_send(rtnl
, message
, &serial
);
622 timeout
= calc_elapse(usec
);
/* Look through the already queued messages for our reply. */
628 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
629 uint32_t received_serial
;
631 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
633 if (received_serial
== serial
) {
634 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*incoming
= NULL
;
637 incoming
= rtnl
->rqueue
[i
];
639 /* found a match, remove from rqueue and return it */
640 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
641 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
644 r
= sd_netlink_message_get_errno(incoming
);
648 r
= sd_netlink_message_get_type(incoming
, &type
);
652 if (type
== NLMSG_DONE
) {
666 r
= socket_read_message(rtnl
);
670 /* received message, so try to process straight away */
676 n
= now(CLOCK_MONOTONIC
);
682 left
= (uint64_t) -1;
684 r
= rtnl_poll(rtnl
, true, left
);
/* Reports which poll events the connection is interested in.
 *
 * rtnl: validated with assert_return(rtnl, -EINVAL); a PID change since
 *       creation is rejected with -ECHILD.
 *
 * The result depends on whether the read queue is empty.
 *
 * NOTE(review): the values returned for the empty and non-empty cases are
 * not visible in this view.
 */
692 int sd_netlink_get_events(sd_netlink
*rtnl
) {
693 assert_return(rtnl
, -EINVAL
);
694 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
696 if (rtnl
->rqueue_size
== 0)
/* Reports the deadline the caller should wake up at.
 *
 * rtnl:         validated with assert_return(rtnl, -EINVAL); a PID change
 *               since creation is rejected with -ECHILD.
 * timeout_usec: output, validated with assert_return(timeout_usec,
 *               -EINVAL).
 *
 * Visible steps: a non-empty read queue is handled first. Otherwise the
 * front of the timeout priority queue is peeked and *timeout_usec is set
 * either to (uint64_t) -1 or to that entry's timeout.
 *
 * NOTE(review): the value stored for a non-empty read queue, the condition
 * selecting between the two assignments (presumably whether prioq_peek()
 * returned an entry) and the return values are not visible in this view.
 */
702 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
703 struct reply_callback
*c
;
705 assert_return(rtnl
, -EINVAL
);
706 assert_return(timeout_usec
, -EINVAL
);
707 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
709 if (rtnl
->rqueue_size
> 0) {
714 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
716 *timeout_usec
= (uint64_t) -1;
720 *timeout_usec
= c
->timeout
;
/* I/O handler registered through sd_event_add_io() in
 * sd_netlink_attach_event().
 *
 * userdata is the sd_netlink object. The handler runs one processing step
 * with sd_netlink_process(rtnl, NULL). The s, fd and revents arguments are
 * not used in the visible lines.
 *
 * NOTE(review): the handling of r and the return value are not visible in
 * this view.
 */
725 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
726 sd_netlink
*rtnl
= userdata
;
731 r
= sd_netlink_process(rtnl
, NULL
);
/* Timer handler registered through sd_event_add_time() in
 * sd_netlink_attach_event().
 *
 * userdata is the sd_netlink object. The handler runs one processing step
 * with sd_netlink_process(rtnl, NULL). The s and usec arguments are not
 * used in the visible lines.
 *
 * NOTE(review): the handling of r and the return value are not visible in
 * this view.
 */
738 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
739 sd_netlink
*rtnl
= userdata
;
744 r
= sd_netlink_process(rtnl
, NULL
);
/* Prepare handler installed on the I/O event source in
 * sd_netlink_attach_event().
 *
 * userdata is the sd_netlink object. The handler refreshes both event
 * sources from the connection's current state: the I/O source gets the
 * events from sd_netlink_get_events(), and the time source gets the
 * deadline from sd_netlink_get_timeout() and is enabled or disabled
 * according to "r > 0".
 *
 * NOTE(review): the result of sd_event_source_set_time() is stored in j,
 * not r. The declaration of j is not visible here; confirm it has a signed
 * type so that a negative error code can be detected. The condition
 * guarding the set_time call, the error checks and the return value are
 * also not visible in this view.
 */
751 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
752 sd_netlink
*rtnl
= userdata
;
759 e
= sd_netlink_get_events(rtnl
);
763 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
767 r
= sd_netlink_get_timeout(rtnl
, &until
);
773 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
778 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
/* Attaches the connection to an sd_event loop.
 *
 * rtnl:     validated with assert_return(rtnl, -EINVAL); a connection that
 *           already has an event loop is rejected with -EBUSY.
 * event:    loop to attach to. Both sd_event_ref(event) and
 *           sd_event_default() appear below, presumably selected by
 *           whether event is NULL (confirm; the condition is not visible).
 * priority: applied to both event sources.
 *
 * Visible steps: an I/O source on rtnl->fd is added with io_callback, then
 * given the priority, the description "rtnl-receive-message" and
 * prepare_callback as its prepare handler. A CLOCK_MONOTONIC time source
 * is added with time_callback, then given the priority and the description
 * "rtnl-timer". sd_netlink_detach_event() is called at the end, presumably
 * on the failure path (confirm).
 *
 * NOTE(review): the error checks after each call and the return values are
 * not visible in this view.
 */
785 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int64_t priority
) {
788 assert_return(rtnl
, -EINVAL
);
789 assert_return(!rtnl
->event
, -EBUSY
);
791 assert(!rtnl
->io_event_source
);
792 assert(!rtnl
->time_event_source
);
795 rtnl
->event
= sd_event_ref(event
);
797 r
= sd_event_default(&rtnl
->event
);
/* I/O source: registered with an event mask of 0. */
802 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
806 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
810 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
814 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
/* Time source: registered with 0 for both numeric arguments. */
818 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
822 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
826 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
833 sd_netlink_detach_event(rtnl
);
/* Detaches the connection from its sd_event loop.
 *
 * rtnl: validated with assert_return(rtnl, -EINVAL); a connection without
 *       an event loop is rejected with -ENXIO.
 *
 * The I/O source, the time source and the event loop reference are each
 * unreferenced, and the value returned by the unref call is stored back
 * into the corresponding field.
 *
 * NOTE(review): the return statement is not visible in this view.
 */
837 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
838 assert_return(rtnl
, -EINVAL
);
839 assert_return(rtnl
->event
, -ENXIO
);
841 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
843 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
845 rtnl
->event
= sd_event_unref(rtnl
->event
);
/* Registers a callback for broadcast messages.
 *
 * rtnl:     validated with assert_return(rtnl, -EINVAL); a PID change
 *           since creation is rejected with -ECHILD.
 * callback: validated with assert_return(callback, -EINVAL).
 *
 * Visible steps: a zeroed match_callback record is allocated and filled
 * with the callback and userdata. socket_broadcast_group_ref() is called
 * for the link, IPv4/IPv6 address, IPv4/IPv6 route and IPv4/IPv6 rule
 * groups. Finally the record is prepended to rtnl->match_callbacks. The
 * record is declared _cleanup_free_.
 *
 * NOTE(review): the remaining parameters (type, userdata), the allocation
 * check, the assignment of c->type, the logic that selects which groups
 * are referenced (presumably a switch on the message type), the error
 * checks, the release of ownership of c and the return value are not
 * visible in this view.
 */
850 int sd_netlink_add_match(sd_netlink
*rtnl
,
852 sd_netlink_message_handler_t callback
,
854 _cleanup_free_
struct match_callback
*c
= NULL
;
857 assert_return(rtnl
, -EINVAL
);
858 assert_return(callback
, -EINVAL
);
859 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
861 c
= new0(struct match_callback
, 1);
865 c
->callback
= callback
;
867 c
->userdata
= userdata
;
872 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_LINK
);
879 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_IFADDR
);
883 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_IFADDR
);
890 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_ROUTE
);
894 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_ROUTE
);
900 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_RULE
);
904 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_RULE
);
912 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
919 int sd_netlink_remove_match(sd_netlink
*rtnl
,
921 sd_netlink_message_handler_t callback
,
923 struct match_callback
*c
;
926 assert_return(rtnl
, -EINVAL
);
927 assert_return(callback
, -EINVAL
);
928 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
930 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
931 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
932 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);
938 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_LINK
);
945 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_IFADDR
);
949 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_IFADDR
);
956 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_ROUTE
);
960 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_ROUTE
);