1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Tom Gundersen <teg@jklm.no>
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/socket.h>
24 #include "sd-netlink.h"
26 #include "alloc-util.h"
31 #include "netlink-internal.h"
32 #include "netlink-util.h"
33 #include "process-util.h"
34 #include "socket-util.h"
37 static int sd_netlink_new(sd_netlink
**ret
) {
38 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
40 assert_return(ret
, -EINVAL
);
42 rtnl
= new0(sd_netlink
, 1);
46 rtnl
->n_ref
= REFCNT_INIT
;
48 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
49 rtnl
->original_pid
= getpid_cached();
52 LIST_HEAD_INIT(rtnl
->match_callbacks
);
54 /* We guarantee that the read buffer has at least space for
56 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
57 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
60 /* Change notification responses have sequence 0, so we must
61 * start our request sequence numbers at 1, or we may confuse our
62 * responses with notifications from the kernel */
71 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
72 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
76 assert_return(ret
, -EINVAL
);
78 r
= sd_netlink_new(&rtnl
);
82 addrlen
= sizeof(rtnl
->sockaddr
);
84 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
88 if (rtnl
->sockaddr
.nl
.nl_family
!= AF_NETLINK
)
99 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
102 /* We don't support people creating an rtnl connection and
103 * keeping it around over a fork(). Let's complain. */
105 return rtnl
->original_pid
!= getpid_cached();
108 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
109 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
114 assert_return(ret
, -EINVAL
);
115 assert_return(fd
>= 0, -EBADF
);
117 r
= sd_netlink_new(&rtnl
);
121 l
= sizeof(protocol
);
122 r
= getsockopt(fd
, SOL_SOCKET
, SO_PROTOCOL
, &protocol
, &l
);
127 rtnl
->protocol
= protocol
;
129 r
= socket_bind(rtnl
);
131 rtnl
->fd
= -1; /* on failure, the caller remains owner of the fd, hence don't close it here */
142 int netlink_open_family(sd_netlink
**ret
, int family
) {
143 _cleanup_close_
int fd
= -1;
146 fd
= socket_open(family
);
150 r
= sd_netlink_open_fd(ret
, fd
);
159 int sd_netlink_open(sd_netlink
**ret
) {
160 return netlink_open_family(ret
, NETLINK_ROUTE
);
163 int sd_netlink_inc_rcvbuf(sd_netlink
*rtnl
, size_t size
) {
164 assert_return(rtnl
, -EINVAL
);
165 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
167 return fd_inc_rcvbuf(rtnl
->fd
, size
);
170 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
171 assert_return(rtnl
, NULL
);
172 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
175 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
180 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
184 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
186 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
187 struct match_callback
*f
;
190 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
191 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
194 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
195 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
196 free(rtnl
->rqueue_partial
);
200 hashmap_free_free(rtnl
->reply_callbacks
);
201 prioq_free(rtnl
->reply_callbacks_prioq
);
203 sd_event_source_unref(rtnl
->io_event_source
);
204 sd_event_source_unref(rtnl
->time_event_source
);
205 sd_event_unref(rtnl
->event
);
207 while ((f
= rtnl
->match_callbacks
)) {
208 sd_netlink_remove_match(rtnl
, f
->type
, f
->callback
, f
->userdata
);
211 hashmap_free(rtnl
->broadcast_group_refs
);
213 safe_close(rtnl
->fd
);
220 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
222 assert(!rtnl_pid_changed(rtnl
));
226 /* don't use seq == 0, as that is used for broadcasts, so we
227 would get confused by replies to such messages */
228 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
230 rtnl_message_seal(m
);
235 int sd_netlink_send(sd_netlink
*nl
,
236 sd_netlink_message
*message
,
240 assert_return(nl
, -EINVAL
);
241 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
242 assert_return(message
, -EINVAL
);
243 assert_return(!message
->sealed
, -EPERM
);
245 rtnl_seal_message(nl
, message
);
247 r
= socket_write_message(nl
, message
);
252 *serial
= rtnl_message_get_serial(message
);
257 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
260 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
261 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
265 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
271 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
274 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
275 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
279 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
280 rtnl
->rqueue_partial_size
+ 1))
286 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
292 if (rtnl
->rqueue_size
<= 0) {
293 /* Try to read a new message */
294 r
= socket_read_message(rtnl
);
295 if (r
== -ENOBUFS
) { /* FIXME: ignore buffer overruns for now */
296 log_debug_errno(r
, "Got ENOBUFS from netlink socket, ignoring.");
303 /* Dispatch a queued message */
304 *message
= rtnl
->rqueue
[0];
306 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
311 static int process_timeout(sd_netlink
*rtnl
) {
312 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
313 struct reply_callback
*c
;
319 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
323 n
= now(CLOCK_MONOTONIC
);
327 r
= rtnl_message_new_synthetic_error(rtnl
, -ETIMEDOUT
, c
->serial
, &m
);
331 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
332 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
334 r
= c
->callback(rtnl
, m
, c
->userdata
);
336 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
343 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
344 _cleanup_free_
struct reply_callback
*c
= NULL
;
352 serial
= rtnl_message_get_serial(m
);
353 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
358 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
360 r
= sd_netlink_message_get_type(m
, &type
);
364 if (type
== NLMSG_DONE
)
367 r
= c
->callback(rtnl
, m
, c
->userdata
);
369 log_debug_errno(r
, "sd-netlink: callback failed: %m");
374 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
375 struct match_callback
*c
;
382 r
= sd_netlink_message_get_type(m
, &type
);
386 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
387 if (type
== c
->type
) {
388 r
= c
->callback(rtnl
, m
, c
->userdata
);
391 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
401 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
402 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
407 r
= process_timeout(rtnl
);
411 r
= dispatch_rqueue(rtnl
, &m
);
417 if (sd_netlink_message_is_broadcast(m
)) {
418 r
= process_match(rtnl
, m
);
422 r
= process_reply(rtnl
, m
);
443 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
444 NETLINK_DONT_DESTROY(rtnl
);
447 assert_return(rtnl
, -EINVAL
);
448 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
449 assert_return(!rtnl
->processing
, -EBUSY
);
451 rtnl
->processing
= true;
452 r
= process_running(rtnl
, ret
);
453 rtnl
->processing
= false;
458 static usec_t
calc_elapse(uint64_t usec
) {
459 if (usec
== (uint64_t) -1)
463 usec
= RTNL_DEFAULT_TIMEOUT
;
465 return now(CLOCK_MONOTONIC
) + usec
;
468 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
469 struct pollfd p
[1] = {};
471 usec_t m
= USEC_INFINITY
;
476 e
= sd_netlink_get_events(rtnl
);
481 /* Caller wants more data, and doesn't care about
482 * what's been read or any other timeouts. */
486 /* Caller wants to process if there is something to
487 * process, but doesn't care otherwise */
489 r
= sd_netlink_get_timeout(rtnl
, &until
);
494 nw
= now(CLOCK_MONOTONIC
);
495 m
= until
> nw
? until
- nw
: 0;
499 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
505 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
509 return r
> 0 ? 1 : 0;
512 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
513 assert_return(nl
, -EINVAL
);
514 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
516 if (nl
->rqueue_size
> 0)
519 return rtnl_poll(nl
, false, timeout_usec
);
522 static int timeout_compare(const void *a
, const void *b
) {
523 const struct reply_callback
*x
= a
, *y
= b
;
525 if (x
->timeout
!= 0 && y
->timeout
== 0)
528 if (x
->timeout
== 0 && y
->timeout
!= 0)
531 if (x
->timeout
< y
->timeout
)
534 if (x
->timeout
> y
->timeout
)
540 int sd_netlink_call_async(sd_netlink
*nl
,
541 sd_netlink_message
*m
,
542 sd_netlink_message_handler_t callback
,
546 struct reply_callback
*c
;
550 assert_return(nl
, -EINVAL
);
551 assert_return(m
, -EINVAL
);
552 assert_return(callback
, -EINVAL
);
553 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
555 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
559 if (usec
!= (uint64_t) -1) {
560 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
565 c
= new0(struct reply_callback
, 1);
569 c
->callback
= callback
;
570 c
->userdata
= userdata
;
571 c
->timeout
= calc_elapse(usec
);
573 k
= sd_netlink_send(nl
, m
, &s
);
581 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
587 if (c
->timeout
!= 0) {
588 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
591 sd_netlink_call_async_cancel(nl
, c
->serial
);
602 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
603 struct reply_callback
*c
;
606 assert_return(nl
, -EINVAL
);
607 assert_return(serial
!= 0, -EINVAL
);
608 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
610 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
615 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
621 int sd_netlink_call(sd_netlink
*rtnl
,
622 sd_netlink_message
*message
,
624 sd_netlink_message
**ret
) {
629 assert_return(rtnl
, -EINVAL
);
630 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
631 assert_return(message
, -EINVAL
);
633 r
= sd_netlink_send(rtnl
, message
, &serial
);
637 timeout
= calc_elapse(usec
);
643 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
644 uint32_t received_serial
;
646 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
648 if (received_serial
== serial
) {
649 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*incoming
= NULL
;
652 incoming
= rtnl
->rqueue
[i
];
654 /* found a match, remove from rqueue and return it */
655 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
656 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
659 r
= sd_netlink_message_get_errno(incoming
);
663 r
= sd_netlink_message_get_type(incoming
, &type
);
667 if (type
== NLMSG_DONE
) {
681 r
= socket_read_message(rtnl
);
685 /* received message, so try to process straight away */
691 n
= now(CLOCK_MONOTONIC
);
697 left
= (uint64_t) -1;
699 r
= rtnl_poll(rtnl
, true, left
);
707 int sd_netlink_get_events(sd_netlink
*rtnl
) {
708 assert_return(rtnl
, -EINVAL
);
709 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
711 if (rtnl
->rqueue_size
== 0)
717 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
718 struct reply_callback
*c
;
720 assert_return(rtnl
, -EINVAL
);
721 assert_return(timeout_usec
, -EINVAL
);
722 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
724 if (rtnl
->rqueue_size
> 0) {
729 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
731 *timeout_usec
= (uint64_t) -1;
735 *timeout_usec
= c
->timeout
;
740 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
741 sd_netlink
*rtnl
= userdata
;
746 r
= sd_netlink_process(rtnl
, NULL
);
753 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
754 sd_netlink
*rtnl
= userdata
;
759 r
= sd_netlink_process(rtnl
, NULL
);
766 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
767 sd_netlink
*rtnl
= userdata
;
774 e
= sd_netlink_get_events(rtnl
);
778 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
782 r
= sd_netlink_get_timeout(rtnl
, &until
);
788 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
793 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
800 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int64_t priority
) {
803 assert_return(rtnl
, -EINVAL
);
804 assert_return(!rtnl
->event
, -EBUSY
);
806 assert(!rtnl
->io_event_source
);
807 assert(!rtnl
->time_event_source
);
810 rtnl
->event
= sd_event_ref(event
);
812 r
= sd_event_default(&rtnl
->event
);
817 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
821 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
825 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
829 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
833 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
837 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
841 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
848 sd_netlink_detach_event(rtnl
);
852 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
853 assert_return(rtnl
, -EINVAL
);
854 assert_return(rtnl
->event
, -ENXIO
);
856 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
858 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
860 rtnl
->event
= sd_event_unref(rtnl
->event
);
865 int sd_netlink_add_match(sd_netlink
*rtnl
,
867 sd_netlink_message_handler_t callback
,
869 _cleanup_free_
struct match_callback
*c
= NULL
;
872 assert_return(rtnl
, -EINVAL
);
873 assert_return(callback
, -EINVAL
);
874 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
876 c
= new0(struct match_callback
, 1);
880 c
->callback
= callback
;
882 c
->userdata
= userdata
;
887 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_LINK
);
894 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_IFADDR
);
898 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_IFADDR
);
905 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_ROUTE
);
909 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_ROUTE
);
915 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_RULE
);
919 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_RULE
);
927 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
934 int sd_netlink_remove_match(sd_netlink
*rtnl
,
936 sd_netlink_message_handler_t callback
,
938 struct match_callback
*c
;
941 assert_return(rtnl
, -EINVAL
);
942 assert_return(callback
, -EINVAL
);
943 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
945 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
946 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
947 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);
953 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_LINK
);
960 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_IFADDR
);
964 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_IFADDR
);
971 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_ROUTE
);
975 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_ROUTE
);