1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Tom Gundersen <teg@jklm.no>
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/socket.h>
25 #include "sd-netlink.h"
27 #include "alloc-util.h"
32 #include "netlink-internal.h"
33 #include "netlink-util.h"
34 #include "socket-util.h"
/* Allocates a new sd_netlink object with new0() and fills in its basic state:
 * reference count (REFCNT_INIT), socket address family (AF_NETLINK), the PID
 * of the creating process, an empty match_callbacks list and a read buffer
 * sized for at least one struct nlmsghdr.
 * `ret` must be non-NULL; otherwise assert_return() yields -EINVAL.
 * NOTE(review): allocation-failure handling, the statement the sequence-number
 * comment refers to, the hand-over to *ret and the closing brace are not
 * visible in this view -- confirm against the full file. */
37 static int sd_netlink_new(sd_netlink
**ret
) {
/* Released by sd_netlink_unrefp when this variable goes out of scope. */
38 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
40 assert_return(ret
, -EINVAL
);
42 rtnl
= new0(sd_netlink
, 1);
46 rtnl
->n_ref
= REFCNT_INIT
;
48 rtnl
->sockaddr
.nl
.nl_family
= AF_NETLINK
;
/* Remembered so that rtnl_pid_changed() can compare it with getpid() later. */
49 rtnl
->original_pid
= getpid();
51 LIST_HEAD_INIT(rtnl
->match_callbacks
);
53 /* We guarantee that the read buffer has at least space for
55 if (!greedy_realloc((void**)&rtnl
->rbuffer
, &rtnl
->rbuffer_allocated
,
56 sizeof(struct nlmsghdr
), sizeof(uint8_t)))
59 /* Change notification responses have sequence 0, so we must
60 * start our request sequence numbers at 1, or we may confuse our
61 * responses with notifications from the kernel */
/* Builds an sd_netlink object around an existing socket `fd`.
 * A fresh object is created with sd_netlink_new(), the socket's local address
 * is read into rtnl->sockaddr with getsockname(), and the resulting address
 * family is compared against AF_NETLINK.
 * `ret` must be non-NULL (-EINVAL otherwise).
 * NOTE(review): the error returns, the storing of fd and the assignment to
 * *ret are not visible in this view. */
70 int sd_netlink_new_from_netlink(sd_netlink
**ret
, int fd
) {
71 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
75 assert_return(ret
, -EINVAL
);
77 r
= sd_netlink_new(&rtnl
);
/* getsockname() takes the buffer size as in/out argument. */
81 addrlen
= sizeof(rtnl
->sockaddr
);
83 r
= getsockname(fd
, &rtnl
->sockaddr
.sa
, &addrlen
);
/* Guard against sockets that are not netlink sockets. */
87 if (rtnl
->sockaddr
.nl
.nl_family
!= AF_NETLINK
)
/* Returns true when the calling process's PID differs from the PID stored in
 * rtnl->original_pid, i.e. the object is being used from another process than
 * the one that recorded original_pid. */
98 static bool rtnl_pid_changed(sd_netlink
*rtnl
) {
101 /* We don't support people creating an rtnl connection and
102 * keeping it around over a fork(). Let's complain. */
104 return rtnl
->original_pid
!= getpid();
/* Creates an sd_netlink object for the caller-supplied socket `fd` and binds
 * it via socket_bind().
 * `ret` must be non-NULL (-EINVAL) and `fd` must be >= 0 (-EBADF).
 * On failure the object's fd field is reset to -1 so that releasing the
 * object does not close the caller's descriptor.
 * NOTE(review): the statement storing fd in the object, the error checks and
 * the success return are not visible in this view. */
107 int sd_netlink_open_fd(sd_netlink
**ret
, int fd
) {
108 _cleanup_(sd_netlink_unrefp
) sd_netlink
*rtnl
= NULL
;
111 assert_return(ret
, -EINVAL
);
112 assert_return(fd
>= 0, -EBADF
);
114 r
= sd_netlink_new(&rtnl
);
120 r
= socket_bind(rtnl
);
122 rtnl
->fd
= -1; /* on failure, the caller remains owner of the fd, hence don't close it here */
/* Opens a NETLINK_ROUTE socket with socket_open() and wraps it in an
 * sd_netlink object through sd_netlink_open_fd().
 * The local fd is declared _cleanup_close_, so it is closed automatically
 * when it is still >= 0 at scope exit.
 * NOTE(review): the error checks and the point where ownership of fd is given
 * up are not visible in this view. */
132 int sd_netlink_open(sd_netlink
**ret
) {
133 _cleanup_close_
int fd
= -1;
136 fd
= socket_open(NETLINK_ROUTE
);
140 r
= sd_netlink_open_fd(ret
, fd
);
/* Forwards to fd_inc_rcvbuf() for the object's socket, passing `size`
 * through unchanged, and returns that helper's result.
 * NOTE(review): unlike the other public entry points visible in this file,
 * `rtnl` is dereferenced here without an assert_return() NULL check. */
149 int sd_netlink_inc_rcvbuf(const sd_netlink
*const rtnl
, const int size
) {
150 return fd_inc_rcvbuf(rtnl
->fd
, size
);
/* Takes an additional reference on `rtnl`.
 * Yields NULL (via assert_return) when `rtnl` is NULL or when the PID has
 * changed since the object recorded original_pid.
 * NOTE(review): the return statement is not visible in this view. */
153 sd_netlink
*sd_netlink_ref(sd_netlink
*rtnl
) {
154 assert_return(rtnl
, NULL
);
155 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
/* A live object already holds at least one reference, so the incremented
 * count must be 2 or more. */
158 assert_se(REFCNT_INC(rtnl
->n_ref
) >= 2);
/* Drops one reference on `rtnl`. When REFCNT_DEC() reports 0 the object is
 * torn down: queued and partially received messages are unreferenced, the
 * reply-callback hashmap and priority queue are freed, the event sources and
 * the event loop reference are released, all match callbacks are removed,
 * the broadcast group reference map is freed and the socket is closed.
 * NOTE(review): a NULL check on `rtnl`, some free() calls, the release of the
 * object itself and the return value are not visible in this view. */
163 sd_netlink
*sd_netlink_unref(sd_netlink
*rtnl
) {
167 assert_return(!rtnl_pid_changed(rtnl
), NULL
);
169 if (REFCNT_DEC(rtnl
->n_ref
) == 0) {
170 struct match_callback
*f
;
/* Release every fully received message still waiting in the read queue. */
173 for (i
= 0; i
< rtnl
->rqueue_size
; i
++)
174 sd_netlink_message_unref(rtnl
->rqueue
[i
]);
/* Same for messages that were only partially received. */
177 for (i
= 0; i
< rtnl
->rqueue_partial_size
; i
++)
178 sd_netlink_message_unref(rtnl
->rqueue_partial
[i
]);
179 free(rtnl
->rqueue_partial
);
183 hashmap_free_free(rtnl
->reply_callbacks
);
184 prioq_free(rtnl
->reply_callbacks_prioq
);
186 sd_event_source_unref(rtnl
->io_event_source
);
187 sd_event_source_unref(rtnl
->time_event_source
);
188 sd_event_unref(rtnl
->event
);
/* Keep removing the list head until the list is empty; this relies on
 * sd_netlink_remove_match() unlinking the matching entry. */
190 while ((f
= rtnl
->match_callbacks
)) {
191 sd_netlink_remove_match(rtnl
, f
->type
, f
->callback
, f
->userdata
);
194 hashmap_free(rtnl
->broadcast_group_refs
);
196 safe_close(rtnl
->fd
);
/* Assigns the next sequence number of `rtnl` to message `m` and seals the
 * message with rtnl_message_seal().
 * The PID must not have changed since the object recorded original_pid
 * (hard assert). */
203 static void rtnl_seal_message(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
205 assert(!rtnl_pid_changed(rtnl
));
209 /* don't use seq == 0, as that is used for broadcasts, so we
210 would get confused by replies to such messages */
/* GNU "a ?: b" form: use the post-incremented serial unless it was 0, in
 * which case the counter is advanced once more and that value is used. */
211 m
->hdr
->nlmsg_seq
= rtnl
->serial
++ ? : rtnl
->serial
++;
213 rtnl_message_seal(m
);
/* Seals `message` (which assigns its sequence number) and writes it to the
 * socket with socket_write_message(). The message's serial is reported back
 * through `serial`.
 * Fails via assert_return with -EINVAL for a NULL `nl` or `message`, -ECHILD
 * when the PID changed, and -EPERM when the message is already sealed.
 * NOTE(review): the remaining parameter list, the check of the write result,
 * any guard around the `*serial` store and the return value are not visible
 * in this view. */
218 int sd_netlink_send(sd_netlink
*nl
,
219 sd_netlink_message
*message
,
223 assert_return(nl
, -EINVAL
);
224 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
225 assert_return(message
, -EINVAL
);
/* A sealed message already carries a sequence number and must not be sent
 * again through this path. */
226 assert_return(!message
->sealed
, -EPERM
);
228 rtnl_seal_message(nl
, message
);
230 r
= socket_write_message(nl
, message
);
235 *serial
= rtnl_message_get_serial(message
);
/* Ensures the read queue can take one more message.
 * When rqueue_size has reached RTNL_RQUEUE_MAX a debug message is logged;
 * otherwise the rqueue array is grown with GREEDY_REALLOC to hold
 * rqueue_size + 1 entries.
 * NOTE(review): the return values for the "queue full", "out of memory" and
 * success cases are not visible in this view. */
240 int rtnl_rqueue_make_room(sd_netlink
*rtnl
) {
243 if (rtnl
->rqueue_size
>= RTNL_RQUEUE_MAX
) {
244 log_debug("rtnl: exhausted the read queue size (%d)", RTNL_RQUEUE_MAX
);
248 if (!GREEDY_REALLOC(rtnl
->rqueue
, rtnl
->rqueue_allocated
, rtnl
->rqueue_size
+ 1))
/* Ensures the queue of partially received messages can take one more entry.
 * When rqueue_partial_size has reached RTNL_RQUEUE_MAX a debug message is
 * logged; otherwise the rqueue_partial array is grown with GREEDY_REALLOC to
 * hold rqueue_partial_size + 1 entries.
 * NOTE(review): the return values are not visible in this view. */
254 int rtnl_rqueue_partial_make_room(sd_netlink
*rtnl
) {
257 if (rtnl
->rqueue_partial_size
>= RTNL_RQUEUE_MAX
) {
258 log_debug("rtnl: exhausted the partial read queue size (%d)", RTNL_RQUEUE_MAX
);
262 if (!GREEDY_REALLOC(rtnl
->rqueue_partial
, rtnl
->rqueue_partial_allocated
,
263 rtnl
->rqueue_partial_size
+ 1))
/* Hands the oldest queued message to the caller through `message`.
 * If the read queue is empty, one attempt is made to read from the socket
 * first. The first queue entry is then returned and the remaining entries
 * are shifted down by one slot.
 * NOTE(review): the handling of the socket_read_message() result and the
 * return value are not visible in this view. */
269 static int dispatch_rqueue(sd_netlink
*rtnl
, sd_netlink_message
**message
) {
275 if (rtnl
->rqueue_size
<= 0) {
276 /* Try to read a new message */
277 r
= socket_read_message(rtnl
);
282 /* Dispatch a queued message */
283 *message
= rtnl
->rqueue
[0];
/* rqueue_size is decremented first, so it is exactly the number of entries
 * that have to move; memmove() is used because the ranges overlap. */
284 rtnl
->rqueue_size
--;
285 memmove(rtnl
->rqueue
, rtnl
->rqueue
+ 1, sizeof(sd_netlink_message
*) * rtnl
->rqueue_size
);
/* Handles the reply callback at the head of the timeout priority queue.
 * A synthetic error message carrying -ETIMEDOUT and the callback's serial is
 * created, the callback entry is removed from both the priority queue and
 * the reply_callbacks hashmap, and the user callback is invoked with the
 * synthetic message. A failing callback is only logged at debug level.
 * NOTE(review): the checks deciding whether the head entry has actually
 * expired, the release of `c` and the return value are not visible here. */
290 static int process_timeout(sd_netlink
*rtnl
) {
291 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
292 struct reply_callback
*c
;
298 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
302 n
= now(CLOCK_MONOTONIC
);
306 r
= rtnl_message_new_synthetic_error(-ETIMEDOUT
, c
->serial
, &m
);
/* The peeked entry must still be the head when it is popped. */
310 assert_se(prioq_pop(rtnl
->reply_callbacks_prioq
) == c
);
311 hashmap_remove(rtnl
->reply_callbacks
, &c
->serial
);
313 r
= c
->callback(rtnl
, m
, c
->userdata
);
315 log_debug_errno(r
, "sd-netlink: timedout callback failed: %m");
/* Delivers message `m` to the reply callback registered for its serial.
 * The callback entry is looked up (and removed) in the reply_callbacks
 * hashmap by the message's serial, removed from the timeout priority queue,
 * and its user callback is invoked with `m`. A failing callback is only
 * logged at debug level. The entry `c` is declared _cleanup_free_, so it is
 * freed when the function returns.
 * NOTE(review): the "no callback found" path, the conditions around
 * prioq_remove(), the NLMSG_DONE branch body and the return value are not
 * visible in this view. */
322 static int process_reply(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
323 _cleanup_free_
struct reply_callback
*c
= NULL
;
331 serial
= rtnl_message_get_serial(m
);
332 c
= hashmap_remove(rtnl
->reply_callbacks
, &serial
);
337 prioq_remove(rtnl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
339 r
= sd_netlink_message_get_type(m
, &type
);
343 if (type
== NLMSG_DONE
)
346 r
= c
->callback(rtnl
, m
, c
->userdata
);
348 log_debug_errno(r
, "sd-netlink: callback failed: %m");
/* Delivers message `m` to registered match callbacks.
 * The message type is read once, then the match_callbacks list is walked and
 * every entry whose type equals the message type has its callback invoked
 * with `m`. A failing callback is logged at debug level.
 * NOTE(review): whether iteration stops after a callback reports a non-zero
 * result, and the return value, are not visible in this view. */
353 static int process_match(sd_netlink
*rtnl
, sd_netlink_message
*m
) {
354 struct match_callback
*c
;
361 r
= sd_netlink_message_get_type(m
, &type
);
365 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
) {
366 if (type
== c
->type
) {
367 r
= c
->callback(rtnl
, m
, c
->userdata
);
370 log_debug_errno(r
, "sd-netlink: match callback failed: %m");
/* One processing step: first process_timeout() is run, then one message is
 * taken with dispatch_rqueue(). A broadcast message is passed to
 * process_match(); process_reply() is the other handler called here.
 * The local message reference `m` is dropped automatically at scope exit.
 * NOTE(review): the early returns after each step, the else-branch structure
 * and the way `ret` is filled in are not visible in this view. */
380 static int process_running(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
381 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*m
= NULL
;
386 r
= process_timeout(rtnl
);
390 r
= dispatch_rqueue(rtnl
, &m
);
396 if (sd_netlink_message_is_broadcast(m
)) {
397 r
= process_match(rtnl
, m
);
401 r
= process_reply(rtnl
, m
);
/* Public entry point for one processing step on `rtnl`; the work is done by
 * process_running(), and `ret` is passed through to it.
 * Fails via assert_return with -EINVAL for a NULL `rtnl`, -ECHILD when the
 * PID changed, and -EBUSY when called while a processing step is already in
 * progress on this object (rtnl->processing set).
 * NOTE(review): the return statement is not visible in this view. */
422 int sd_netlink_process(sd_netlink
*rtnl
, sd_netlink_message
**ret
) {
/* NOTE(review): presumably keeps the object alive for the duration of the
 * call even if a callback drops the last reference -- confirm against the
 * macro definition. */
423 NETLINK_DONT_DESTROY(rtnl
);
426 assert_return(rtnl
, -EINVAL
);
427 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
428 assert_return(!rtnl
->processing
, -EBUSY
);
/* The flag brackets the call so that re-entrant use is rejected above. */
430 rtnl
->processing
= true;
431 r
= process_running(rtnl
, ret
);
432 rtnl
->processing
= false;
/* Converts a relative timeout `usec` into an absolute CLOCK_MONOTONIC
 * timestamp (now + usec). The value (uint64_t) -1 is treated specially, and
 * RTNL_DEFAULT_TIMEOUT can be substituted for the caller's value.
 * NOTE(review): what is returned for (uint64_t) -1 and the condition under
 * which the default is substituted are not visible in this view. */
437 static usec_t
calc_elapse(uint64_t usec
) {
438 if (usec
== (uint64_t) -1)
442 usec
= RTNL_DEFAULT_TIMEOUT
;
444 return now(CLOCK_MONOTONIC
) + usec
;
/* Waits on the netlink socket with ppoll().
 * The wait time `m` starts as USEC_INFINITY. It can be derived from
 * sd_netlink_get_timeout() as the time remaining until `until` (0 if already
 * past), and the caller's `timeout_usec` is compared against it when it is
 * not (uint64_t) -1. A wait time of (uint64_t) -1 makes ppoll() block
 * without timeout.
 * Returns 1 when ppoll() reported at least one ready descriptor, else 0.
 * NOTE(review): the setup of the pollfd entry, the need_more branching, the
 * assignment performed when timeout_usec is smaller, and the error returns
 * are not visible in this view. */
447 static int rtnl_poll(sd_netlink
*rtnl
, bool need_more
, uint64_t timeout_usec
) {
448 struct pollfd p
[1] = {};
450 usec_t m
= USEC_INFINITY
;
455 e
= sd_netlink_get_events(rtnl
);
460 /* Caller wants more data, and doesn't care about
461 * what's been read or any other timeouts. */
465 /* Caller wants to process if there is something to
466 * process, but doesn't care otherwise */
468 r
= sd_netlink_get_timeout(rtnl
, &until
);
473 nw
= now(CLOCK_MONOTONIC
);
/* Remaining time until the deadline, clamped so it never goes negative. */
474 m
= until
> nw
? until
- nw
: 0;
478 if (timeout_usec
!= (uint64_t) -1 && (m
== (uint64_t) -1 || timeout_usec
< m
))
484 r
= ppoll(p
, 1, m
== (uint64_t) -1 ? NULL
: timespec_store(&ts
, m
), NULL
);
488 return r
> 0 ? 1 : 0;
/* Waits for activity on `nl` for up to `timeout_usec` by calling
 * rtnl_poll() with need_more = false.
 * Fails via assert_return with -EINVAL for a NULL `nl` and -ECHILD when the
 * PID changed. A non-empty read queue is checked before polling.
 * NOTE(review): the value returned when the read queue is non-empty is not
 * visible in this view. */
491 int sd_netlink_wait(sd_netlink
*nl
, uint64_t timeout_usec
) {
492 assert_return(nl
, -EINVAL
);
493 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
495 if (nl
->rqueue_size
> 0)
498 return rtnl_poll(nl
, false, timeout_usec
);
/* Ordering function for struct reply_callback entries based on their
 * `timeout` field; sd_netlink_call_async() passes it to
 * prioq_ensure_allocated().
 * Entries with timeout == 0 are distinguished from entries with a non-zero
 * timeout before the plain less-than / greater-than comparison is applied.
 * NOTE(review): the value returned by each branch is not visible in this
 * view, so the resulting order cannot be stated from here. */
501 static int timeout_compare(const void *a
, const void *b
) {
502 const struct reply_callback
*x
= a
, *y
= b
;
504 if (x
->timeout
!= 0 && y
->timeout
== 0)
507 if (x
->timeout
== 0 && y
->timeout
!= 0)
510 if (x
->timeout
< y
->timeout
)
513 if (x
->timeout
> y
->timeout
)
/* Sends message `m` and registers `callback` (with its userdata) to be
 * invoked for the reply.
 * Steps visible here: the reply_callbacks hashmap is allocated on demand;
 * when `usec` is not (uint64_t) -1 the timeout priority queue is allocated
 * as well; a reply_callback entry is created with the deadline computed by
 * calc_elapse(); the message is sent; the entry is stored in the hashmap
 * keyed by its serial and, when its timeout is non-zero, in the priority
 * queue. sd_netlink_call_async_cancel() is called for the entry's serial on
 * one path after prioq_put().
 * Fails via assert_return with -EINVAL for NULL `nl`, `m` or `callback`, and
 * -ECHILD when the PID changed.
 * NOTE(review): the remaining parameters, all error checks, the assignment
 * of c->serial and the return value are not visible in this view. */
519 int sd_netlink_call_async(sd_netlink
*nl
,
520 sd_netlink_message
*m
,
521 sd_netlink_message_handler_t callback
,
525 struct reply_callback
*c
;
529 assert_return(nl
, -EINVAL
);
530 assert_return(m
, -EINVAL
);
531 assert_return(callback
, -EINVAL
);
532 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
534 r
= hashmap_ensure_allocated(&nl
->reply_callbacks
, &uint64_hash_ops
);
538 if (usec
!= (uint64_t) -1) {
539 r
= prioq_ensure_allocated(&nl
->reply_callbacks_prioq
, timeout_compare
);
544 c
= new0(struct reply_callback
, 1);
548 c
->callback
= callback
;
549 c
->userdata
= userdata
;
550 c
->timeout
= calc_elapse(usec
);
552 k
= sd_netlink_send(nl
, m
, &s
);
560 r
= hashmap_put(nl
->reply_callbacks
, &c
->serial
, c
);
/* Only entries with a deadline take part in timeout handling. */
566 if (c
->timeout
!= 0) {
567 r
= prioq_put(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
570 sd_netlink_call_async_cancel(nl
, c
->serial
);
/* Cancels the pending reply callback registered under `serial`: the entry is
 * removed from the reply_callbacks hashmap and from the timeout priority
 * queue.
 * Fails via assert_return with -EINVAL for a NULL `nl` or a serial of 0, and
 * -ECHILD when the PID changed.
 * NOTE(review): the declaration and initialization of the lookup key `s`,
 * the "not found" path, the release of the entry and the return value are
 * not visible in this view. */
581 int sd_netlink_call_async_cancel(sd_netlink
*nl
, uint32_t serial
) {
582 struct reply_callback
*c
;
585 assert_return(nl
, -EINVAL
);
586 assert_return(serial
!= 0, -EINVAL
);
587 assert_return(!rtnl_pid_changed(nl
), -ECHILD
);
589 c
= hashmap_remove(nl
->reply_callbacks
, &s
);
594 prioq_remove(nl
->reply_callbacks_prioq
, c
, &c
->prioq_idx
);
/* Synchronous request/response: sends `message` and looks for the reply
 * carrying the same serial.
 * Steps visible here: the message is sent and its serial remembered; a
 * deadline is computed with calc_elapse(); the read queue is scanned for a
 * message whose serial matches; a match is taken out of the queue and
 * examined with sd_netlink_message_get_errno() and
 * sd_netlink_message_get_type() (NLMSG_DONE is singled out); when nothing
 * matched, more data is read with socket_read_message() and the socket is
 * polled with rtnl_poll(need_more = true).
 * Fails via assert_return with -EINVAL for a NULL `rtnl` or `message`, and
 * -ECHILD when the PID changed.
 * NOTE(review): the enclosing loop, the timeout parameter, the decrement of
 * rqueue_size, the hand-over to `ret` and all return statements are not
 * visible in this view. */
600 int sd_netlink_call(sd_netlink
*rtnl
,
601 sd_netlink_message
*message
,
603 sd_netlink_message
**ret
) {
608 assert_return(rtnl
, -EINVAL
);
609 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
610 assert_return(message
, -EINVAL
);
612 r
= sd_netlink_send(rtnl
, message
, &serial
);
616 timeout
= calc_elapse(usec
);
622 for (i
= 0; i
< rtnl
->rqueue_size
; i
++) {
623 uint32_t received_serial
;
625 received_serial
= rtnl_message_get_serial(rtnl
->rqueue
[i
]);
627 if (received_serial
== serial
) {
/* Takes over the queue's reference; dropped at scope exit unless it is
 * handed on. */
628 _cleanup_(sd_netlink_message_unrefp
) sd_netlink_message
*incoming
= NULL
;
631 incoming
= rtnl
->rqueue
[i
];
633 /* found a match, remove from rqueue and return it */
634 memmove(rtnl
->rqueue
+ i
,rtnl
->rqueue
+ i
+ 1,
635 sizeof(sd_netlink_message
*) * (rtnl
->rqueue_size
- i
- 1));
638 r
= sd_netlink_message_get_errno(incoming
);
642 r
= sd_netlink_message_get_type(incoming
, &type
);
646 if (type
== NLMSG_DONE
) {
660 r
= socket_read_message(rtnl
);
664 /* received message, so try to process straight away */
670 n
= now(CLOCK_MONOTONIC
);
/* (uint64_t) -1 means "no limit" for rtnl_poll(). */
676 left
= (uint64_t) -1;
678 r
= rtnl_poll(rtnl
, true, left
);
/* Reports which poll events the caller should wait for on the netlink
 * socket; the decision depends on whether the read queue is empty.
 * Fails via assert_return with -EINVAL for a NULL `rtnl` and -ECHILD when
 * the PID changed.
 * NOTE(review): the values returned for the empty and non-empty queue cases
 * are not visible in this view. */
686 int sd_netlink_get_events(sd_netlink
*rtnl
) {
687 assert_return(rtnl
, -EINVAL
);
688 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
690 if (rtnl
->rqueue_size
== 0)
/* Stores in *timeout_usec the time at which `rtnl` next needs attention.
 * Visible cases: a non-empty read queue is handled first; otherwise the head
 * of the timeout priority queue is inspected and *timeout_usec becomes
 * either (uint64_t) -1 or that entry's timeout.
 * Fails via assert_return with -EINVAL for a NULL `rtnl` or `timeout_usec`,
 * and -ECHILD when the PID changed.
 * NOTE(review): the value stored for a non-empty queue, the condition that
 * selects (uint64_t) -1, and the return values are not visible in this
 * view. */
696 int sd_netlink_get_timeout(sd_netlink
*rtnl
, uint64_t *timeout_usec
) {
697 struct reply_callback
*c
;
699 assert_return(rtnl
, -EINVAL
);
700 assert_return(timeout_usec
, -EINVAL
);
701 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
703 if (rtnl
->rqueue_size
> 0) {
708 c
= prioq_peek(rtnl
->reply_callbacks_prioq
);
710 *timeout_usec
= (uint64_t) -1;
714 *timeout_usec
= c
->timeout
;
/* I/O event handler registered with sd_event_add_io() in
 * sd_netlink_attach_event(). `userdata` is the sd_netlink object; one
 * processing step is run on it with sd_netlink_process(), discarding the
 * message (ret = NULL).
 * NOTE(review): the handling of the result and the return value are not
 * visible in this view. */
719 static int io_callback(sd_event_source
*s
, int fd
, uint32_t revents
, void *userdata
) {
720 sd_netlink
*rtnl
= userdata
;
725 r
= sd_netlink_process(rtnl
, NULL
);
/* Timer event handler registered with sd_event_add_time() in
 * sd_netlink_attach_event(). `userdata` is the sd_netlink object; one
 * processing step is run on it with sd_netlink_process(), discarding the
 * message (ret = NULL).
 * NOTE(review): the handling of the result and the return value are not
 * visible in this view. */
732 static int time_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
733 sd_netlink
*rtnl
= userdata
;
738 r
= sd_netlink_process(rtnl
, NULL
);
/* Prepare handler installed on the I/O event source in
 * sd_netlink_attach_event(). It refreshes both event sources from the
 * object's current state: the I/O source gets the event mask from
 * sd_netlink_get_events(), the timer source gets the deadline from
 * sd_netlink_get_timeout(), and the timer is enabled or disabled depending
 * on whether `r` is positive at that point.
 * NOTE(review): the error checks and the statements between the calls are
 * not visible in this view. */
745 static int prepare_callback(sd_event_source
*s
, void *userdata
) {
746 sd_netlink
*rtnl
= userdata
;
753 e
= sd_netlink_get_events(rtnl
);
757 r
= sd_event_source_set_io_events(rtnl
->io_event_source
, e
);
761 r
= sd_netlink_get_timeout(rtnl
, &until
);
767 j
= sd_event_source_set_time(rtnl
->time_event_source
, until
);
772 r
= sd_event_source_set_enabled(rtnl
->time_event_source
, r
> 0);
/* Attaches `rtnl` to an sd_event loop.
 * The loop is either the caller's `event` (a reference is taken) or the one
 * obtained from sd_event_default(). Two sources are created on it: an I/O
 * source on the netlink fd ("rtnl-receive-message", with prepare_callback
 * installed as its prepare handler) and a CLOCK_MONOTONIC timer source
 * ("rtnl-timer"). Both receive the requested `priority`.
 * sd_netlink_detach_event() is called on one path at the end of the
 * function.
 * Fails via assert_return with -EINVAL for a NULL `rtnl` and -EBUSY when an
 * event loop is already attached.
 * NOTE(review): the condition choosing between `event` and the default loop,
 * the error checks after each call and the return values are not visible in
 * this view. */
779 int sd_netlink_attach_event(sd_netlink
*rtnl
, sd_event
*event
, int priority
) {
782 assert_return(rtnl
, -EINVAL
);
783 assert_return(!rtnl
->event
, -EBUSY
);
785 assert(!rtnl
->io_event_source
);
786 assert(!rtnl
->time_event_source
);
789 rtnl
->event
= sd_event_ref(event
);
791 r
= sd_event_default(&rtnl
->event
);
/* Created with an event mask of 0; prepare_callback sets the mask through
 * sd_event_source_set_io_events(). */
796 r
= sd_event_add_io(rtnl
->event
, &rtnl
->io_event_source
, rtnl
->fd
, 0, io_callback
, rtnl
);
800 r
= sd_event_source_set_priority(rtnl
->io_event_source
, priority
);
804 r
= sd_event_source_set_description(rtnl
->io_event_source
, "rtnl-receive-message");
808 r
= sd_event_source_set_prepare(rtnl
->io_event_source
, prepare_callback
);
812 r
= sd_event_add_time(rtnl
->event
, &rtnl
->time_event_source
, CLOCK_MONOTONIC
, 0, 0, time_callback
, rtnl
);
816 r
= sd_event_source_set_priority(rtnl
->time_event_source
, priority
);
820 r
= sd_event_source_set_description(rtnl
->time_event_source
, "rtnl-timer");
827 sd_netlink_detach_event(rtnl
);
/* Detaches `rtnl` from its event loop: the I/O source, the timer source and
 * the event loop reference are released, and each field is overwritten with
 * the value returned by the corresponding unref call.
 * Fails via assert_return with -EINVAL for a NULL `rtnl` and -ENXIO when no
 * event loop is attached.
 * NOTE(review): the return value is not visible in this view. */
831 int sd_netlink_detach_event(sd_netlink
*rtnl
) {
832 assert_return(rtnl
, -EINVAL
);
833 assert_return(rtnl
->event
, -ENXIO
);
835 rtnl
->io_event_source
= sd_event_source_unref(rtnl
->io_event_source
);
837 rtnl
->time_event_source
= sd_event_source_unref(rtnl
->time_event_source
);
839 rtnl
->event
= sd_event_unref(rtnl
->event
);
/* Registers `callback` (with its userdata) as a match callback on `rtnl`.
 * A match_callback entry is allocated and filled in, references are taken on
 * rtnetlink broadcast groups (LINK, IPV4/IPV6 IFADDR, IPV4/IPV6 ROUTE), and
 * the entry is prepended to the match_callbacks list.
 * Fails via assert_return with -EINVAL for a NULL `rtnl` or `callback`, and
 * -ECHILD when the PID changed.
 * NOTE(review): the remaining parameters, the selection of which groups are
 * referenced for which message type, the error checks, the point where the
 * _cleanup_free_ pointer gives up ownership, and the return value are not
 * visible in this view. */
844 int sd_netlink_add_match(sd_netlink
*rtnl
,
846 sd_netlink_message_handler_t callback
,
848 _cleanup_free_
struct match_callback
*c
= NULL
;
851 assert_return(rtnl
, -EINVAL
);
852 assert_return(callback
, -EINVAL
);
853 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
855 c
= new0(struct match_callback
, 1);
859 c
->callback
= callback
;
861 c
->userdata
= userdata
;
866 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_LINK
);
873 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_IFADDR
);
877 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_IFADDR
);
884 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV4_ROUTE
);
888 r
= socket_broadcast_group_ref(rtnl
, RTNLGRP_IPV6_ROUTE
);
896 LIST_PREPEND(match_callbacks
, rtnl
->match_callbacks
, c
);
/* Removes a previously registered match callback from `rtnl`.
 * The match_callbacks list is searched for an entry whose callback, type and
 * userdata all equal the arguments; that entry is unlinked from the list.
 * References on rtnetlink broadcast groups (LINK, IPV4/IPV6 IFADDR,
 * IPV4/IPV6 ROUTE) are dropped with socket_broadcast_group_unref().
 * Fails via assert_return with -EINVAL for a NULL `rtnl` or `callback`, and
 * -ECHILD when the PID changed.
 * NOTE(review): the remaining parameters, the release of the unlinked entry,
 * the selection of which groups are unreferenced for which message type,
 * and the return values are not visible in this view. */
903 int sd_netlink_remove_match(sd_netlink
*rtnl
,
905 sd_netlink_message_handler_t callback
,
907 struct match_callback
*c
;
910 assert_return(rtnl
, -EINVAL
);
911 assert_return(callback
, -EINVAL
);
912 assert_return(!rtnl_pid_changed(rtnl
), -ECHILD
);
914 LIST_FOREACH(match_callbacks
, c
, rtnl
->match_callbacks
)
915 if (c
->callback
== callback
&& c
->type
== type
&& c
->userdata
== userdata
) {
916 LIST_REMOVE(match_callbacks
, rtnl
->match_callbacks
, c
);
922 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_LINK
);
929 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_IFADDR
);
933 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_IFADDR
);
940 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV4_ROUTE
);
944 r
= socket_broadcast_group_unref(rtnl
, RTNLGRP_IPV6_ROUTE
);