2 * BIRD -- Linux Netlink Interface
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
13 #include <sys/socket.h>
19 #include "nest/bird.h"
20 #include "nest/route.h"
21 #include "nest/protocol.h"
22 #include "nest/iface.h"
23 #include "lib/alloca.h"
24 #include "sysdep/unix/unix.h"
25 #include "sysdep/unix/krt.h"
26 #include "lib/socket.h"
27 #include "lib/string.h"
29 #include "conf/conf.h"
31 #include <asm/types.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
36 #ifdef HAVE_MPLS_KERNEL
37 #include <linux/lwtunnel.h>
40 #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41 #define MSG_TRUNC 0x20
49 #define IFF_LOWER_UP 0x10000
64 #ifndef RTA_ENCAP_TYPE
65 #define RTA_ENCAP_TYPE 21
72 #ifndef NETLINK_GET_STRICT_CHK
73 #define NETLINK_GET_STRICT_CHK 12
76 #define krt_ipv4(p) ((p)->af == AF_INET)
78 const int rt_default_ecmp
= 16;
82 struct krt_proto
*proto
;
90 * Synchronous Netlink interface
97 byte
*rx_buffer
; /* Receive buffer */
98 struct nlmsghdr
*last_hdr
; /* Recently received packet */
102 #define NL_RX_SIZE 32768
104 #define NL_OP_DELETE 0
105 #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
106 #define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
107 #define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
109 static linpool
*nl_linpool
;
111 static struct nl_sock nl_scan
= {.fd
= -1}; /* Netlink socket for synchronous scan */
112 static struct nl_sock nl_req
= {.fd
= -1}; /* Netlink socket for requests */
115 nl_open_sock(struct nl_sock
*nl
)
119 nl
->fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
121 die("Unable to open rtnetlink socket: %m");
122 nl
->seq
= (u32
) (current_time() TO_S
); /* Or perhaps random_u32() ? */
123 nl
->rx_buffer
= xmalloc(NL_RX_SIZE
);
130 nl_set_strict_dump(struct nl_sock
*nl UNUSED
, int strict UNUSED
)
133 return setsockopt(nl
->fd
, SOL_NETLINK
, NETLINK_GET_STRICT_CHK
, &strict
, sizeof(strict
));
140 nl_set_rcvbuf(int fd
, uint val
)
142 if (setsockopt(fd
, SOL_SOCKET
, SO_RCVBUFFORCE
, &val
, sizeof(val
)) < 0)
143 log(L_WARN
"KRT: Cannot set netlink rx buffer size to %u: %m", val
);
147 nl_cfg_rx_buffer_size(struct config
*cfg
)
151 struct proto_config
*pc
;
152 WALK_LIST(pc
, cfg
->protos
)
153 if ((pc
->protocol
== &proto_unix_kernel
) && !pc
->disabled
)
154 bufsize
= MAX(bufsize
, ((struct krt_config
*) pc
)->sys
.netlink_rx_buffer
);
163 if ((nl_scan
.fd
>= 0) && (nl_req
.fd
>= 0))
166 nl_open_sock(&nl_scan
);
167 nl_open_sock(&nl_req
);
169 if (nl_set_strict_dump(&nl_scan
, 1) < 0)
171 log(L_WARN
"KRT: Netlink strict checking failed, will scan all tables at once");
172 krt_use_shared_scan();
177 nl_send(struct nl_sock
*nl
, struct nlmsghdr
*nh
)
179 struct sockaddr_nl sa
;
181 memset(&sa
, 0, sizeof(sa
));
182 sa
.nl_family
= AF_NETLINK
;
184 nh
->nlmsg_seq
= ++(nl
->seq
);
185 nh
->nlmsg_len
= NLMSG_ALIGN(nh
->nlmsg_len
);
186 if (sendto(nl
->fd
, nh
, nh
->nlmsg_len
, 0, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0)
187 die("rtnetlink sendto: %m");
192 nl_request_dump_link(void)
196 struct ifinfomsg ifi
;
198 .nh
.nlmsg_type
= RTM_GETLINK
,
199 .nh
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifinfomsg
)),
200 .nh
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
,
201 .nh
.nlmsg_seq
= ++(nl_scan
.seq
),
202 .ifi
.ifi_family
= AF_UNSPEC
,
205 send(nl_scan
.fd
, &req
, sizeof(req
), 0);
206 nl_scan
.last_hdr
= NULL
;
210 nl_request_dump_addr(int af
)
214 struct ifaddrmsg ifa
;
216 .nh
.nlmsg_type
= RTM_GETADDR
,
217 .nh
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ifaddrmsg
)),
218 .nh
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
,
219 .nh
.nlmsg_seq
= ++(nl_scan
.seq
),
220 .ifa
.ifa_family
= af
,
223 send(nl_scan
.fd
, &req
, sizeof(req
), 0);
224 nl_scan
.last_hdr
= NULL
;
228 nl_request_dump_route(int af
, int table_id
)
236 .nh
.nlmsg_type
= RTM_GETROUTE
,
237 .nh
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
)),
238 .nh
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
,
239 .nh
.nlmsg_seq
= ++(nl_scan
.seq
),
240 .rtm
.rtm_family
= af
,
244 req
.rtm
.rtm_table
= table_id
;
247 req
.rta
.rta_type
= RTA_TABLE
;
248 req
.rta
.rta_len
= RTA_LENGTH(4);
249 req
.table_id
= table_id
;
250 req
.nh
.nlmsg_len
= NLMSG_ALIGN(req
.nh
.nlmsg_len
) + req
.rta
.rta_len
;
253 send(nl_scan
.fd
, &req
, req
.nh
.nlmsg_len
, 0);
254 nl_scan
.last_hdr
= NULL
;
258 static struct nlmsghdr
*
259 nl_get_reply(struct nl_sock
*nl
)
265 struct iovec iov
= { nl
->rx_buffer
, NL_RX_SIZE
};
266 struct sockaddr_nl sa
;
269 .msg_namelen
= sizeof(sa
),
273 int x
= recvmsg(nl
->fd
, &m
, 0);
275 die("nl_get_reply: %m");
276 if (sa
.nl_pid
) /* It isn't from the kernel */
278 DBG("Non-kernel packet\n");
282 nl
->last_hdr
= (void *) nl
->rx_buffer
;
283 if (m
.msg_flags
& MSG_TRUNC
)
284 bug("nl_get_reply: got truncated reply which should be impossible");
286 if (NLMSG_OK(nl
->last_hdr
, nl
->last_size
))
288 struct nlmsghdr
*h
= nl
->last_hdr
;
289 nl
->last_hdr
= NLMSG_NEXT(h
, nl
->last_size
);
290 if (h
->nlmsg_seq
!= nl
->seq
)
292 log(L_WARN
"nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
293 h
->nlmsg_seq
, nl
->seq
);
299 log(L_WARN
"nl_get_reply: Found packet remnant of size %d", nl
->last_size
);
304 static struct tbf rl_netlink_err
= TBF_DEFAULT_LOG_LIMITS
;
307 nl_error(struct nlmsghdr
*h
, int ignore_esrch
)
312 if (h
->nlmsg_len
< NLMSG_LENGTH(sizeof(struct nlmsgerr
)))
314 log(L_WARN
"Netlink: Truncated error message received");
317 e
= (struct nlmsgerr
*) NLMSG_DATA(h
);
319 if (ec
&& !(ignore_esrch
&& (ec
== ESRCH
)))
320 log_rl(&rl_netlink_err
, L_WARN
"Netlink: %s", strerror(ec
));
324 static struct nlmsghdr
*
327 struct nlmsghdr
*h
= nl_get_reply(&nl_scan
);
329 if (h
->nlmsg_type
== NLMSG_DONE
)
331 if (h
->nlmsg_type
== NLMSG_ERROR
)
340 nl_exchange(struct nlmsghdr
*pkt
, int ignore_esrch
)
344 nl_send(&nl_req
, pkt
);
347 h
= nl_get_reply(&nl_req
);
348 if (h
->nlmsg_type
== NLMSG_ERROR
)
350 log(L_WARN
"nl_exchange: Unexpected reply received");
352 return nl_error(h
, ignore_esrch
) ? -1 : 0;
359 static int nl_attr_len
;
362 nl_checkin(struct nlmsghdr
*h
, int lsize
)
364 nl_attr_len
= h
->nlmsg_len
- NLMSG_LENGTH(lsize
);
367 log(L_ERR
"nl_checkin: underrun by %d bytes", -nl_attr_len
);
370 return NLMSG_DATA(h
);
373 struct nl_want_attrs
{
380 #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
382 static struct nl_want_attrs ifla_attr_want
[BIRD_IFLA_MAX
] = {
383 [IFLA_IFNAME
] = { 1, 0, 0 },
384 [IFLA_MTU
] = { 1, 1, sizeof(u32
) },
385 [IFLA_MASTER
] = { 1, 1, sizeof(u32
) },
386 [IFLA_WIRELESS
] = { 1, 0, 0 },
390 #define BIRD_IFA_MAX (IFA_FLAGS+1)
392 static struct nl_want_attrs ifa_attr_want4
[BIRD_IFA_MAX
] = {
393 [IFA_ADDRESS
] = { 1, 1, sizeof(ip4_addr
) },
394 [IFA_LOCAL
] = { 1, 1, sizeof(ip4_addr
) },
395 [IFA_BROADCAST
] = { 1, 1, sizeof(ip4_addr
) },
396 [IFA_FLAGS
] = { 1, 1, sizeof(u32
) },
399 static struct nl_want_attrs ifa_attr_want6
[BIRD_IFA_MAX
] = {
400 [IFA_ADDRESS
] = { 1, 1, sizeof(ip6_addr
) },
401 [IFA_LOCAL
] = { 1, 1, sizeof(ip6_addr
) },
402 [IFA_FLAGS
] = { 1, 1, sizeof(u32
) },
406 #define BIRD_RTA_MAX (RTA_ENCAP+1)
408 static struct nl_want_attrs nexthop_attr_want4
[BIRD_RTA_MAX
] = {
409 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
410 [RTA_VIA
] = { 1, 0, 0 },
411 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
412 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
413 [RTA_ENCAP
] = { 1, 0, 0 },
416 static struct nl_want_attrs nexthop_attr_want6
[BIRD_RTA_MAX
] = {
417 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
418 [RTA_VIA
] = { 1, 0, 0 },
419 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
420 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
421 [RTA_ENCAP
] = { 1, 0, 0 },
424 #ifdef HAVE_MPLS_KERNEL
425 static struct nl_want_attrs nexthop_attr_want_mpls
[BIRD_RTA_MAX
] = {
426 [RTA_VIA
] = { 1, 0, 0 },
427 [RTA_NEWDST
] = { 1, 0, 0 },
430 static struct nl_want_attrs encap_mpls_want
[BIRD_RTA_MAX
] = {
431 [RTA_DST
] = { 1, 0, 0 },
435 static struct nl_want_attrs rtm_attr_want4
[BIRD_RTA_MAX
] = {
436 [RTA_DST
] = { 1, 1, sizeof(ip4_addr
) },
437 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
438 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
439 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
440 [RTA_PREFSRC
] = { 1, 1, sizeof(ip4_addr
) },
441 [RTA_METRICS
] = { 1, 0, 0 },
442 [RTA_MULTIPATH
] = { 1, 0, 0 },
443 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
444 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
445 [RTA_VIA
] = { 1, 0, 0 },
446 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
447 [RTA_ENCAP
] = { 1, 0, 0 },
450 static struct nl_want_attrs rtm_attr_want6
[BIRD_RTA_MAX
] = {
451 [RTA_DST
] = { 1, 1, sizeof(ip6_addr
) },
452 [RTA_SRC
] = { 1, 1, sizeof(ip6_addr
) },
453 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
454 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
455 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
456 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
457 [RTA_PREFSRC
] = { 1, 1, sizeof(ip6_addr
) },
458 [RTA_METRICS
] = { 1, 0, 0 },
459 [RTA_MULTIPATH
] = { 1, 0, 0 },
460 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
461 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
462 [RTA_VIA
] = { 1, 0, 0 },
463 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
464 [RTA_ENCAP
] = { 1, 0, 0 },
467 #ifdef HAVE_MPLS_KERNEL
468 static struct nl_want_attrs rtm_attr_want_mpls
[BIRD_RTA_MAX
] = {
469 [RTA_DST
] = { 1, 1, sizeof(u32
) },
470 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
471 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
472 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
473 [RTA_METRICS
] = { 1, 0, 0 },
474 [RTA_MULTIPATH
] = { 1, 0, 0 },
475 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
476 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
477 [RTA_VIA
] = { 1, 0, 0 },
478 [RTA_NEWDST
] = { 1, 0, 0 },
484 nl_parse_attrs(struct rtattr
*a
, struct nl_want_attrs
*want
, struct rtattr
**k
, int ksize
)
486 int max
= ksize
/ sizeof(struct rtattr
*);
489 for ( ; RTA_OK(a
, nl_attr_len
); a
= RTA_NEXT(a
, nl_attr_len
))
491 if ((a
->rta_type
>= max
) || !want
[a
->rta_type
].defined
)
494 if (want
[a
->rta_type
].checksize
&& (RTA_PAYLOAD(a
) != want
[a
->rta_type
].size
))
496 log(L_ERR
"nl_parse_attrs: Malformed attribute received");
505 log(L_ERR
"nl_parse_attrs: remnant of size %d", nl_attr_len
);
512 static inline u16
rta_get_u16(struct rtattr
*a
)
513 { return *(u16
*) RTA_DATA(a
); }
515 static inline u32
rta_get_u32(struct rtattr
*a
)
516 { return *(u32
*) RTA_DATA(a
); }
518 static inline ip4_addr
rta_get_ip4(struct rtattr
*a
)
519 { return ip4_ntoh(*(ip4_addr
*) RTA_DATA(a
)); }
521 static inline ip6_addr
rta_get_ip6(struct rtattr
*a
)
522 { return ip6_ntoh(*(ip6_addr
*) RTA_DATA(a
)); }
524 static inline ip_addr
rta_get_ipa(struct rtattr
*a
)
526 if (RTA_PAYLOAD(a
) == sizeof(ip4_addr
))
527 return ipa_from_ip4(rta_get_ip4(a
));
529 return ipa_from_ip6(rta_get_ip6(a
));
532 #ifdef HAVE_MPLS_KERNEL
533 static inline ip_addr
rta_get_via(struct rtattr
*a
)
535 struct rtvia
*v
= RTA_DATA(a
);
536 switch(v
->rtvia_family
) {
537 case AF_INET
: return ipa_from_ip4(ip4_ntoh(*(ip4_addr
*) v
->rtvia_addr
));
538 case AF_INET6
: return ipa_from_ip6(ip6_ntoh(*(ip6_addr
*) v
->rtvia_addr
));
543 static u32 rta_mpls_stack
[MPLS_MAX_LABEL_STACK
];
544 static inline int rta_get_mpls(struct rtattr
*a
, u32
*stack
)
549 if (RTA_PAYLOAD(a
) % 4)
550 log(L_WARN
"KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a
));
552 int labels
= mpls_get(RTA_DATA(a
), RTA_PAYLOAD(a
) & ~0x3, stack
);
556 log(L_WARN
"KRT: Too long MPLS stack received, ignoring");
565 nl_add_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
, const void *data
, uint dlen
)
567 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
568 uint len
= RTA_LENGTH(dlen
);
570 if (pos
+ len
> bufsize
)
571 bug("nl_add_attr: packet buffer overflow");
573 struct rtattr
*a
= (struct rtattr
*)((char *)h
+ pos
);
576 h
->nlmsg_len
= pos
+ len
;
579 memcpy(RTA_DATA(a
), data
, dlen
);
584 static inline struct rtattr
*
585 nl_open_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
)
587 return nl_add_attr(h
, bufsize
, code
, NULL
, 0);
591 nl_close_attr(struct nlmsghdr
*h
, struct rtattr
*a
)
593 a
->rta_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)a
;
597 nl_add_attr_u16(struct nlmsghdr
*h
, uint bufsize
, int code
, u16 data
)
599 nl_add_attr(h
, bufsize
, code
, &data
, 2);
603 nl_add_attr_u32(struct nlmsghdr
*h
, uint bufsize
, int code
, u32 data
)
605 nl_add_attr(h
, bufsize
, code
, &data
, 4);
609 nl_add_attr_ip4(struct nlmsghdr
*h
, uint bufsize
, int code
, ip4_addr ip4
)
612 nl_add_attr(h
, bufsize
, code
, &ip4
, sizeof(ip4
));
616 nl_add_attr_ip6(struct nlmsghdr
*h
, uint bufsize
, int code
, ip6_addr ip6
)
619 nl_add_attr(h
, bufsize
, code
, &ip6
, sizeof(ip6
));
623 nl_add_attr_ipa(struct nlmsghdr
*h
, uint bufsize
, int code
, ip_addr ipa
)
626 nl_add_attr_ip4(h
, bufsize
, code
, ipa_to_ip4(ipa
));
628 nl_add_attr_ip6(h
, bufsize
, code
, ipa_to_ip6(ipa
));
631 #ifdef HAVE_MPLS_KERNEL
633 nl_add_attr_mpls(struct nlmsghdr
*h
, uint bufsize
, int code
, int len
, u32
*stack
)
636 mpls_put(buf
, len
, stack
);
637 nl_add_attr(h
, bufsize
, code
, buf
, len
*4);
641 nl_add_attr_mpls_encap(struct nlmsghdr
*h
, uint bufsize
, int len
, u32
*stack
)
643 nl_add_attr_u16(h
, bufsize
, RTA_ENCAP_TYPE
, LWTUNNEL_ENCAP_MPLS
);
645 struct rtattr
*nest
= nl_open_attr(h
, bufsize
, RTA_ENCAP
);
646 nl_add_attr_mpls(h
, bufsize
, RTA_DST
, len
, stack
);
647 nl_close_attr(h
, nest
);
651 nl_add_attr_via(struct nlmsghdr
*h
, uint bufsize
, ip_addr ipa
)
653 struct rtvia
*via
= alloca(sizeof(struct rtvia
) + 16);
657 via
->rtvia_family
= AF_INET
;
658 put_ip4(via
->rtvia_addr
, ipa_to_ip4(ipa
));
659 nl_add_attr(h
, bufsize
, RTA_VIA
, via
, sizeof(struct rtvia
) + 4);
663 via
->rtvia_family
= AF_INET6
;
664 put_ip6(via
->rtvia_addr
, ipa_to_ip6(ipa
));
665 nl_add_attr(h
, bufsize
, RTA_VIA
, via
, sizeof(struct rtvia
) + 16);
670 static inline struct rtnexthop
*
671 nl_open_nexthop(struct nlmsghdr
*h
, uint bufsize
)
673 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
674 uint len
= RTNH_LENGTH(0);
676 if (pos
+ len
> bufsize
)
677 bug("nl_open_nexthop: packet buffer overflow");
679 h
->nlmsg_len
= pos
+ len
;
681 return (void *)h
+ pos
;
685 nl_close_nexthop(struct nlmsghdr
*h
, struct rtnexthop
*nh
)
687 nh
->rtnh_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)nh
;
691 nl_add_nexthop(struct nlmsghdr
*h
, uint bufsize
, struct nexthop
*nh
, int af UNUSED
)
693 #ifdef HAVE_MPLS_KERNEL
696 nl_add_attr_mpls(h
, bufsize
, RTA_NEWDST
, nh
->labels
, nh
->label
);
698 nl_add_attr_mpls_encap(h
, bufsize
, nh
->labels
, nh
->label
);
700 if (ipa_nonzero(nh
->gw
))
702 if (af
== (ipa_is_ip4(nh
->gw
) ? AF_INET
: AF_INET6
))
703 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
705 nl_add_attr_via(h
, bufsize
, nh
->gw
);
709 if (ipa_nonzero(nh
->gw
))
710 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
715 nl_add_multipath(struct nlmsghdr
*h
, uint bufsize
, struct nexthop
*nh
, int af
, ea_list
*eattrs
)
717 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_MULTIPATH
);
718 eattr
*flow
= ea_find(eattrs
, EA_KRT_REALM
);
720 for (; nh
; nh
= nh
->next
)
722 struct rtnexthop
*rtnh
= nl_open_nexthop(h
, bufsize
);
724 rtnh
->rtnh_flags
= 0;
725 rtnh
->rtnh_hops
= nh
->weight
;
726 rtnh
->rtnh_ifindex
= nh
->iface
->index
;
728 nl_add_nexthop(h
, bufsize
, nh
, af
);
730 if (nh
->flags
& RNF_ONLINK
)
731 rtnh
->rtnh_flags
|= RTNH_F_ONLINK
;
733 /* Our KRT_REALM is per-route, but kernel RTA_FLOW is per-nexthop.
734 Therefore, we need to attach the same attribute to each nexthop. */
736 nl_add_attr_u32(h
, bufsize
, RTA_FLOW
, flow
->u
.data
);
738 nl_close_nexthop(h
, rtnh
);
744 static struct nexthop
*
745 nl_parse_multipath(struct nl_parse_state
*s
, struct krt_proto
*p
, const net_addr
*n
, struct rtattr
*ra
, int af
, int krt_src
)
747 struct rtattr
*a
[BIRD_RTA_MAX
];
748 struct rtnexthop
*nh
= RTA_DATA(ra
);
749 struct nexthop
*rv
, *first
, **last
;
750 unsigned len
= RTA_PAYLOAD(ra
);
757 /* Use RTNH_OK(nh,len) ?? */
758 if ((len
< sizeof(*nh
)) || (len
< nh
->rtnh_len
))
761 if ((nh
->rtnh_flags
& RTNH_F_DEAD
) && (krt_src
!= KRT_SRC_BIRD
))
764 *last
= rv
= lp_allocz(s
->pool
, NEXTHOP_MAX_SIZE
);
767 rv
->weight
= nh
->rtnh_hops
;
768 rv
->iface
= if_find_by_index(nh
->rtnh_ifindex
);
771 log(L_ERR
"KRT: Received route %N with unknown ifindex %u", n
, nh
->rtnh_ifindex
);
775 /* Nonexistent RTNH_PAYLOAD ?? */
776 nl_attr_len
= nh
->rtnh_len
- RTNH_LENGTH(0);
780 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want4
, a
, sizeof(a
)))
785 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want6
, a
, sizeof(a
)))
789 #ifdef HAVE_MPLS_KERNEL
791 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want_mpls
, a
, sizeof(a
)))
795 rv
->labels
= rta_get_mpls(a
[RTA_NEWDST
], rv
->label
);
805 rv
->gw
= rta_get_ipa(a
[RTA_GATEWAY
]);
808 s
->rta_flow
= rta_get_u32(a
[RTA_FLOW
]);
810 #ifdef HAVE_MPLS_KERNEL
812 rv
->gw
= rta_get_via(a
[RTA_VIA
]);
815 if (ipa_nonzero(rv
->gw
))
817 if (nh
->rtnh_flags
& RTNH_F_ONLINK
)
818 rv
->flags
|= RNF_ONLINK
;
821 nbr
= neigh_find(&p
->p
, rv
->gw
, rv
->iface
,
822 (rv
->flags
& RNF_ONLINK
) ? NEF_ONLINK
: 0);
823 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
825 log(L_ERR
"KRT: Received route %N with strange next-hop %I", n
, rv
->gw
);
830 #ifdef HAVE_MPLS_KERNEL
831 if (a
[RTA_ENCAP
] && a
[RTA_ENCAP_TYPE
])
833 if (rta_get_u16(a
[RTA_ENCAP_TYPE
]) != LWTUNNEL_ENCAP_MPLS
)
835 log(L_WARN
"KRT: Received route %N with unknown encapsulation method %d",
836 n
, rta_get_u16(a
[RTA_ENCAP_TYPE
]));
840 struct rtattr
*enca
[BIRD_RTA_MAX
];
841 nl_attr_len
= RTA_PAYLOAD(a
[RTA_ENCAP
]);
842 nl_parse_attrs(RTA_DATA(a
[RTA_ENCAP
]), encap_mpls_want
, enca
, sizeof(enca
));
843 rv
->labels
= rta_get_mpls(enca
[RTA_DST
], rv
->label
);
848 len
-= NLMSG_ALIGN(nh
->rtnh_len
);
852 /* Ensure nexthops are sorted to satisfy nest invariant */
853 if (!nexthop_is_sorted(first
))
854 first
= nexthop_sort(first
);
859 log(L_ERR
"KRT: Received strange multipath route %N", n
);
864 nl_add_metrics(struct nlmsghdr
*h
, uint bufsize
, u32
*metrics
, int max
)
866 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_METRICS
);
869 for (t
= 1; t
< max
; t
++)
870 if (metrics
[0] & (1 << t
))
871 nl_add_attr_u32(h
, bufsize
, t
, metrics
[t
]);
877 nl_parse_metrics(struct rtattr
*hdr
, u32
*metrics
, int max
)
879 struct rtattr
*a
= RTA_DATA(hdr
);
880 int len
= RTA_PAYLOAD(hdr
);
883 for (; RTA_OK(a
, len
); a
= RTA_NEXT(a
, len
))
885 if (a
->rta_type
== RTA_UNSPEC
)
888 if (a
->rta_type
>= max
)
891 if (RTA_PAYLOAD(a
) != 4)
894 metrics
[0] |= 1 << a
->rta_type
;
895 metrics
[a
->rta_type
] = rta_get_u32(a
);
906 * Scanning of interfaces
910 nl_parse_link(struct nlmsghdr
*h
, int scan
)
913 struct rtattr
*a
[BIRD_IFLA_MAX
];
914 int new = h
->nlmsg_type
== RTM_NEWLINK
;
921 if (!(i
= nl_checkin(h
, sizeof(*i
))) || !nl_parse_attrs(IFLA_RTA(i
), ifla_attr_want
, a
, sizeof(a
)))
923 if (!a
[IFLA_IFNAME
] || (RTA_PAYLOAD(a
[IFLA_IFNAME
]) < 2) || !a
[IFLA_MTU
])
926 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
927 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
928 * We simply ignore all such messages with IFLA_WIRELESS without notice.
931 if (a
[IFLA_WIRELESS
])
934 log(L_ERR
"KIF: Malformed message received");
938 name
= RTA_DATA(a
[IFLA_IFNAME
]);
939 mtu
= rta_get_u32(a
[IFLA_MTU
]);
942 master
= rta_get_u32(a
[IFLA_MASTER
]);
944 ifi
= if_find_by_index(i
->ifi_index
);
947 DBG("KIF: IF%d(%s) goes down\n", i
->ifi_index
, name
);
955 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i
->ifi_index
, name
, mtu
, i
->ifi_flags
);
956 if (ifi
&& strncmp(ifi
->name
, name
, sizeof(ifi
->name
)-1))
959 strncpy(f
.name
, name
, sizeof(f
.name
)-1);
960 f
.index
= i
->ifi_index
;
963 f
.master_index
= master
;
964 f
.master
= if_find_by_index(master
);
968 f
.flags
|= IF_ADMIN_UP
;
969 if (fl
& IFF_LOWER_UP
)
970 f
.flags
|= IF_LINK_UP
;
971 if (fl
& IFF_LOOPBACK
) /* Loopback */
972 f
.flags
|= IF_MULTIACCESS
| IF_LOOPBACK
| IF_IGNORE
;
973 else if (fl
& IFF_POINTOPOINT
) /* PtP */
974 f
.flags
|= IF_MULTICAST
;
975 else if (fl
& IFF_BROADCAST
) /* Broadcast */
976 f
.flags
|= IF_MULTIACCESS
| IF_BROADCAST
| IF_MULTICAST
;
978 f
.flags
|= IF_MULTIACCESS
; /* NBMA */
980 if (fl
& IFF_MULTICAST
)
981 f
.flags
|= IF_MULTICAST
;
986 if_end_partial_update(ifi
);
991 nl_parse_addr4(struct ifaddrmsg
*i
, int scan
, int new)
993 struct rtattr
*a
[BIRD_IFA_MAX
];
998 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want4
, a
, sizeof(a
)))
1003 log(L_ERR
"KIF: Malformed message received (missing IFA_LOCAL)");
1006 if (!a
[IFA_ADDRESS
])
1008 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
1012 ifi
= if_find_by_index(i
->ifa_index
);
1015 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
1020 ifa_flags
= rta_get_u32(a
[IFA_FLAGS
]);
1022 ifa_flags
= i
->ifa_flags
;
1025 bzero(&ifa
, sizeof(ifa
));
1027 if (ifa_flags
& IFA_F_SECONDARY
)
1028 ifa
.flags
|= IA_SECONDARY
;
1030 ifa
.ip
= rta_get_ipa(a
[IFA_LOCAL
]);
1032 if (i
->ifa_prefixlen
> IP4_MAX_PREFIX_LENGTH
)
1034 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
1037 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
)
1039 ifa
.brd
= rta_get_ipa(a
[IFA_ADDRESS
]);
1040 net_fill_ip4(&ifa
.prefix
, rta_get_ip4(a
[IFA_ADDRESS
]), i
->ifa_prefixlen
);
1042 /* It is either a host address or a peer address */
1043 if (ipa_equal(ifa
.ip
, ifa
.brd
))
1044 ifa
.flags
|= IA_HOST
;
1047 ifa
.flags
|= IA_PEER
;
1048 ifa
.opposite
= ifa
.brd
;
1053 net_fill_ip4(&ifa
.prefix
, ipa_to_ip4(ifa
.ip
), i
->ifa_prefixlen
);
1054 net_normalize(&ifa
.prefix
);
1056 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
- 1)
1057 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
1059 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
- 2)
1060 ifa
.opposite
= ipa_opposite_m2(ifa
.ip
);
1062 if (ifi
->flags
& IF_BROADCAST
)
1064 /* If kernel offers us a broadcast address, we trust it */
1065 if (a
[IFA_BROADCAST
])
1066 ifa
.brd
= ipa_from_ip4(rta_get_ip4(a
[IFA_BROADCAST
]));
1067 /* Otherwise we create one (except for /31) */
1068 else if (i
->ifa_prefixlen
< (IP4_MAX_PREFIX_LENGTH
- 1))
1069 ifa
.brd
= ipa_from_ip4(ip4_or(ipa_to_ip4(ifa
.ip
),
1070 ip4_not(ip4_mkmask(i
->ifa_prefixlen
))));
1074 scope
= ipa_classify(ifa
.ip
);
1077 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
1080 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
1082 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1083 ifi
->index
, ifi
->name
,
1084 new ? "added" : "removed",
1085 ifa
.ip
, ifa
.flags
, &ifa
.prefix
, ifa
.brd
, ifa
.opposite
);
1093 if_end_partial_update(ifi
);
1097 nl_parse_addr6(struct ifaddrmsg
*i
, int scan
, int new)
1099 struct rtattr
*a
[BIRD_IFA_MAX
];
1104 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want6
, a
, sizeof(a
)))
1107 if (!a
[IFA_ADDRESS
])
1109 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
1113 ifi
= if_find_by_index(i
->ifa_index
);
1116 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
1121 ifa_flags
= rta_get_u32(a
[IFA_FLAGS
]);
1123 ifa_flags
= i
->ifa_flags
;
1126 bzero(&ifa
, sizeof(ifa
));
1128 if (ifa_flags
& IFA_F_SECONDARY
)
1129 ifa
.flags
|= IA_SECONDARY
;
1131 /* Ignore tentative addresses silently */
1132 if (ifa_flags
& IFA_F_TENTATIVE
)
1135 /* IFA_LOCAL can be unset for IPv6 interfaces */
1136 ifa
.ip
= rta_get_ipa(a
[IFA_LOCAL
] ? : a
[IFA_ADDRESS
]);
1138 if (i
->ifa_prefixlen
> IP6_MAX_PREFIX_LENGTH
)
1140 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
1143 if (i
->ifa_prefixlen
== IP6_MAX_PREFIX_LENGTH
)
1145 ifa
.brd
= rta_get_ipa(a
[IFA_ADDRESS
]);
1146 net_fill_ip6(&ifa
.prefix
, rta_get_ip6(a
[IFA_ADDRESS
]), i
->ifa_prefixlen
);
1148 /* It is either a host address or a peer address */
1149 if (ipa_equal(ifa
.ip
, ifa
.brd
))
1150 ifa
.flags
|= IA_HOST
;
1153 ifa
.flags
|= IA_PEER
;
1154 ifa
.opposite
= ifa
.brd
;
1159 net_fill_ip6(&ifa
.prefix
, ipa_to_ip6(ifa
.ip
), i
->ifa_prefixlen
);
1160 net_normalize(&ifa
.prefix
);
1162 if (i
->ifa_prefixlen
== IP6_MAX_PREFIX_LENGTH
- 1)
1163 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
1166 scope
= ipa_classify(ifa
.ip
);
1169 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
1172 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
1174 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1175 ifi
->index
, ifi
->name
,
1176 new ? "added" : "removed",
1177 ifa
.ip
, ifa
.flags
, &ifa
.prefix
, ifa
.brd
, ifa
.opposite
);
1185 if_end_partial_update(ifi
);
1189 nl_parse_addr(struct nlmsghdr
*h
, int scan
)
1191 struct ifaddrmsg
*i
;
1193 if (!(i
= nl_checkin(h
, sizeof(*i
))))
1196 int new = (h
->nlmsg_type
== RTM_NEWADDR
);
1198 switch (i
->ifa_family
)
1201 return nl_parse_addr4(i
, scan
, new);
1204 return nl_parse_addr6(i
, scan
, new);
1209 kif_do_scan(struct kif_proto
*p UNUSED
)
1215 nl_request_dump_link();
1216 while (h
= nl_get_scan())
1217 if (h
->nlmsg_type
== RTM_NEWLINK
|| h
->nlmsg_type
== RTM_DELLINK
)
1218 nl_parse_link(h
, 1);
1220 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1222 /* Re-resolve master interface for slaves */
1224 WALK_LIST(i
, iface_list
)
1225 if (i
->master_index
)
1231 .master_index
= i
->master_index
,
1232 .master
= if_find_by_index(i
->master_index
)
1235 if (f
.master
!= i
->master
)
1237 memcpy(f
.name
, i
->name
, sizeof(f
.name
));
1242 nl_request_dump_addr(AF_INET
);
1243 while (h
= nl_get_scan())
1244 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
1245 nl_parse_addr(h
, 1);
1247 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1249 nl_request_dump_addr(AF_INET6
);
1250 while (h
= nl_get_scan())
1251 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
1252 nl_parse_addr(h
, 1);
1254 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1264 krt_table_id(struct krt_proto
*p
)
1266 return KRT_CF
->sys
.table_id
;
1269 static HASH(struct krt_proto
) nl_table_map
;
1271 #define RTH_KEY(p) p->af, krt_table_id(p)
1272 #define RTH_NEXT(p) p->sys.hash_next
1273 #define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1274 #define RTH_FN(a,i) a ^ u32_hash(i)
1276 #define RTH_REHASH rth_rehash
1277 #define RTH_PARAMS /8, *2, 2, 2, 6, 20
1279 HASH_DEFINE_REHASH_FN(RTH
, struct krt_proto
)
1290 case RTD_UNREACHABLE
:
1300 nh_bufsize(struct nexthop
*nh
)
1303 for (; nh
!= NULL
; nh
= nh
->next
)
1304 rv
+= RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr
)));
1309 nl_send_route(struct krt_proto
*p
, rte
*e
, int op
)
1314 ea_list
*eattrs
= a
->eattrs
;
1315 int bufsize
= 128 + KRT_METRICS_MAX
*8 + nh_bufsize(&(a
->nh
));
1324 int rsize
= sizeof(*r
) + bufsize
;
1327 DBG("nl_send_route(%N,op=%x)\n", net
->n
.addr
, op
);
1329 bzero(&r
->h
, sizeof(r
->h
));
1330 bzero(&r
->r
, sizeof(r
->r
));
1331 r
->h
.nlmsg_type
= op
? RTM_NEWROUTE
: RTM_DELROUTE
;
1332 r
->h
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1333 r
->h
.nlmsg_flags
= op
| NLM_F_REQUEST
| NLM_F_ACK
;
1335 r
->r
.rtm_family
= p
->af
;
1336 r
->r
.rtm_dst_len
= net_pxlen(net
->n
.addr
);
1337 r
->r
.rtm_protocol
= RTPROT_BIRD
;
1338 r
->r
.rtm_scope
= RT_SCOPE_NOWHERE
;
1339 #ifdef HAVE_MPLS_KERNEL
1340 if (p
->af
== AF_MPLS
)
1343 * Kernel MPLS code is a bit picky. We must:
1344 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1345 * 2) Never use RTA_PRIORITY
1348 u32 label
= net_mpls(net
->n
.addr
);
1349 nl_add_attr_mpls(&r
->h
, rsize
, RTA_DST
, 1, &label
);
1350 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1351 r
->r
.rtm_type
= RTN_UNICAST
;
1356 nl_add_attr_ipa(&r
->h
, rsize
, RTA_DST
, net_prefix(net
->n
.addr
));
1358 /* Add source address for IPv6 SADR routes */
1359 if (net
->n
.addr
->type
== NET_IP6_SADR
)
1361 net_addr_ip6_sadr
*a
= (void *) &net
->n
.addr
;
1362 nl_add_attr_ip6(&r
->h
, rsize
, RTA_SRC
, a
->src_prefix
);
1363 r
->r
.rtm_src_len
= a
->src_pxlen
;
1368 * Strange behavior for RTM_DELROUTE:
1369 * 1) rtm_family is ignored in IPv6, works for IPv4
1370 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1371 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1374 if (krt_table_id(p
) < 256)
1375 r
->r
.rtm_table
= krt_table_id(p
);
1377 nl_add_attr_u32(&r
->h
, rsize
, RTA_TABLE
, krt_table_id(p
));
1379 if (p
->af
== AF_MPLS
)
1381 else if (a
->source
== RTS_DUMMY
)
1382 priority
= e
->u
.krt
.metric
;
1383 else if (KRT_CF
->sys
.metric
)
1384 priority
= KRT_CF
->sys
.metric
;
1385 else if ((op
!= NL_OP_DELETE
) && (ea
= ea_find(eattrs
, EA_KRT_METRIC
)))
1386 priority
= ea
->u
.data
;
1389 nl_add_attr_u32(&r
->h
, rsize
, RTA_PRIORITY
, priority
);
1391 /* For route delete, we do not specify remaining route attributes */
1392 if (op
== NL_OP_DELETE
)
1395 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1396 if (p
->af
== AF_MPLS
)
1397 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1398 else if (ea
= ea_find(eattrs
, EA_KRT_SCOPE
))
1399 r
->r
.rtm_scope
= ea
->u
.data
;
1400 else if (a
->dest
== RTD_UNICAST
&& ipa_zero(a
->nh
.gw
))
1401 r
->r
.rtm_scope
= RT_SCOPE_LINK
;
1403 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1405 if (ea
= ea_find(eattrs
, EA_KRT_PREFSRC
))
1406 nl_add_attr_ipa(&r
->h
, rsize
, RTA_PREFSRC
, *(ip_addr
*)ea
->u
.ptr
->data
);
1408 if (ea
= ea_find(eattrs
, EA_KRT_REALM
))
1409 nl_add_attr_u32(&r
->h
, rsize
, RTA_FLOW
, ea
->u
.data
);
1412 u32 metrics
[KRT_METRICS_MAX
];
1415 struct ea_walk_state ews
= { .eattrs
= eattrs
};
1416 while (ea
= ea_walk(&ews
, EA_KRT_METRICS
, KRT_METRICS_MAX
))
1418 int id
= ea
->id
- EA_KRT_METRICS
;
1419 metrics
[0] |= 1 << id
;
1420 metrics
[id
] = ea
->u
.data
;
1424 nl_add_metrics(&r
->h
, rsize
, metrics
, KRT_METRICS_MAX
);
1429 r
->r
.rtm_type
= RTN_UNICAST
;
1430 struct nexthop
*nh
= &(a
->nh
);
1432 nl_add_multipath(&r
->h
, rsize
, nh
, p
->af
, eattrs
);
1435 nl_add_attr_u32(&r
->h
, rsize
, RTA_OIF
, nh
->iface
->index
);
1436 nl_add_nexthop(&r
->h
, rsize
, nh
, p
->af
);
1438 if (nh
->flags
& RNF_ONLINK
)
1439 r
->r
.rtm_flags
|= RTNH_F_ONLINK
;
1443 r
->r
.rtm_type
= RTN_BLACKHOLE
;
1445 case RTD_UNREACHABLE
:
1446 r
->r
.rtm_type
= RTN_UNREACHABLE
;
1449 r
->r
.rtm_type
= RTN_PROHIBIT
;
1454 bug("krt_capable inconsistent with nl_send_route");
1458 /* Ignore missing for DELETE */
1459 return nl_exchange(&r
->h
, (op
== NL_OP_DELETE
));
1463 krt_replace_rte(struct krt_proto
*p
, net
*n UNUSED
, rte
*new, rte
*old
)
1469 err
= nl_send_route(p
, new, NL_OP_REPLACE
);
1474 nl_send_route(p
, old
, NL_OP_DELETE
);
1477 err
= nl_send_route(p
, new, NL_OP_ADD
);
1483 bmap_clear(&p
->sync_map
, new->id
);
1485 bmap_set(&p
->sync_map
, new->id
);
1490 #define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0)
1491 #define SKIP(ARG, ...) do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0)
1494 nl_parse_route(struct nl_parse_state
*s
, struct nlmsghdr
*h
)
1496 struct krt_proto
*p
;
1498 struct rtattr
*a
[BIRD_RTA_MAX
];
1499 int new = h
->nlmsg_type
== RTM_NEWROUTE
;
1501 net_addr dst
, src
= {};
1505 u32 def_scope
= RT_SCOPE_UNIVERSE
;
1508 if (!(i
= nl_checkin(h
, sizeof(*i
))))
1511 switch (i
->rtm_family
)
1514 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want4
, a
, sizeof(a
)))
1518 net_fill_ip4(&dst
, rta_get_ip4(a
[RTA_DST
]), i
->rtm_dst_len
);
1520 net_fill_ip4(&dst
, IP4_NONE
, 0);
1524 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want6
, a
, sizeof(a
)))
1528 net_fill_ip6(&dst
, rta_get_ip6(a
[RTA_DST
]), i
->rtm_dst_len
);
1530 net_fill_ip6(&dst
, IP6_NONE
, 0);
1533 net_fill_ip6(&src
, rta_get_ip6(a
[RTA_SRC
]), i
->rtm_src_len
);
1535 net_fill_ip6(&src
, IP6_NONE
, 0);
1538 #ifdef HAVE_MPLS_KERNEL
1540 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want_mpls
, a
, sizeof(a
)))
1544 SKIP0("MPLS route without RTA_DST\n");
1546 if (rta_get_mpls(a
[RTA_DST
], rta_mpls_stack
) != 1)
1547 SKIP0("MPLS route with multi-label RTA_DST\n");
1549 net_fill_mpls(&dst
, rta_mpls_stack
[0]);
1558 oif
= rta_get_u32(a
[RTA_OIF
]);
1561 table_id
= rta_get_u32(a
[RTA_TABLE
]);
1563 table_id
= i
->rtm_table
;
1565 if (i
->rtm_flags
& RTM_F_CLONED
)
1568 /* Do we know this table? */
1569 p
= HASH_FIND(nl_table_map
, RTH
, i
->rtm_family
, table_id
);
1571 SKIP("unknown table %u\n", table_id
);
1573 if (a
[RTA_SRC
] && (p
->p
.net_type
!= NET_IP6_SADR
))
1574 SKIP("src prefix for non-SADR channel\n");
1579 if (i
->rtm_tos
!= 0) /* We don't support TOS */
1580 SKIP("TOS %02x\n", i
->rtm_tos
);
1582 if (s
->scan
&& !new)
1583 SKIP("RTM_DELROUTE in scan\n");
1585 if (a
[RTA_PRIORITY
])
1586 priority
= rta_get_u32(a
[RTA_PRIORITY
]);
1588 int c
= net_classify(&dst
);
1589 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
1590 SKIP("strange class/scope\n");
1592 switch (i
->rtm_protocol
)
1595 SKIP("proto unspec\n");
1597 case RTPROT_REDIRECT
:
1598 krt_src
= KRT_SRC_REDIRECT
;
1602 krt_src
= KRT_SRC_KERNEL
;
1608 krt_src
= KRT_SRC_BIRD
;
1613 krt_src
= KRT_SRC_ALIEN
;
1617 if (p
->p
.net_type
== NET_IP6_SADR
)
1619 n
= alloca(sizeof(net_addr_ip6_sadr
));
1620 net_fill_ip6_sadr(n
, net6_prefix(&dst
), net6_pxlen(&dst
),
1621 net6_prefix(&src
), net6_pxlen(&src
));
1624 net
*net
= net_get(p
->p
.main_channel
->table
, n
);
1626 rta
*ra
= lp_allocz(s
->pool
, RTA_MAX_SIZE
);
1627 ra
->src
= p
->p
.main_source
;
1628 ra
->source
= RTS_INHERIT
;
1629 ra
->scope
= SCOPE_UNIVERSE
;
1632 s
->rta_flow
= rta_get_u32(a
[RTA_FLOW
]);
1636 switch (i
->rtm_type
)
1639 ra
->dest
= RTD_UNICAST
;
1641 if (a
[RTA_MULTIPATH
])
1643 struct nexthop
*nh
= nl_parse_multipath(s
, p
, n
, a
[RTA_MULTIPATH
], i
->rtm_family
, krt_src
);
1645 SKIP("strange RTA_MULTIPATH\n");
1647 nexthop_link(ra
, nh
);
1651 if ((i
->rtm_flags
& RTNH_F_DEAD
) && (krt_src
!= KRT_SRC_BIRD
))
1652 SKIP("ignore RTNH_F_DEAD\n");
1654 ra
->nh
.iface
= if_find_by_index(oif
);
1657 log(L_ERR
"KRT: Received route %N with unknown ifindex %u", net
->n
.addr
, oif
);
1662 ra
->nh
.gw
= rta_get_ipa(a
[RTA_GATEWAY
]);
1664 #ifdef HAVE_MPLS_KERNEL
1666 ra
->nh
.gw
= rta_get_via(a
[RTA_VIA
]);
1669 if (ipa_nonzero(ra
->nh
.gw
))
1671 /* Silently skip strange 6to4 routes */
1672 const net_addr_ip6 sit
= NET_ADDR_IP6(IP6_NONE
, 96);
1673 if ((i
->rtm_family
== AF_INET6
) && ipa_in_netX(ra
->nh
.gw
, (net_addr
*) &sit
))
1676 if (i
->rtm_flags
& RTNH_F_ONLINK
)
1677 ra
->nh
.flags
|= RNF_ONLINK
;
1680 nbr
= neigh_find(&p
->p
, ra
->nh
.gw
, ra
->nh
.iface
,
1681 (ra
->nh
.flags
& RNF_ONLINK
) ? NEF_ONLINK
: 0);
1682 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
1684 log(L_ERR
"KRT: Received route %N with strange next-hop %I", net
->n
.addr
,
1692 ra
->dest
= RTD_BLACKHOLE
;
1694 case RTN_UNREACHABLE
:
1695 ra
->dest
= RTD_UNREACHABLE
;
1698 ra
->dest
= RTD_PROHIBIT
;
1700 /* FIXME: What about RTN_THROW? */
1702 SKIP("type %d\n", i
->rtm_type
);
1706 #ifdef HAVE_MPLS_KERNEL
1707 if ((i
->rtm_family
== AF_MPLS
) && a
[RTA_NEWDST
] && !ra
->nh
.next
)
1708 ra
->nh
.labels
= rta_get_mpls(a
[RTA_NEWDST
], ra
->nh
.label
);
1710 if (a
[RTA_ENCAP
] && a
[RTA_ENCAP_TYPE
] && !ra
->nh
.next
)
1712 switch (rta_get_u16(a
[RTA_ENCAP_TYPE
]))
1714 case LWTUNNEL_ENCAP_MPLS
:
1716 struct rtattr
*enca
[BIRD_RTA_MAX
];
1717 nl_attr_len
= RTA_PAYLOAD(a
[RTA_ENCAP
]);
1718 nl_parse_attrs(RTA_DATA(a
[RTA_ENCAP
]), encap_mpls_want
, enca
, sizeof(enca
));
1719 ra
->nh
.labels
= rta_get_mpls(enca
[RTA_DST
], ra
->nh
.label
);
1723 SKIP("unknown encapsulation method %d\n", rta_get_u16(a
[RTA_ENCAP_TYPE
]));
1729 if (i
->rtm_scope
!= def_scope
)
1731 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1732 ea
->next
= ra
->eattrs
;
1734 ea
->flags
= EALF_SORTED
;
1736 ea
->attrs
[0].id
= EA_KRT_SCOPE
;
1737 ea
->attrs
[0].flags
= 0;
1738 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1739 ea
->attrs
[0].u
.data
= i
->rtm_scope
;
1744 ip_addr ps
= rta_get_ipa(a
[RTA_PREFSRC
]);
1746 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1747 ea
->next
= ra
->eattrs
;
1749 ea
->flags
= EALF_SORTED
;
1751 ea
->attrs
[0].id
= EA_KRT_PREFSRC
;
1752 ea
->attrs
[0].flags
= 0;
1753 ea
->attrs
[0].type
= EAF_TYPE_IP_ADDRESS
;
1755 struct adata
*ad
= lp_alloc(s
->pool
, sizeof(struct adata
) + sizeof(ps
));
1756 ad
->length
= sizeof(ps
);
1757 memcpy(ad
->data
, &ps
, sizeof(ps
));
1759 ea
->attrs
[0].u
.ptr
= ad
;
1762 /* Can be set per-route or per-nexthop */
1765 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1766 ea
->next
= ra
->eattrs
;
1768 ea
->flags
= EALF_SORTED
;
1770 ea
->attrs
[0].id
= EA_KRT_REALM
;
1771 ea
->attrs
[0].flags
= 0;
1772 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1773 ea
->attrs
[0].u
.data
= s
->rta_flow
;
1778 u32 metrics
[KRT_METRICS_MAX
];
1779 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + KRT_METRICS_MAX
* sizeof(eattr
));
1782 if (nl_parse_metrics(a
[RTA_METRICS
], metrics
, ARRAY_SIZE(metrics
)) < 0)
1784 log(L_ERR
"KRT: Received route %N with strange RTA_METRICS attribute", net
->n
.addr
);
1788 for (t
= 1; t
< KRT_METRICS_MAX
; t
++)
1789 if (metrics
[0] & (1 << t
))
1791 ea
->attrs
[n
].id
= EA_CODE(PROTOCOL_KERNEL
, KRT_METRICS_OFFSET
+ t
);
1792 ea
->attrs
[n
].flags
= 0;
1793 ea
->attrs
[n
].type
= EAF_TYPE_INT
; /* FIXME: Some are EAF_TYPE_BITFIELD */
1794 ea
->attrs
[n
].u
.data
= metrics
[t
];
1800 ea
->next
= ra
->eattrs
;
1801 ea
->flags
= EALF_SORTED
;
1807 rte
*e
= rte_get_temp(ra
);
1809 e
->u
.krt
.src
= krt_src
;
1810 e
->u
.krt
.proto
= i
->rtm_protocol
;
1813 e
->u
.krt
.metric
= priority
;
1816 krt_got_route(p
, e
);
1818 krt_got_route_async(p
, e
, new);
1824 krt_do_scan(struct krt_proto
*p
)
1826 struct nl_parse_state s
= {
1832 /* Table-specific scan or shared scan */
1834 nl_request_dump_route(p
->af
, krt_table_id(p
));
1836 nl_request_dump_route(AF_UNSPEC
, 0);
1839 while (h
= nl_get_scan())
1841 if (h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)
1842 nl_parse_route(&s
, h
);
1844 log(L_DEBUG
"nl_scan_fire: Unknown packet received (type=%d)", h
->nlmsg_type
);
1849 * Asynchronous Netlink interface
1852 static sock
*nl_async_sk
; /* BIRD socket for asynchronous notifications */
1853 static byte
*nl_async_rx_buffer
; /* Receive buffer */
1854 static uint nl_async_bufsize
; /* Kernel rx buffer size for the netlink socket */
1855 static struct config
*nl_last_config
; /* For tracking changes to nl_async_bufsize */
1858 nl_async_msg(struct nlmsghdr
*h
)
1860 struct nl_parse_state s
= {
1866 switch (h
->nlmsg_type
)
1870 DBG("KRT: Received async route notification (%d)\n", h
->nlmsg_type
);
1871 nl_parse_route(&s
, h
);
1875 DBG("KRT: Received async link notification (%d)\n", h
->nlmsg_type
);
1877 nl_parse_link(h
, 0);
1881 DBG("KRT: Received async address notification (%d)\n", h
->nlmsg_type
);
1883 nl_parse_addr(h
, 0);
1886 DBG("KRT: Received unknown async notification (%d)\n", h
->nlmsg_type
);
1891 nl_async_hook(sock
*sk
, uint size UNUSED
)
1893 struct iovec iov
= { nl_async_rx_buffer
, NL_RX_SIZE
};
1894 struct sockaddr_nl sa
;
1897 .msg_namelen
= sizeof(sa
),
1905 x
= recvmsg(sk
->fd
, &m
, 0);
1908 if (errno
== ENOBUFS
)
1911 * Netlink reports some packets have been thrown away.
1912 * One day we might react to it by asking for route table
1913 * scan in near future.
1915 log(L_WARN
"Kernel dropped some netlink messages, will resync on next scan.");
1916 return 1; /* More data are likely to be ready */
1918 else if (errno
!= EWOULDBLOCK
)
1919 log(L_ERR
"Netlink recvmsg: %m");
1922 if (sa
.nl_pid
) /* It isn't from the kernel */
1924 DBG("Non-kernel packet\n");
1927 h
= (void *) nl_async_rx_buffer
;
1929 if (m
.msg_flags
& MSG_TRUNC
)
1931 log(L_WARN
"Netlink got truncated asynchronous message");
1934 while (NLMSG_OK(h
, len
))
1937 h
= NLMSG_NEXT(h
, len
);
1940 log(L_WARN
"nl_async_hook: Found packet remnant of size %d", len
);
1945 nl_async_err_hook(sock
*sk
, int e UNUSED
)
1947 nl_async_hook(sk
, 0);
1954 struct sockaddr_nl sa
;
1960 DBG("KRT: Opening async netlink socket\n");
1962 fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
1965 log(L_ERR
"Unable to open asynchronous rtnetlink socket: %m");
1969 bzero(&sa
, sizeof(sa
));
1970 sa
.nl_family
= AF_NETLINK
;
1971 sa
.nl_groups
= RTMGRP_LINK
|
1972 RTMGRP_IPV4_IFADDR
| RTMGRP_IPV4_ROUTE
|
1973 RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_ROUTE
;
1975 if (bind(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) < 0)
1977 log(L_ERR
"Unable to bind asynchronous rtnetlink socket: %m");
1982 nl_async_rx_buffer
= xmalloc(NL_RX_SIZE
);
1984 sk
= nl_async_sk
= sk_new(krt_pool
);
1985 sk
->type
= SK_MAGIC
;
1986 sk
->rx_hook
= nl_async_hook
;
1987 sk
->err_hook
= nl_async_err_hook
;
1989 if (sk_open(sk
) < 0)
1990 bug("Netlink: sk_open failed");
1994 nl_update_async_bufsize(void)
1996 /* No async socket */
2000 /* Already reconfigured */
2001 if (nl_last_config
== config
)
2004 /* Update netlink buffer size */
2005 uint bufsize
= nl_cfg_rx_buffer_size(config
);
2006 if (bufsize
&& (bufsize
!= nl_async_bufsize
))
2008 /* Log message for reconfigurations only */
2010 log(L_INFO
"KRT: Changing netlink rx buffer size to %u", bufsize
);
2012 nl_set_rcvbuf(nl_async_sk
->fd
, bufsize
);
2013 nl_async_bufsize
= bufsize
;
2016 nl_last_config
= config
;
2021 * Interface to the UNIX krt module
2025 krt_sys_io_init(void)
2027 nl_linpool
= lp_new_default(krt_pool
);
2028 HASH_INIT(nl_table_map
, krt_pool
, 6);
2032 krt_sys_start(struct krt_proto
*p
)
2034 struct krt_proto
*old
= HASH_FIND(nl_table_map
, RTH
, p
->af
, krt_table_id(p
));
2038 log(L_ERR
"%s: Kernel table %u already registered by %s",
2039 p
->p
.name
, krt_table_id(p
), old
->p
.name
);
2043 HASH_INSERT2(nl_table_map
, RTH
, krt_pool
, p
);
2047 nl_update_async_bufsize();
2053 krt_sys_shutdown(struct krt_proto
*p
)
2055 nl_update_async_bufsize();
2057 HASH_REMOVE2(nl_table_map
, RTH
, krt_pool
, p
);
2061 krt_sys_reconfigure(struct krt_proto
*p UNUSED
, struct krt_config
*n
, struct krt_config
*o
)
2063 nl_update_async_bufsize();
2065 return (n
->sys
.table_id
== o
->sys
.table_id
) && (n
->sys
.metric
== o
->sys
.metric
);
2069 krt_sys_init_config(struct krt_config
*cf
)
2071 cf
->sys
.table_id
= RT_TABLE_MAIN
;
2072 cf
->sys
.metric
= 32;
2076 krt_sys_copy_config(struct krt_config
*d
, struct krt_config
*s
)
2078 d
->sys
.table_id
= s
->sys
.table_id
;
2079 d
->sys
.metric
= s
->sys
.metric
;
2082 static const char *krt_metrics_names
[KRT_METRICS_MAX
] = {
2083 NULL
, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2084 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2087 static const char *krt_features_names
[KRT_FEATURES_MAX
] = {
2088 "ecn", NULL
, NULL
, "allfrag"
2092 krt_sys_get_attr(const eattr
*a
, byte
*buf
, int buflen UNUSED
)
2096 case EA_KRT_PREFSRC
:
2097 bsprintf(buf
, "prefsrc");
2101 bsprintf(buf
, "realm");
2105 bsprintf(buf
, "scope");
2109 buf
+= bsprintf(buf
, "lock:");
2110 ea_format_bitfield(a
, buf
, buflen
, krt_metrics_names
, 2, KRT_METRICS_MAX
);
2113 case EA_KRT_FEATURES
:
2114 buf
+= bsprintf(buf
, "features:");
2115 ea_format_bitfield(a
, buf
, buflen
, krt_features_names
, 0, KRT_FEATURES_MAX
);
2119 int id
= (int)EA_ID(a
->id
) - KRT_METRICS_OFFSET
;
2120 if (id
> 0 && id
< KRT_METRICS_MAX
)
2122 bsprintf(buf
, "%s", krt_metrics_names
[id
]);
2133 kif_sys_start(struct kif_proto
*p UNUSED
)
2140 kif_sys_shutdown(struct kif_proto
*p UNUSED
)
2145 kif_update_sysdep_addr(struct iface
*i UNUSED
)