2 * BIRD -- Linux Netlink Interface
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
13 #include <sys/socket.h>
19 #include "nest/bird.h"
20 #include "nest/route.h"
21 #include "nest/protocol.h"
22 #include "nest/iface.h"
23 #include "lib/alloca.h"
24 #include "sysdep/unix/unix.h"
25 #include "sysdep/unix/krt.h"
26 #include "lib/socket.h"
27 #include "lib/string.h"
29 #include "conf/conf.h"
31 #include <asm/types.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
36 #ifdef HAVE_MPLS_KERNEL
37 #include <linux/lwtunnel.h>
40 #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41 #define MSG_TRUNC 0x20
49 #define IFF_LOWER_UP 0x10000
64 #ifndef RTA_ENCAP_TYPE
65 #define RTA_ENCAP_TYPE 21
/*
 * Address-family predicates. `p` is any pointer with an `af` member; in this
 * file it is always a struct krt_proto pointer. krt_ecmp6() tests only for
 * AF_INET6: the name reflects how it is used, namely to send an IPv6 route with
 * several next hops as one route per next hop (NL_OP_ADD followed by
 * NL_OP_APPEND) instead of a single route carrying RTA_MULTIPATH.
 */
72 #define krt_ipv4(p) ((p)->af == AF_INET)
73 #define krt_ecmp6(p) ((p)->af == AF_INET6)
/*
 * Externally visible constant; nothing in the visible part of this file reads it.
 * NOTE(review): presumably the default limit on ECMP next hops advertised to
 * the rest of BIRD for this platform -- confirm against its declaration.
 */
75 const int rt_default_ecmp
= 16;
78 * Structure nl_parse_state keeps state of received route processing. Ideally,
79 * we could just independently parse received Netlink messages and immediately
80 * propagate received routes to the rest of BIRD, but older Linux kernels (before
81 * version 4.11) represent and announce IPv6 ECMP routes not as one route with
82 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
83 * routes with the same prefix. More recent kernels work as with IPv4.
85 * Therefore, BIRD keeps currently processed route in nl_parse_state structure
86 * and postpones its propagation until we expect it to be final; i.e., when
87 * non-matching route is received or when the scan ends. When another matching
88 * route is received, it is merged with the already processed route to form an
89 * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
90 * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
91 * routes with RTA_MULTIPATH set are just considered non-matching.
93 * This is ignored for asynchronous notifications (every notification is handled
94 * as a separate route). It is not an issue for our routes, as we ignore such
95 * notifications anyway. But importing alien IPv6 ECMP routes does not work
96 * properly with older kernels.
98 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
99 * for the same prefix.
102 struct nl_parse_state
104 struct linpool
*pool
;
110 struct krt_proto
*proto
;
119 * Synchronous Netlink interface
126 byte
*rx_buffer
; /* Receive buffer */
127 struct nlmsghdr
*last_hdr
; /* Recently received packet */
/* Size in bytes of the receive buffer allocated for each Netlink socket */
131 #define NL_RX_SIZE 8192
/*
 * Operation codes passed as `op` to nl_send_route(). The value is OR-ed
 * directly into nlmsg_flags together with NLM_F_REQUEST | NLM_F_ACK, and it
 * also selects the message type: zero (NL_OP_DELETE) yields RTM_DELROUTE, any
 * non-zero value yields RTM_NEWROUTE.
 */
133 #define NL_OP_DELETE 0
134 #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
135 #define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
136 #define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
/* Linear pool that nl_parse_begin() installs as the parse state's `pool` */
138 static linpool
*nl_linpool
;
/*
 * The two synchronous sockets: nl_scan carries dump requests and their replies
 * (nl_request_dump / nl_get_scan), nl_req carries single request/reply
 * exchanges (nl_exchange).
 * NOTE(review): fd starts at -1, presumably meaning "not opened yet" -- confirm
 * against nl_open_sock().
 */
140 static struct nl_sock nl_scan
= {.fd
= -1}; /* Netlink socket for synchronous scan */
141 static struct nl_sock nl_req
= {.fd
= -1}; /* Netlink socket for requests */
144 nl_open_sock(struct nl_sock
*nl
)
148 nl
->fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
150 die("Unable to open rtnetlink socket: %m");
151 nl
->seq
= (u32
) (current_time() TO_S
); /* Or perhaps random_u32() ? */
152 nl
->rx_buffer
= xmalloc(NL_RX_SIZE
);
161 nl_open_sock(&nl_scan
);
162 nl_open_sock(&nl_req
);
166 nl_send(struct nl_sock
*nl
, struct nlmsghdr
*nh
)
168 struct sockaddr_nl sa
;
170 memset(&sa
, 0, sizeof(sa
));
171 sa
.nl_family
= AF_NETLINK
;
173 nh
->nlmsg_seq
= ++(nl
->seq
);
174 if (sendto(nl
->fd
, nh
, nh
->nlmsg_len
, 0, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0)
175 die("rtnetlink sendto: %m");
180 nl_request_dump(int af
, int cmd
)
186 .nh
.nlmsg_type
= cmd
,
187 .nh
.nlmsg_len
= sizeof(req
),
188 .nh
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
,
191 nl_send(&nl_scan
, &req
.nh
);
194 static struct nlmsghdr
*
195 nl_get_reply(struct nl_sock
*nl
)
201 struct iovec iov
= { nl
->rx_buffer
, NL_RX_SIZE
};
202 struct sockaddr_nl sa
;
205 .msg_namelen
= sizeof(sa
),
209 int x
= recvmsg(nl
->fd
, &m
, 0);
211 die("nl_get_reply: %m");
212 if (sa
.nl_pid
) /* It isn't from the kernel */
214 DBG("Non-kernel packet\n");
218 nl
->last_hdr
= (void *) nl
->rx_buffer
;
219 if (m
.msg_flags
& MSG_TRUNC
)
220 bug("nl_get_reply: got truncated reply which should be impossible");
222 if (NLMSG_OK(nl
->last_hdr
, nl
->last_size
))
224 struct nlmsghdr
*h
= nl
->last_hdr
;
225 nl
->last_hdr
= NLMSG_NEXT(h
, nl
->last_size
);
226 if (h
->nlmsg_seq
!= nl
->seq
)
228 log(L_WARN
"nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
229 h
->nlmsg_seq
, nl
->seq
);
235 log(L_WARN
"nl_get_reply: Found packet remnant of size %d", nl
->last_size
);
/*
 * Limiter state handed to log_rl() by nl_error() when it reports a kernel
 * error as "Netlink: %s"; initialised with the default log limits.
 */
240 static struct tbf rl_netlink_err
= TBF_DEFAULT_LOG_LIMITS
;
243 nl_error(struct nlmsghdr
*h
, int ignore_esrch
)
248 if (h
->nlmsg_len
< NLMSG_LENGTH(sizeof(struct nlmsgerr
)))
250 log(L_WARN
"Netlink: Truncated error message received");
253 e
= (struct nlmsgerr
*) NLMSG_DATA(h
);
255 if (ec
&& !(ignore_esrch
&& (ec
== ESRCH
)))
256 log_rl(&rl_netlink_err
, L_WARN
"Netlink: %s", strerror(ec
));
260 static struct nlmsghdr
*
263 struct nlmsghdr
*h
= nl_get_reply(&nl_scan
);
265 if (h
->nlmsg_type
== NLMSG_DONE
)
267 if (h
->nlmsg_type
== NLMSG_ERROR
)
276 nl_exchange(struct nlmsghdr
*pkt
, int ignore_esrch
)
280 nl_send(&nl_req
, pkt
);
283 h
= nl_get_reply(&nl_req
);
284 if (h
->nlmsg_type
== NLMSG_ERROR
)
286 log(L_WARN
"nl_exchange: Unexpected reply received");
288 return nl_error(h
, ignore_esrch
) ? -1 : 0;
/*
 * Number of attribute bytes still to be parsed in the current message.
 * nl_checkin() sets it from the message length, and nl_parse_multipath()
 * reloads it for each next hop and for a nested RTA_ENCAP payload.
 * nl_parse_attrs() then walks the attributes against it with
 * RTA_OK()/RTA_NEXT(). Being file-global, it must be set immediately before
 * each nl_parse_attrs() call.
 */
295 static int nl_attr_len
;
298 nl_checkin(struct nlmsghdr
*h
, int lsize
)
300 nl_attr_len
= h
->nlmsg_len
- NLMSG_LENGTH(lsize
);
303 log(L_ERR
"nl_checkin: underrun by %d bytes", -nl_attr_len
);
306 return NLMSG_DATA(h
);
309 struct nl_want_attrs
{
316 #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
318 static struct nl_want_attrs ifla_attr_want
[BIRD_IFLA_MAX
] = {
319 [IFLA_IFNAME
] = { 1, 0, 0 },
320 [IFLA_MTU
] = { 1, 1, sizeof(u32
) },
321 [IFLA_MASTER
] = { 1, 1, sizeof(u32
) },
322 [IFLA_WIRELESS
] = { 1, 0, 0 },
326 #define BIRD_IFA_MAX (IFA_FLAGS+1)
328 static struct nl_want_attrs ifa_attr_want4
[BIRD_IFA_MAX
] = {
329 [IFA_ADDRESS
] = { 1, 1, sizeof(ip4_addr
) },
330 [IFA_LOCAL
] = { 1, 1, sizeof(ip4_addr
) },
331 [IFA_BROADCAST
] = { 1, 1, sizeof(ip4_addr
) },
332 [IFA_FLAGS
] = { 1, 1, sizeof(u32
) },
335 static struct nl_want_attrs ifa_attr_want6
[BIRD_IFA_MAX
] = {
336 [IFA_ADDRESS
] = { 1, 1, sizeof(ip6_addr
) },
337 [IFA_LOCAL
] = { 1, 1, sizeof(ip6_addr
) },
338 [IFA_FLAGS
] = { 1, 1, sizeof(u32
) },
342 #define BIRD_RTA_MAX (RTA_ENCAP+1)
344 static struct nl_want_attrs nexthop_attr_want4
[BIRD_RTA_MAX
] = {
345 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
346 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
347 [RTA_ENCAP
] = { 1, 0, 0 },
350 static struct nl_want_attrs nexthop_attr_want6
[BIRD_RTA_MAX
] = {
351 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
352 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
353 [RTA_ENCAP
] = { 1, 0, 0 },
356 #ifdef HAVE_MPLS_KERNEL
357 static struct nl_want_attrs encap_mpls_want
[BIRD_RTA_MAX
] = {
358 [RTA_DST
] = { 1, 0, 0 },
362 static struct nl_want_attrs rtm_attr_want4
[BIRD_RTA_MAX
] = {
363 [RTA_DST
] = { 1, 1, sizeof(ip4_addr
) },
364 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
365 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
366 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
367 [RTA_PREFSRC
] = { 1, 1, sizeof(ip4_addr
) },
368 [RTA_METRICS
] = { 1, 0, 0 },
369 [RTA_MULTIPATH
] = { 1, 0, 0 },
370 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
371 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
372 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
373 [RTA_ENCAP
] = { 1, 0, 0 },
376 static struct nl_want_attrs rtm_attr_want6
[BIRD_RTA_MAX
] = {
377 [RTA_DST
] = { 1, 1, sizeof(ip6_addr
) },
378 [RTA_SRC
] = { 1, 1, sizeof(ip6_addr
) },
379 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
380 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
381 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
382 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
383 [RTA_PREFSRC
] = { 1, 1, sizeof(ip6_addr
) },
384 [RTA_METRICS
] = { 1, 0, 0 },
385 [RTA_MULTIPATH
] = { 1, 0, 0 },
386 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
387 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
388 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
389 [RTA_ENCAP
] = { 1, 0, 0 },
392 #ifdef HAVE_MPLS_KERNEL
393 static struct nl_want_attrs rtm_attr_want_mpls
[BIRD_RTA_MAX
] = {
394 [RTA_DST
] = { 1, 1, sizeof(u32
) },
395 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
396 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
397 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
398 [RTA_METRICS
] = { 1, 0, 0 },
399 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
400 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
401 [RTA_VIA
] = { 1, 0, 0 },
402 [RTA_NEWDST
] = { 1, 0, 0 },
408 nl_parse_attrs(struct rtattr
*a
, struct nl_want_attrs
*want
, struct rtattr
**k
, int ksize
)
410 int max
= ksize
/ sizeof(struct rtattr
*);
413 for ( ; RTA_OK(a
, nl_attr_len
); a
= RTA_NEXT(a
, nl_attr_len
))
415 if ((a
->rta_type
>= max
) || !want
[a
->rta_type
].defined
)
418 if (want
[a
->rta_type
].checksize
&& (RTA_PAYLOAD(a
) != want
[a
->rta_type
].size
))
420 log(L_ERR
"nl_parse_attrs: Malformed attribute received");
429 log(L_ERR
"nl_parse_attrs: remnant of size %d", nl_attr_len
);
436 static inline u16
rta_get_u16(struct rtattr
*a
)
437 { return *(u16
*) RTA_DATA(a
); }
439 static inline u32
rta_get_u32(struct rtattr
*a
)
440 { return *(u32
*) RTA_DATA(a
); }
442 static inline ip4_addr
rta_get_ip4(struct rtattr
*a
)
443 { return ip4_ntoh(*(ip4_addr
*) RTA_DATA(a
)); }
445 static inline ip6_addr
rta_get_ip6(struct rtattr
*a
)
446 { return ip6_ntoh(*(ip6_addr
*) RTA_DATA(a
)); }
448 static inline ip_addr
rta_get_ipa(struct rtattr
*a
)
450 if (RTA_PAYLOAD(a
) == sizeof(ip4_addr
))
451 return ipa_from_ip4(rta_get_ip4(a
));
453 return ipa_from_ip6(rta_get_ip6(a
));
456 #ifdef HAVE_MPLS_KERNEL
457 static inline ip_addr
rta_get_via(struct rtattr
*a
)
459 struct rtvia
*v
= RTA_DATA(a
);
460 switch(v
->rtvia_family
) {
461 case AF_INET
: return ipa_from_ip4(ip4_ntoh(*(ip4_addr
*) v
->rtvia_addr
));
462 case AF_INET6
: return ipa_from_ip6(ip6_ntoh(*(ip6_addr
*) v
->rtvia_addr
));
467 static u32 rta_mpls_stack
[MPLS_MAX_LABEL_STACK
];
468 static inline int rta_get_mpls(struct rtattr
*a
, u32
*stack
)
470 if (RTA_PAYLOAD(a
) % 4)
471 log(L_WARN
"KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a
));
473 return mpls_get(RTA_DATA(a
), RTA_PAYLOAD(a
) & ~0x3, stack
);
478 nl_add_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
, const void *data
, uint dlen
)
480 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
481 uint len
= RTA_LENGTH(dlen
);
483 if (pos
+ len
> bufsize
)
484 bug("nl_add_attr: packet buffer overflow");
486 struct rtattr
*a
= (struct rtattr
*)((char *)h
+ pos
);
489 h
->nlmsg_len
= pos
+ len
;
492 memcpy(RTA_DATA(a
), data
, dlen
);
497 static inline struct rtattr
*
498 nl_open_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
)
500 return nl_add_attr(h
, bufsize
, code
, NULL
, 0);
504 nl_close_attr(struct nlmsghdr
*h
, struct rtattr
*a
)
506 a
->rta_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)a
;
510 nl_add_attr_u16(struct nlmsghdr
*h
, uint bufsize
, int code
, u16 data
)
512 nl_add_attr(h
, bufsize
, code
, &data
, 2);
516 nl_add_attr_u32(struct nlmsghdr
*h
, uint bufsize
, int code
, u32 data
)
518 nl_add_attr(h
, bufsize
, code
, &data
, 4);
522 nl_add_attr_ip4(struct nlmsghdr
*h
, uint bufsize
, int code
, ip4_addr ip4
)
525 nl_add_attr(h
, bufsize
, code
, &ip4
, sizeof(ip4
));
529 nl_add_attr_ip6(struct nlmsghdr
*h
, uint bufsize
, int code
, ip6_addr ip6
)
532 nl_add_attr(h
, bufsize
, code
, &ip6
, sizeof(ip6
));
536 nl_add_attr_ipa(struct nlmsghdr
*h
, uint bufsize
, int code
, ip_addr ipa
)
539 nl_add_attr_ip4(h
, bufsize
, code
, ipa_to_ip4(ipa
));
541 nl_add_attr_ip6(h
, bufsize
, code
, ipa_to_ip6(ipa
));
544 #ifdef HAVE_MPLS_KERNEL
546 nl_add_attr_mpls(struct nlmsghdr
*h
, uint bufsize
, int code
, int len
, u32
*stack
)
549 mpls_put(buf
, len
, stack
);
550 nl_add_attr(h
, bufsize
, code
, buf
, len
*4);
554 nl_add_attr_mpls_encap(struct nlmsghdr
*h
, uint bufsize
, int len
, u32
*stack
)
556 nl_add_attr_u16(h
, bufsize
, RTA_ENCAP_TYPE
, LWTUNNEL_ENCAP_MPLS
);
558 struct rtattr
*nest
= nl_open_attr(h
, bufsize
, RTA_ENCAP
);
559 nl_add_attr_mpls(h
, bufsize
, RTA_DST
, len
, stack
);
560 nl_close_attr(h
, nest
);
564 nl_add_attr_via(struct nlmsghdr
*h
, uint bufsize
, ip_addr ipa
)
566 struct rtvia
*via
= alloca(sizeof(struct rtvia
) + 16);
570 via
->rtvia_family
= AF_INET
;
571 put_ip4(via
->rtvia_addr
, ipa_to_ip4(ipa
));
572 nl_add_attr(h
, bufsize
, RTA_VIA
, via
, sizeof(struct rtvia
) + 4);
576 via
->rtvia_family
= AF_INET6
;
577 put_ip6(via
->rtvia_addr
, ipa_to_ip6(ipa
));
578 nl_add_attr(h
, bufsize
, RTA_VIA
, via
, sizeof(struct rtvia
) + 16);
583 static inline struct rtnexthop
*
584 nl_open_nexthop(struct nlmsghdr
*h
, uint bufsize
)
586 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
587 uint len
= RTNH_LENGTH(0);
589 if (pos
+ len
> bufsize
)
590 bug("nl_open_nexthop: packet buffer overflow");
592 h
->nlmsg_len
= pos
+ len
;
594 return (void *)h
+ pos
;
598 nl_close_nexthop(struct nlmsghdr
*h
, struct rtnexthop
*nh
)
600 nh
->rtnh_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)nh
;
604 nl_add_nexthop(struct nlmsghdr
*h
, uint bufsize
, struct nexthop
*nh
, int af UNUSED
)
606 #ifdef HAVE_MPLS_KERNEL
609 nl_add_attr_mpls(h
, bufsize
, RTA_NEWDST
, nh
->labels
, nh
->label
);
611 nl_add_attr_mpls_encap(h
, bufsize
, nh
->labels
, nh
->label
);
613 if (ipa_nonzero(nh
->gw
))
615 nl_add_attr_via(h
, bufsize
, nh
->gw
);
617 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
620 if (ipa_nonzero(nh
->gw
))
621 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
626 nl_add_multipath(struct nlmsghdr
*h
, uint bufsize
, struct nexthop
*nh
, int af
)
628 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_MULTIPATH
);
630 for (; nh
; nh
= nh
->next
)
632 struct rtnexthop
*rtnh
= nl_open_nexthop(h
, bufsize
);
634 rtnh
->rtnh_flags
= 0;
635 rtnh
->rtnh_hops
= nh
->weight
;
636 rtnh
->rtnh_ifindex
= nh
->iface
->index
;
638 nl_add_nexthop(h
, bufsize
, nh
, af
);
640 if (nh
->flags
& RNF_ONLINK
)
641 rtnh
->rtnh_flags
|= RTNH_F_ONLINK
;
643 nl_close_nexthop(h
, rtnh
);
649 static struct nexthop
*
650 nl_parse_multipath(struct nl_parse_state
*s
, struct krt_proto
*p
, struct rtattr
*ra
, int af
)
652 struct rtattr
*a
[BIRD_RTA_MAX
];
653 struct rtnexthop
*nh
= RTA_DATA(ra
);
654 struct nexthop
*rv
, *first
, **last
;
655 unsigned len
= RTA_PAYLOAD(ra
);
662 /* Use RTNH_OK(nh,len) ?? */
663 if ((len
< sizeof(*nh
)) || (len
< nh
->rtnh_len
))
666 *last
= rv
= lp_allocz(s
->pool
, NEXTHOP_MAX_SIZE
);
669 rv
->weight
= nh
->rtnh_hops
;
670 rv
->iface
= if_find_by_index(nh
->rtnh_ifindex
);
674 /* Nonexistent RTNH_PAYLOAD ?? */
675 nl_attr_len
= nh
->rtnh_len
- RTNH_LENGTH(0);
679 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want4
, a
, sizeof(a
)))
684 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want6
, a
, sizeof(a
)))
694 rv
->gw
= rta_get_ipa(a
[RTA_GATEWAY
]);
696 if (nh
->rtnh_flags
& RTNH_F_ONLINK
)
697 rv
->flags
|= RNF_ONLINK
;
700 nbr
= neigh_find(&p
->p
, rv
->gw
, rv
->iface
,
701 (rv
->flags
& RNF_ONLINK
) ? NEF_ONLINK
: 0);
702 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
708 #ifdef HAVE_MPLS_KERNEL
709 if (a
[RTA_ENCAP_TYPE
])
711 if (rta_get_u16(a
[RTA_ENCAP_TYPE
]) != LWTUNNEL_ENCAP_MPLS
) {
712 log(L_WARN
"KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a
[RTA_ENCAP_TYPE
]));
716 struct rtattr
*enca
[BIRD_RTA_MAX
];
717 nl_attr_len
= RTA_PAYLOAD(a
[RTA_ENCAP
]);
718 nl_parse_attrs(RTA_DATA(a
[RTA_ENCAP
]), encap_mpls_want
, enca
, sizeof(enca
));
719 rv
->labels
= rta_get_mpls(enca
[RTA_DST
], rv
->label
);
725 len
-= NLMSG_ALIGN(nh
->rtnh_len
);
729 /* Ensure nexthops are sorted to satisfy nest invariant */
730 if (!nexthop_is_sorted(first
))
731 first
= nexthop_sort(first
);
737 nl_add_metrics(struct nlmsghdr
*h
, uint bufsize
, u32
*metrics
, int max
)
739 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_METRICS
);
742 for (t
= 1; t
< max
; t
++)
743 if (metrics
[0] & (1 << t
))
744 nl_add_attr_u32(h
, bufsize
, t
, metrics
[t
]);
750 nl_parse_metrics(struct rtattr
*hdr
, u32
*metrics
, int max
)
752 struct rtattr
*a
= RTA_DATA(hdr
);
753 int len
= RTA_PAYLOAD(hdr
);
756 for (; RTA_OK(a
, len
); a
= RTA_NEXT(a
, len
))
758 if (a
->rta_type
== RTA_UNSPEC
)
761 if (a
->rta_type
>= max
)
764 if (RTA_PAYLOAD(a
) != 4)
767 metrics
[0] |= 1 << a
->rta_type
;
768 metrics
[a
->rta_type
] = rta_get_u32(a
);
779 * Scanning of interfaces
783 nl_parse_link(struct nlmsghdr
*h
, int scan
)
786 struct rtattr
*a
[BIRD_IFLA_MAX
];
787 int new = h
->nlmsg_type
== RTM_NEWLINK
;
794 if (!(i
= nl_checkin(h
, sizeof(*i
))) || !nl_parse_attrs(IFLA_RTA(i
), ifla_attr_want
, a
, sizeof(a
)))
796 if (!a
[IFLA_IFNAME
] || (RTA_PAYLOAD(a
[IFLA_IFNAME
]) < 2) || !a
[IFLA_MTU
])
799 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
800 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
801 * We simply ignore all such messages with IFLA_WIRELESS without notice.
804 if (a
[IFLA_WIRELESS
])
807 log(L_ERR
"KIF: Malformed message received");
811 name
= RTA_DATA(a
[IFLA_IFNAME
]);
812 mtu
= rta_get_u32(a
[IFLA_MTU
]);
815 master
= rta_get_u32(a
[IFLA_MASTER
]);
817 ifi
= if_find_by_index(i
->ifi_index
);
820 DBG("KIF: IF%d(%s) goes down\n", i
->ifi_index
, name
);
828 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i
->ifi_index
, name
, mtu
, i
->ifi_flags
);
829 if (ifi
&& strncmp(ifi
->name
, name
, sizeof(ifi
->name
)-1))
832 strncpy(f
.name
, name
, sizeof(f
.name
)-1);
833 f
.index
= i
->ifi_index
;
836 f
.master_index
= master
;
837 f
.master
= if_find_by_index(master
);
841 f
.flags
|= IF_ADMIN_UP
;
842 if (fl
& IFF_LOWER_UP
)
843 f
.flags
|= IF_LINK_UP
;
844 if (fl
& IFF_LOOPBACK
) /* Loopback */
845 f
.flags
|= IF_MULTIACCESS
| IF_LOOPBACK
| IF_IGNORE
;
846 else if (fl
& IFF_POINTOPOINT
) /* PtP */
847 f
.flags
|= IF_MULTICAST
;
848 else if (fl
& IFF_BROADCAST
) /* Broadcast */
849 f
.flags
|= IF_MULTIACCESS
| IF_BROADCAST
| IF_MULTICAST
;
851 f
.flags
|= IF_MULTIACCESS
; /* NBMA */
853 if (fl
& IFF_MULTICAST
)
854 f
.flags
|= IF_MULTICAST
;
859 if_end_partial_update(ifi
);
864 nl_parse_addr4(struct ifaddrmsg
*i
, int scan
, int new)
866 struct rtattr
*a
[BIRD_IFA_MAX
];
871 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want4
, a
, sizeof(a
)))
876 log(L_ERR
"KIF: Malformed message received (missing IFA_LOCAL)");
881 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
885 ifi
= if_find_by_index(i
->ifa_index
);
888 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
893 ifa_flags
= rta_get_u32(a
[IFA_FLAGS
]);
895 ifa_flags
= i
->ifa_flags
;
898 bzero(&ifa
, sizeof(ifa
));
900 if (ifa_flags
& IFA_F_SECONDARY
)
901 ifa
.flags
|= IA_SECONDARY
;
903 ifa
.ip
= rta_get_ipa(a
[IFA_LOCAL
]);
905 if (i
->ifa_prefixlen
> IP4_MAX_PREFIX_LENGTH
)
907 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
910 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
)
912 ifa
.brd
= rta_get_ipa(a
[IFA_ADDRESS
]);
913 net_fill_ip4(&ifa
.prefix
, rta_get_ip4(a
[IFA_ADDRESS
]), i
->ifa_prefixlen
);
915 /* It is either a host address or a peer address */
916 if (ipa_equal(ifa
.ip
, ifa
.brd
))
917 ifa
.flags
|= IA_HOST
;
920 ifa
.flags
|= IA_PEER
;
921 ifa
.opposite
= ifa
.brd
;
926 net_fill_ip4(&ifa
.prefix
, ipa_to_ip4(ifa
.ip
), i
->ifa_prefixlen
);
927 net_normalize(&ifa
.prefix
);
929 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
- 1)
930 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
932 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
- 2)
933 ifa
.opposite
= ipa_opposite_m2(ifa
.ip
);
935 if ((ifi
->flags
& IF_BROADCAST
) && a
[IFA_BROADCAST
])
937 ip4_addr xbrd
= rta_get_ip4(a
[IFA_BROADCAST
]);
938 ip4_addr ybrd
= ip4_or(ipa_to_ip4(ifa
.ip
), ip4_not(ip4_mkmask(i
->ifa_prefixlen
)));
940 if (ip4_equal(xbrd
, net4_prefix(&ifa
.prefix
)) || ip4_equal(xbrd
, ybrd
))
941 ifa
.brd
= ipa_from_ip4(xbrd
);
942 else if (ifi
->flags
& IF_TMP_DOWN
) /* Complain only during the first scan */
944 log(L_ERR
"KIF: Invalid broadcast address %I4 for %s", xbrd
, ifi
->name
);
945 ifa
.brd
= ipa_from_ip4(ybrd
);
950 scope
= ipa_classify(ifa
.ip
);
953 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
956 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
958 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
959 ifi
->index
, ifi
->name
,
960 new ? "added" : "removed",
961 ifa
.ip
, ifa
.flags
, &ifa
.prefix
, ifa
.brd
, ifa
.opposite
);
969 if_end_partial_update(ifi
);
973 nl_parse_addr6(struct ifaddrmsg
*i
, int scan
, int new)
975 struct rtattr
*a
[BIRD_IFA_MAX
];
980 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want6
, a
, sizeof(a
)))
985 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
989 ifi
= if_find_by_index(i
->ifa_index
);
992 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
997 ifa_flags
= rta_get_u32(a
[IFA_FLAGS
]);
999 ifa_flags
= i
->ifa_flags
;
1002 bzero(&ifa
, sizeof(ifa
));
1004 if (ifa_flags
& IFA_F_SECONDARY
)
1005 ifa
.flags
|= IA_SECONDARY
;
1007 /* Ignore tentative addresses silently */
1008 if (ifa_flags
& IFA_F_TENTATIVE
)
1011 /* IFA_LOCAL can be unset for IPv6 interfaces */
1012 ifa
.ip
= rta_get_ipa(a
[IFA_LOCAL
] ? : a
[IFA_ADDRESS
]);
1014 if (i
->ifa_prefixlen
> IP6_MAX_PREFIX_LENGTH
)
1016 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
1019 if (i
->ifa_prefixlen
== IP6_MAX_PREFIX_LENGTH
)
1021 ifa
.brd
= rta_get_ipa(a
[IFA_ADDRESS
]);
1022 net_fill_ip6(&ifa
.prefix
, rta_get_ip6(a
[IFA_ADDRESS
]), i
->ifa_prefixlen
);
1024 /* It is either a host address or a peer address */
1025 if (ipa_equal(ifa
.ip
, ifa
.brd
))
1026 ifa
.flags
|= IA_HOST
;
1029 ifa
.flags
|= IA_PEER
;
1030 ifa
.opposite
= ifa
.brd
;
1035 net_fill_ip6(&ifa
.prefix
, ipa_to_ip6(ifa
.ip
), i
->ifa_prefixlen
);
1036 net_normalize(&ifa
.prefix
);
1038 if (i
->ifa_prefixlen
== IP6_MAX_PREFIX_LENGTH
- 1)
1039 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
1042 scope
= ipa_classify(ifa
.ip
);
1045 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
1048 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
1050 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1051 ifi
->index
, ifi
->name
,
1052 new ? "added" : "removed",
1053 ifa
.ip
, ifa
.flags
, &ifa
.prefix
, ifa
.brd
, ifa
.opposite
);
1061 if_end_partial_update(ifi
);
1065 nl_parse_addr(struct nlmsghdr
*h
, int scan
)
1067 struct ifaddrmsg
*i
;
1069 if (!(i
= nl_checkin(h
, sizeof(*i
))))
1072 int new = (h
->nlmsg_type
== RTM_NEWADDR
);
1074 switch (i
->ifa_family
)
1077 return nl_parse_addr4(i
, scan
, new);
1080 return nl_parse_addr6(i
, scan
, new);
1085 kif_do_scan(struct kif_proto
*p UNUSED
)
1091 nl_request_dump(AF_UNSPEC
, RTM_GETLINK
);
1092 while (h
= nl_get_scan())
1093 if (h
->nlmsg_type
== RTM_NEWLINK
|| h
->nlmsg_type
== RTM_DELLINK
)
1094 nl_parse_link(h
, 1);
1096 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1098 /* Re-resolve master interface for slaves */
1100 WALK_LIST(i
, iface_list
)
1101 if (i
->master_index
)
1107 .master_index
= i
->master_index
,
1108 .master
= if_find_by_index(i
->master_index
)
1111 if (f
.master
!= i
->master
)
1113 memcpy(f
.name
, i
->name
, sizeof(f
.name
));
1118 nl_request_dump(AF_INET
, RTM_GETADDR
);
1119 while (h
= nl_get_scan())
1120 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
1121 nl_parse_addr(h
, 1);
1123 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1125 nl_request_dump(AF_INET6
, RTM_GETADDR
);
1126 while (h
= nl_get_scan())
1127 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
1128 nl_parse_addr(h
, 1);
1130 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1140 krt_table_id(struct krt_proto
*p
)
1142 return KRT_CF
->sys
.table_id
;
/*
 * Hash table of krt_proto instances. nl_parse_route() looks an instance up by
 * the pair (address family, kernel table id) to decide which protocol a
 * received route belongs to.
 */
1145 static HASH(struct krt_proto
) nl_table_map
;
/*
 * Parameters handed to the generic HASH macros under the RTH prefix:
 * RTH_KEY extracts the (af, table id) key from a node, RTH_NEXT names the
 * chain link, RTH_EQ compares two keys and RTH_FN hashes one.
 * The expansions are not parenthesized, so they rely on how the HASH macros
 * use them.
 * NOTE(review): RTH_PARAMS looks like resize tuning for the rehash function;
 * the meaning of the individual values is not visible here.
 */
1147 #define RTH_KEY(p) p->af, krt_table_id(p)
1148 #define RTH_NEXT(p) p->sys.hash_next
1149 #define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1150 #define RTH_FN(a,i) a ^ u32_hash(i)
1152 #define RTH_REHASH rth_rehash
1153 #define RTH_PARAMS /8, *2, 2, 2, 6, 20
1155 HASH_DEFINE_REHASH_FN(RTH
, struct krt_proto
)
1166 case RTD_UNREACHABLE
:
1176 nh_bufsize(struct nexthop
*nh
)
1179 for (; nh
!= NULL
; nh
= nh
->next
)
1180 rv
+= RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr
)));
1185 nl_send_route(struct krt_proto
*p
, rte
*e
, int op
, int dest
, struct nexthop
*nh
)
1190 ea_list
*eattrs
= a
->eattrs
;
1191 int bufsize
= 128 + KRT_METRICS_MAX
*8 + nh_bufsize(&(a
->nh
));
1200 int rsize
= sizeof(*r
) + bufsize
;
1203 DBG("nl_send_route(%N,op=%x)\n", net
->n
.addr
, op
);
1205 bzero(&r
->h
, sizeof(r
->h
));
1206 bzero(&r
->r
, sizeof(r
->r
));
1207 r
->h
.nlmsg_type
= op
? RTM_NEWROUTE
: RTM_DELROUTE
;
1208 r
->h
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1209 r
->h
.nlmsg_flags
= op
| NLM_F_REQUEST
| NLM_F_ACK
;
1211 r
->r
.rtm_family
= p
->af
;
1212 r
->r
.rtm_dst_len
= net_pxlen(net
->n
.addr
);
1213 r
->r
.rtm_protocol
= RTPROT_BIRD
;
1214 r
->r
.rtm_scope
= RT_SCOPE_NOWHERE
;
1215 #ifdef HAVE_MPLS_KERNEL
1216 if (p
->af
== AF_MPLS
)
1219 * Kernel MPLS code is a bit picky. We must:
1220 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1221 * 2) Never use RTA_PRIORITY
1224 u32 label
= net_mpls(net
->n
.addr
);
1225 nl_add_attr_mpls(&r
->h
, rsize
, RTA_DST
, 1, &label
);
1226 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1227 r
->r
.rtm_type
= RTN_UNICAST
;
1232 nl_add_attr_ipa(&r
->h
, rsize
, RTA_DST
, net_prefix(net
->n
.addr
));
1234 /* Add source address for IPv6 SADR routes */
1235 if (net
->n
.addr
->type
== NET_IP6_SADR
)
1237 net_addr_ip6_sadr
*a
= (void *) &net
->n
.addr
;
1238 nl_add_attr_ip6(&r
->h
, rsize
, RTA_SRC
, a
->src_prefix
);
1239 r
->r
.rtm_src_len
= a
->src_pxlen
;
1244 * Strange behavior for RTM_DELROUTE:
1245 * 1) rtm_family is ignored in IPv6, works for IPv4
1246 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1247 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1250 if (krt_table_id(p
) < 256)
1251 r
->r
.rtm_table
= krt_table_id(p
);
1253 nl_add_attr_u32(&r
->h
, rsize
, RTA_TABLE
, krt_table_id(p
));
1255 if (p
->af
== AF_MPLS
)
1257 else if (a
->source
== RTS_DUMMY
)
1258 priority
= e
->u
.krt
.metric
;
1259 else if (KRT_CF
->sys
.metric
)
1260 priority
= KRT_CF
->sys
.metric
;
1261 else if ((op
!= NL_OP_DELETE
) && (ea
= ea_find(eattrs
, EA_KRT_METRIC
)))
1262 priority
= ea
->u
.data
;
1265 nl_add_attr_u32(&r
->h
, rsize
, RTA_PRIORITY
, priority
);
1267 /* For route delete, we do not specify remaining route attributes */
1268 if (op
== NL_OP_DELETE
)
1271 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1272 if (p
->af
== AF_MPLS
)
1273 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1274 else if (ea
= ea_find(eattrs
, EA_KRT_SCOPE
))
1275 r
->r
.rtm_scope
= ea
->u
.data
;
1277 r
->r
.rtm_scope
= (dest
== RTD_UNICAST
&& ipa_zero(nh
->gw
)) ? RT_SCOPE_LINK
: RT_SCOPE_UNIVERSE
;
1279 if (ea
= ea_find(eattrs
, EA_KRT_PREFSRC
))
1280 nl_add_attr_ipa(&r
->h
, rsize
, RTA_PREFSRC
, *(ip_addr
*)ea
->u
.ptr
->data
);
1282 if (ea
= ea_find(eattrs
, EA_KRT_REALM
))
1283 nl_add_attr_u32(&r
->h
, rsize
, RTA_FLOW
, ea
->u
.data
);
1286 u32 metrics
[KRT_METRICS_MAX
];
1289 struct ea_walk_state ews
= { .eattrs
= eattrs
};
1290 while (ea
= ea_walk(&ews
, EA_KRT_METRICS
, KRT_METRICS_MAX
))
1292 int id
= ea
->id
- EA_KRT_METRICS
;
1293 metrics
[0] |= 1 << id
;
1294 metrics
[id
] = ea
->u
.data
;
1298 nl_add_metrics(&r
->h
, rsize
, metrics
, KRT_METRICS_MAX
);
1305 r
->r
.rtm_type
= RTN_UNICAST
;
1306 if (nh
->next
&& !krt_ecmp6(p
))
1307 nl_add_multipath(&r
->h
, rsize
, nh
, p
->af
);
1310 nl_add_attr_u32(&r
->h
, rsize
, RTA_OIF
, nh
->iface
->index
);
1311 nl_add_nexthop(&r
->h
, rsize
, nh
, p
->af
);
1313 if (nh
->flags
& RNF_ONLINK
)
1314 r
->r
.rtm_flags
|= RTNH_F_ONLINK
;
1318 r
->r
.rtm_type
= RTN_BLACKHOLE
;
1320 case RTD_UNREACHABLE
:
1321 r
->r
.rtm_type
= RTN_UNREACHABLE
;
1324 r
->r
.rtm_type
= RTN_PROHIBIT
;
1329 bug("krt_capable inconsistent with nl_send_route");
1332 /* Ignore missing for DELETE */
1333 return nl_exchange(&r
->h
, (op
== NL_OP_DELETE
));
1337 nl_add_rte(struct krt_proto
*p
, rte
*e
)
1342 if (krt_ecmp6(p
) && a
->nh
.next
)
1344 struct nexthop
*nh
= &(a
->nh
);
1346 err
= nl_send_route(p
, e
, NL_OP_ADD
, RTD_UNICAST
, nh
);
1350 for (nh
= nh
->next
; nh
; nh
= nh
->next
)
1351 err
+= nl_send_route(p
, e
, NL_OP_APPEND
, RTD_UNICAST
, nh
);
1356 return nl_send_route(p
, e
, NL_OP_ADD
, a
->dest
, &(a
->nh
));
1360 nl_delete_rte(struct krt_proto
*p
, rte
*e
)
1364 /* For IPv6, we just repeatedly request DELETE until we get error */
1366 err
= nl_send_route(p
, e
, NL_OP_DELETE
, RTD_NONE
, NULL
);
1367 while (krt_ecmp6(p
) && !err
);
1373 nl_replace_rte(struct krt_proto
*p
, rte
*e
)
1376 return nl_send_route(p
, e
, NL_OP_REPLACE
, a
->dest
, &(a
->nh
));
1381 krt_replace_rte(struct krt_proto
*p
, net
*n
, rte
*new, rte
*old
)
1386 * We use NL_OP_REPLACE for IPv4. It does not check for a matching
1387 * rtm_protocol, but that is OK when a dedicated priority is used.
1389 * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
1390 * and with some kernel versions ECMP replace crashes kernel. Would need more
1391 * testing and checks for kernel versions.
1393 * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
1394 * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1397 if (krt_ipv4(p
) && old
&& new)
1399 err
= nl_replace_rte(p
, new);
1404 nl_delete_rte(p
, old
);
1407 err
= nl_add_rte(p
, new);
1411 n
->n
.flags
|= KRF_SYNC_ERROR
;
1413 n
->n
.flags
&= ~KRF_SYNC_ERROR
;
1417 nl_mergable_route(struct nl_parse_state
*s
, net
*net
, struct krt_proto
*p
, uint priority
, uint krt_type
, uint rtm_family
)
1419 /* Route merging is used for IPv6 scans */
1420 if (!s
->scan
|| (rtm_family
!= AF_INET6
))
1423 /* Saved and new route must have same network, proto/table, and priority */
1424 if ((s
->net
!= net
) || (s
->proto
!= p
) || (s
->krt_metric
!= priority
))
1427 /* Both must be regular unicast routes */
1428 if ((s
->krt_type
!= RTN_UNICAST
) || (krt_type
!= RTN_UNICAST
))
1435 nl_announce_route(struct nl_parse_state
*s
)
1437 rte
*e
= rte_get_temp(s
->attrs
);
1439 e
->u
.krt
.src
= s
->krt_src
;
1440 e
->u
.krt
.proto
= s
->krt_proto
;
1443 e
->u
.krt
.metric
= s
->krt_metric
;
1446 krt_got_route(s
->proto
, e
);
1448 krt_got_route_async(s
->proto
, e
, s
->new);
1457 nl_parse_begin(struct nl_parse_state
*s
, int scan
)
1459 memset(s
, 0, sizeof (struct nl_parse_state
));
1460 s
->pool
= nl_linpool
;
1465 nl_parse_end(struct nl_parse_state
*s
)
1468 nl_announce_route(s
);
/*
 * Debug-log why a route is being ignored, then return from the enclosing
 * function. The first argument must be a string literal, because it is
 * concatenated with the "KRT: Ignoring route - " prefix; further arguments are
 * its format parameters. The bare `return;` restricts use to functions
 * returning void. `ARG...` is the GNU named-variadic macro syntax.
 */
1472 #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
/*
 * nl_parse_route - parse one RTM_NEWROUTE / RTM_DELROUTE Netlink message and
 * turn it into a BIRD route stored in (or merged into) the parse state @s.
 *
 * @s: parse state; holds the currently postponed route and the linpool used
 *     for attribute allocations
 * @h: Netlink message header of the route message
 *
 * Visible stages: validate the message and parse its attributes per address
 * family; locate the kernel protocol instance by (family, table id); reject
 * routes that are not supported (TOS, deletions during a scan, strange
 * destination class/scope, unknown type or encapsulation); build an rta with
 * next hop(s) and extended attributes (scope, prefsrc, realm, metrics); then
 * either store the route in @s or merge its next hop into the stored one.
 *
 * Rejections use SKIP(), which returns from this function.
 *
 * NOTE(review): this listing lacks many original lines (braces, case labels,
 * else branches, returns, some declarations and conditions). The comments
 * below describe only the statements that are visible.
 */
1475 nl_parse_route(struct nl_parse_state
*s
, struct nlmsghdr
*h
)
1477 struct krt_proto
*p
;
1479 struct rtattr
*a
[BIRD_RTA_MAX
];
/* Nonzero for RTM_NEWROUTE; any other message type counts as a removal */
1480 int new = h
->nlmsg_type
== RTM_NEWROUTE
;
1482 net_addr dst
, src
= {};
1486 u32 def_scope
= RT_SCOPE_UNIVERSE
;
/* Check the message against the size of the fixed header *i */
1489 if (!(i
= nl_checkin(h
, sizeof(*i
))))
/* Per-family attribute parsing and construction of the destination prefix */
1492 switch (i
->rtm_family
)
/* IPv4: parse with rtm_attr_want4; dst is RTA_DST or IP4_NONE/0 */
1495 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want4
, a
, sizeof(a
)))
1499 net_fill_ip4(&dst
, rta_get_ip4(a
[RTA_DST
]), i
->rtm_dst_len
);
1501 net_fill_ip4(&dst
, IP4_NONE
, 0);
/* IPv6: parse with rtm_attr_want6; dst and src are the attribute or IP6_NONE/0 */
1505 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want6
, a
, sizeof(a
)))
1509 net_fill_ip6(&dst
, rta_get_ip6(a
[RTA_DST
]), i
->rtm_dst_len
);
1511 net_fill_ip6(&dst
, IP6_NONE
, 0);
1514 net_fill_ip6(&src
, rta_get_ip6(a
[RTA_SRC
]), i
->rtm_src_len
);
1516 net_fill_ip6(&src
, IP6_NONE
, 0);
/* MPLS (only with kernel MPLS support): destination must be exactly one label */
1519 #ifdef HAVE_MPLS_KERNEL
1521 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want_mpls
, a
, sizeof(a
)))
1525 SKIP("MPLS route without RTA_DST");
1527 if (rta_get_mpls(a
[RTA_DST
], rta_mpls_stack
) != 1)
1528 SKIP("MPLS route with multi-label RTA_DST");
1530 net_fill_mpls(&dst
, rta_mpls_stack
[0]);
/* Outgoing interface index from RTA_OIF */
1539 oif
= rta_get_u32(a
[RTA_OIF
]);
/* Table id: taken from RTA_TABLE or from the header field rtm_table */
1542 table_id
= rta_get_u32(a
[RTA_TABLE
]);
1544 table_id
= i
->rtm_table
;
1546 /* Do we know this table? */
1547 p
= HASH_FIND(nl_table_map
, RTH
, i
->rtm_family
, table_id
);
1549 SKIP("unknown table %u\n", table_id
);
/* A source prefix is accepted only when the protocol's net type is NET_IP6_SADR */
1551 if (a
[RTA_SRC
] && (p
->p
.net_type
!= NET_IP6_SADR
))
1552 SKIP("src prefix for non-SADR channel\n");
1557 if (i
->rtm_tos
!= 0) /* We don't support TOS */
1558 SKIP("TOS %02x\n", i
->rtm_tos
);
/* Route removals are not expected while scanning */
1560 if (s
->scan
&& !new)
1561 SKIP("RTM_DELROUTE in scan\n");
1563 if (a
[RTA_PRIORITY
])
1564 priority
= rta_get_u32(a
[RTA_PRIORITY
]);
/*
 * Reject destinations for which net_classify() returns a negative value,
 * lacks the IADDR_HOST bit, or reports a scope not above SCOPE_LINK.
 */
1566 int c
= net_classify(&dst
);
1567 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
1568 SKIP("strange class/scope\n");
/* Map the kernel routing protocol id (rtm_protocol) to a KRT_SRC_* value */
1570 switch (i
->rtm_protocol
)
1573 SKIP("proto unspec\n");
1575 case RTPROT_REDIRECT
:
1576 krt_src
= KRT_SRC_REDIRECT
;
1580 krt_src
= KRT_SRC_KERNEL
;
1586 krt_src
= KRT_SRC_BIRD
;
1591 krt_src
= KRT_SRC_ALIEN
;
/* For SADR protocols combine dst and src into one stack-allocated SADR address */
1595 if (p
->p
.net_type
== NET_IP6_SADR
)
1597 n
= alloca(sizeof(net_addr_ip6_sadr
));
1598 net_fill_ip6_sadr(n
, net6_prefix(&dst
), net6_pxlen(&dst
),
1599 net6_prefix(&src
), net6_pxlen(&src
));
/* Get the network entry in the table of the protocol's main channel */
1602 net
*net
= net_get(p
->p
.main_channel
->table
, n
);
/* A stored route that cannot be merged with this one is announced first */
1604 if (s
->net
&& !nl_mergable_route(s
, net
, p
, priority
, i
->rtm_type
, i
->rtm_family
))
1605 nl_announce_route(s
);
/* Zeroed route attributes allocated from the parse-state pool */
1607 rta
*ra
= lp_allocz(s
->pool
, RTA_MAX_SIZE
);
1608 ra
->src
= p
->p
.main_source
;
1609 ra
->source
= RTS_INHERIT
;
1610 ra
->scope
= SCOPE_UNIVERSE
;
/* Translate the kernel route type into the BIRD destination type */
1612 switch (i
->rtm_type
)
1615 ra
->dest
= RTD_UNICAST
;
/* Multipath route: next hops come from nl_parse_multipath() */
1617 if (a
[RTA_MULTIPATH
])
1619 struct nexthop
*nh
= nl_parse_multipath(s
, p
, a
[RTA_MULTIPATH
], i
->rtm_family
);
1622 log(L_ERR
"KRT: Received strange multipath route %N", net
->n
.addr
);
/* Single next hop: resolve the interface from the ifindex */
1630 ra
->nh
.iface
= if_find_by_index(oif
);
1633 log(L_ERR
"KRT: Received route %N with unknown ifindex %u", net
->n
.addr
, oif
);
/* Gateway present: RTA_GATEWAY for non-MPLS families, RTA_VIA for AF_MPLS */
1637 if ((i
->rtm_family
!= AF_MPLS
) && a
[RTA_GATEWAY
]
1638 #ifdef HAVE_MPLS_KERNEL
1639 || (i
->rtm_family
== AF_MPLS
) && a
[RTA_VIA
]
1643 #ifdef HAVE_MPLS_KERNEL
1644 if (i
->rtm_family
== AF_MPLS
)
1645 ra
->nh
.gw
= rta_get_via(a
[RTA_VIA
]);
1648 ra
->nh
.gw
= rta_get_ipa(a
[RTA_GATEWAY
]);
1650 /* Silently skip strange 6to4 routes */
1651 const net_addr_ip6 sit
= NET_ADDR_IP6(IP6_NONE
, 96);
1652 if ((i
->rtm_family
== AF_INET6
) && ipa_in_netX(ra
->nh
.gw
, (net_addr
*) &sit
))
/* Propagate the kernel on-link flag to the next hop */
1655 if (i
->rtm_flags
& RTNH_F_ONLINK
)
1656 ra
->nh
.flags
|= RNF_ONLINK
;
/* The gateway must resolve to a neighbor whose scope is not SCOPE_HOST */
1659 nbr
= neigh_find(&p
->p
, ra
->nh
.gw
, ra
->nh
.iface
,
1660 (ra
->nh
.flags
& RNF_ONLINK
) ? NEF_ONLINK
: 0);
1661 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
1663 log(L_ERR
"KRT: Received route %N with strange next-hop %I", net
->n
.addr
,
/* Non-unicast route types (some case labels are missing from this listing) */
1671 ra
->dest
= RTD_BLACKHOLE
;
1673 case RTN_UNREACHABLE
:
1674 ra
->dest
= RTD_UNREACHABLE
;
1677 ra
->dest
= RTD_PROHIBIT
;
1679 /* FIXME: What about RTN_THROW? */
1681 SKIP("type %d\n", i
->rtm_type
);
/* MPLS label stack handling, only when ra->nh.next is unset */
1685 #ifdef HAVE_MPLS_KERNEL
/* AF_MPLS route: labels come from RTA_NEWDST */
1687 if ((i
->rtm_family
== AF_MPLS
) && a
[RTA_NEWDST
] && !ra
->nh
.next
)
1688 labels
= rta_get_mpls(a
[RTA_NEWDST
], ra
->nh
.label
);
/* Encapsulated route: only LWTUNNEL_ENCAP_MPLS is understood */
1690 if (a
[RTA_ENCAP
] && a
[RTA_ENCAP_TYPE
] && !ra
->nh
.next
)
1692 switch (rta_get_u16(a
[RTA_ENCAP_TYPE
]))
1694 case LWTUNNEL_ENCAP_MPLS
:
1696 struct rtattr
*enca
[BIRD_RTA_MAX
];
1697 nl_attr_len
= RTA_PAYLOAD(a
[RTA_ENCAP
]);
1698 nl_parse_attrs(RTA_DATA(a
[RTA_ENCAP
]), encap_mpls_want
, enca
, sizeof(enca
));
1699 labels
= rta_get_mpls(enca
[RTA_DST
], ra
->nh
.label
);
1703 SKIP("unknown encapsulation method %d\n", rta_get_u16(a
[RTA_ENCAP_TYPE
]));
1710 log(L_WARN
"KRT: Too long MPLS stack received, ignoring.");
1714 ra
->nh
.labels
= labels
;
/* A scope different from def_scope is recorded as the EA_KRT_SCOPE attribute */
1717 if (i
->rtm_scope
!= def_scope
)
1719 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1720 ea
->next
= ra
->eattrs
;
1722 ea
->flags
= EALF_SORTED
;
1724 ea
->attrs
[0].id
= EA_KRT_SCOPE
;
1725 ea
->attrs
[0].flags
= 0;
1726 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1727 ea
->attrs
[0].u
.data
= i
->rtm_scope
;
/* RTA_PREFSRC is stored as EA_KRT_PREFSRC; the address is copied into an adata */
1732 ip_addr ps
= rta_get_ipa(a
[RTA_PREFSRC
]);
1734 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1735 ea
->next
= ra
->eattrs
;
1737 ea
->flags
= EALF_SORTED
;
1739 ea
->attrs
[0].id
= EA_KRT_PREFSRC
;
1740 ea
->attrs
[0].flags
= 0;
1741 ea
->attrs
[0].type
= EAF_TYPE_IP_ADDRESS
;
1742 ea
->attrs
[0].u
.ptr
= lp_alloc(s
->pool
, sizeof(struct adata
) + sizeof(ps
));
1743 ea
->attrs
[0].u
.ptr
->length
= sizeof(ps
);
1744 memcpy(ea
->attrs
[0].u
.ptr
->data
, &ps
, sizeof(ps
));
/* RTA_FLOW is stored as the integer attribute EA_KRT_REALM */
1749 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1750 ea
->next
= ra
->eattrs
;
1752 ea
->flags
= EALF_SORTED
;
1754 ea
->attrs
[0].id
= EA_KRT_REALM
;
1755 ea
->attrs
[0].flags
= 0;
1756 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1757 ea
->attrs
[0].u
.data
= rta_get_u32(a
[RTA_FLOW
]);
/*
 * RTA_METRICS: nl_parse_metrics() fills metrics[]; metrics[0] is used as a
 * bitmask, and each set bit t (1 <= t < KRT_METRICS_MAX) yields one integer
 * attribute carrying metrics[t].
 */
1762 u32 metrics
[KRT_METRICS_MAX
];
1763 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + KRT_METRICS_MAX
* sizeof(eattr
));
1766 if (nl_parse_metrics(a
[RTA_METRICS
], metrics
, ARRAY_SIZE(metrics
)) < 0)
1768 log(L_ERR
"KRT: Received route %N with strange RTA_METRICS attribute", net
->n
.addr
);
1772 for (t
= 1; t
< KRT_METRICS_MAX
; t
++)
1773 if (metrics
[0] & (1 << t
))
1775 ea
->attrs
[n
].id
= EA_CODE(PROTOCOL_KERNEL
, KRT_METRICS_OFFSET
+ t
);
1776 ea
->attrs
[n
].flags
= 0;
1777 ea
->attrs
[n
].type
= EAF_TYPE_INT
; /* FIXME: Some are EAF_TYPE_BITFIELD */
1778 ea
->attrs
[n
].u
.data
= metrics
[t
];
1784 ea
->next
= ra
->eattrs
;
1785 ea
->flags
= EALF_SORTED
;
1792 * Ideally, now we would send the received route to the rest of kernel code.
1793 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1794 * postpone it and merge next hops until the end of the sequence. Note that
1795 * when doing merging of next hops, we expect the new route to be unipath.
1796 * Otherwise, we ignore additional next hops in nexthop_insert().
1801 /* Store the new route */
1806 s
->krt_src
= krt_src
;
1807 s
->krt_type
= i
->rtm_type
;
1808 s
->krt_proto
= i
->rtm_protocol
;
1809 s
->krt_metric
= priority
;
1813 /* Merge next hops with the stored route */
1816 struct nexthop
*nhs
= &oa
->nh
;
1817 nexthop_insert(&nhs
, &ra
->nh
);
1819 /* Perhaps new nexthop is inserted at the first position */
1825 /* Keep old eattrs */
1826 ra
->eattrs
= oa
->eattrs
;
/*
 * krt_do_scan - scan kernel routes through a Netlink dump.
 *
 * A parse state is started in scan mode (second argument 1), a dump of
 * RTM_GETROUTE for AF_UNSPEC is requested, and each message returned by
 * nl_get_scan() is processed until it returns a null value: RTM_NEWROUTE and
 * RTM_DELROUTE messages go to nl_parse_route(), anything else is logged at
 * L_DEBUG level.
 *
 * @p is marked UNUSED; per the comment on the signature it is NULL under
 * CONFIG_ALL_TABLES_AT_ONCE.
 *
 * NOTE(review): the declaration of h and any call finishing the parse state
 * after the loop are not visible in this listing.
 */
1832 krt_do_scan(struct krt_proto
*p UNUSED
) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1835 struct nl_parse_state s
;
1837 nl_parse_begin(&s
, 1);
1838 nl_request_dump(AF_UNSPEC
, RTM_GETROUTE
);
/* The assignment in the loop condition is intentional */
1839 while (h
= nl_get_scan())
1840 if (h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)
1841 nl_parse_route(&s
, h
);
1843 log(L_DEBUG
"nl_scan_fire: Unknown packet received (type=%d)", h
->nlmsg_type
);
1848 * Asynchronous Netlink interface
/* File-scope state of the asynchronous Netlink listener */
1851 static sock
*nl_async_sk
; /* BIRD socket for asynchronous notifications */
1852 static byte
*nl_async_rx_buffer
; /* Receive buffer (allocated with size NL_RX_SIZE) */
/*
 * nl_async_msg - dispatch one asynchronously received Netlink message by its
 * nlmsg_type.
 *
 * Route notifications are parsed with a fresh parse state in non-scan mode
 * (nl_parse_begin(&s, 0)); link and address notifications are passed to
 * nl_parse_link() and nl_parse_addr() with 0 as second argument; any other
 * type only produces a debug message.
 *
 * NOTE(review): the case labels and any call finishing the route parse state
 * are missing from this listing.
 */
1855 nl_async_msg(struct nlmsghdr
*h
)
1857 struct nl_parse_state s
;
1859 switch (h
->nlmsg_type
)
/* Route notification */
1863 DBG("KRT: Received async route notification (%d)\n", h
->nlmsg_type
);
1864 nl_parse_begin(&s
, 0);
1865 nl_parse_route(&s
, h
);
/* Link notification */
1870 DBG("KRT: Received async link notification (%d)\n", h
->nlmsg_type
);
1872 nl_parse_link(h
, 0);
/* Address notification */
1876 DBG("KRT: Received async address notification (%d)\n", h
->nlmsg_type
);
1878 nl_parse_addr(h
, 0);
/* Anything else */
1881 DBG("KRT: Received unknown async notification (%d)\n", h
->nlmsg_type
);
/*
 * nl_async_hook - receive hook of the asynchronous Netlink socket.
 *
 * One datagram is read with recvmsg() into nl_async_rx_buffer (NL_RX_SIZE
 * bytes). On ENOBUFS a warning is logged and 1 is returned; other errors
 * except EWOULDBLOCK are logged. Datagrams with a nonzero sender nl_pid are
 * treated as not coming from the kernel. A truncated datagram (MSG_TRUNC)
 * produces a warning. The buffer is then walked with NLMSG_OK / NLMSG_NEXT,
 * and leftover bytes are reported as a packet remnant.
 *
 * @size is marked UNUSED.
 *
 * NOTE(review): the msghdr initializer, several declarations, the per-message
 * call inside the loop and most return statements are missing from this
 * listing.
 */
1886 nl_async_hook(sock
*sk
, uint size UNUSED
)
1888 struct iovec iov
= { nl_async_rx_buffer
, NL_RX_SIZE
};
1889 struct sockaddr_nl sa
;
1892 .msg_namelen
= sizeof(sa
),
1900 x
= recvmsg(sk
->fd
, &m
, 0);
/* Error handling for the receive call */
1903 if (errno
== ENOBUFS
)
1906 * Netlink reports some packets have been thrown away.
1907 * One day we might react to it by asking for route table
1908 * scan in near future.
1910 log(L_WARN
"Kernel dropped some netlink messages, will resync on next scan.");
1911 return 1; /* More data are likely to be ready */
1913 else if (errno
!= EWOULDBLOCK
)
1914 log(L_ERR
"Netlink recvmsg: %m");
1917 if (sa
.nl_pid
) /* It isn't from the kernel */
1919 DBG("Non-kernel packet\n");
1922 h
= (void *) nl_async_rx_buffer
;
1924 if (m
.msg_flags
& MSG_TRUNC
)
1926 log(L_WARN
"Netlink got truncated asynchronous message");
/* Walk all complete Netlink messages contained in the datagram */
1929 while (NLMSG_OK(h
, len
))
1932 h
= NLMSG_NEXT(h
, len
);
1935 log(L_WARN
"nl_async_hook: Found packet remnant of size %d", len
);
/*
 * nl_async_err_hook - error hook of the asynchronous Netlink socket.
 *
 * The error code @e is marked UNUSED; the hook simply calls nl_async_hook()
 * with size 0, so the error is picked up by the receive path there.
 */
1940 nl_async_err_hook(sock
*sk
, int e UNUSED
)
1942 nl_async_hook(sk
, 0);
/*
 * Setup of the asynchronous Netlink socket.
 *
 * NOTE(review): the header of this function (and some statements, braces and
 * returns) is missing from this listing; presumably this is the routine that
 * opens the async socket once -- confirm against the full file.
 *
 * Visible steps: create a raw NETLINK_ROUTE socket, bind it to the link,
 * IPv4/IPv6 address and IPv4/IPv6 route multicast groups, allocate the
 * receive buffer, and wrap the descriptor in a BIRD socket of type SK_MAGIC
 * with nl_async_hook / nl_async_err_hook as rx / error hooks. Failing
 * sk_open() is treated as a bug().
 */
1949 struct sockaddr_nl sa
;
1955 DBG("KRT: Opening async netlink socket\n");
1957 fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
1960 log(L_ERR
"Unable to open asynchronous rtnetlink socket: %m");
/* Subscribe to link, address and route notification groups */
1964 bzero(&sa
, sizeof(sa
));
1965 sa
.nl_family
= AF_NETLINK
;
1966 sa
.nl_groups
= RTMGRP_LINK
|
1967 RTMGRP_IPV4_IFADDR
| RTMGRP_IPV4_ROUTE
|
1968 RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_ROUTE
;
1970 if (bind(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) < 0)
1972 log(L_ERR
"Unable to bind asynchronous rtnetlink socket: %m");
1977 nl_async_rx_buffer
= xmalloc(NL_RX_SIZE
);
/* Create the BIRD socket object in krt_pool and install the hooks */
1979 sk
= nl_async_sk
= sk_new(krt_pool
);
1980 sk
->type
= SK_MAGIC
;
1981 sk
->rx_hook
= nl_async_hook
;
1982 sk
->err_hook
= nl_async_err_hook
;
1984 if (sk_open(sk
) < 0)
1985 bug("Netlink: sk_open failed");
1990 * Interface to the UNIX krt module
/*
 * krt_sys_io_init - one-time initialization of the Netlink backend state:
 * creates the linpool nl_linpool in krt_pool and initializes the hash
 * nl_table_map (third HASH_INIT argument 6) in the same pool.
 */
1994 krt_sys_io_init(void)
1996 nl_linpool
= lp_new_default(krt_pool
);
1997 HASH_INIT(nl_table_map
, krt_pool
, 6);
/*
 * krt_sys_start - register kernel protocol instance @p in nl_table_map.
 *
 * The map is first searched for an entry with the same (p->af, table id)
 * key; an existing registration is reported as "Kernel table %u already
 * registered by %s". The instance is then inserted with HASH_INSERT2().
 *
 * NOTE(review): the condition around the error message, the return
 * statements and any further start-up steps are missing from this listing.
 */
2001 krt_sys_start(struct krt_proto
*p
)
2003 struct krt_proto
*old
= HASH_FIND(nl_table_map
, RTH
, p
->af
, krt_table_id(p
));
2007 log(L_ERR
"%s: Kernel table %u already registered by %s",
2008 p
->p
.name
, krt_table_id(p
), old
->p
.name
);
2012 HASH_INSERT2(nl_table_map
, RTH
, krt_pool
, p
);
/*
 * krt_sys_shutdown - remove kernel protocol instance @p from nl_table_map,
 * undoing the insertion performed in krt_sys_start().
 */
2021 krt_sys_shutdown(struct krt_proto
*p
)
2023 HASH_REMOVE2(nl_table_map
, RTH
, krt_pool
, p
);
/*
 * krt_sys_reconfigure - compare the system-dependent parts of a new (@n) and
 * an old (@o) kernel protocol configuration.
 *
 * Returns nonzero iff both the table id and the metric are unchanged.
 * @p is marked UNUSED.
 */
2027 krt_sys_reconfigure(struct krt_proto
*p UNUSED
, struct krt_config
*n
, struct krt_config
*o
)
2029 return (n
->sys
.table_id
== o
->sys
.table_id
) && (n
->sys
.metric
== o
->sys
.metric
);
/*
 * krt_sys_init_config - fill the system-dependent part of @cf with defaults:
 * table id RT_TABLE_MAIN and metric 32.
 */
2033 krt_sys_init_config(struct krt_config
*cf
)
2035 cf
->sys
.table_id
= RT_TABLE_MAIN
;
2036 cf
->sys
.metric
= 32;
/*
 * krt_sys_copy_config - copy the system-dependent settings (table id and
 * metric) from configuration @s to configuration @d.
 */
2040 krt_sys_copy_config(struct krt_config
*d
, struct krt_config
*s
)
2042 d
->sys
.table_id
= s
->sys
.table_id
;
2043 d
->sys
.metric
= s
->sys
.metric
;
/*
 * Display names of kernel route metrics. krt_sys_get_attr() indexes this
 * table with (attribute id - KRT_METRICS_OFFSET) and passes it to
 * ea_format_bitfield(); slot 0 has no name.
 * NOTE(review): the closing of the initializer is missing from this listing.
 */
2046 static const char *krt_metrics_names
[KRT_METRICS_MAX
] = {
2047 NULL
, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2048 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
/*
 * Display names of the bits of the "features" attribute, passed to
 * ea_format_bitfield() by krt_sys_get_attr(); NULL slots have no name.
 * NOTE(review): the closing of the initializer is missing from this listing.
 */
2051 static const char *krt_features_names
[KRT_FEATURES_MAX
] = {
2052 "ecn", NULL
, NULL
, "allfrag"
/*
 * krt_sys_get_attr - format the name (and for bitfields the value) of a
 * kernel-specific extended attribute @a into @buf.
 *
 * Visible cases: "prefsrc", "realm" and "scope" write just the name; the
 * "lock:" and "features:" cases write a prefix and let ea_format_bitfield()
 * append the set bits using krt_metrics_names / krt_features_names; the
 * remaining case maps (EA_ID(a->id) - KRT_METRICS_OFFSET), when in the range
 * 1 .. KRT_METRICS_MAX-1, to a name from krt_metrics_names.
 *
 * NOTE(review): @buflen is marked UNUSED although it is passed to
 * ea_format_bitfield(). It is also passed unchanged after @buf has been
 * advanced past the prefix; if ea_format_bitfield() treats it as the space
 * available at the given pointer, the prefix length should be subtracted --
 * confirm against ea_format_bitfield().
 *
 * NOTE(review): the switch statement, several case labels and all return
 * statements are missing from this listing.
 */
2056 krt_sys_get_attr(eattr
*a
, byte
*buf
, int buflen UNUSED
)
2060 case EA_KRT_PREFSRC
:
2061 bsprintf(buf
, "prefsrc");
2065 bsprintf(buf
, "realm");
2069 bsprintf(buf
, "scope");
/* Bitfield of locked metrics; names start at index 2 of krt_metrics_names */
2073 buf
+= bsprintf(buf
, "lock:");
2074 ea_format_bitfield(a
, buf
, buflen
, krt_metrics_names
, 2, KRT_METRICS_MAX
);
2077 case EA_KRT_FEATURES
:
2078 buf
+= bsprintf(buf
, "features:");
2079 ea_format_bitfield(a
, buf
, buflen
, krt_features_names
, 0, KRT_FEATURES_MAX
);
/* Remaining attributes: look the name up by metric index */
2083 int id
= (int)EA_ID(a
->id
) - KRT_METRICS_OFFSET
;
2084 if (id
> 0 && id
< KRT_METRICS_MAX
)
2086 bsprintf(buf
, "%s", krt_metrics_names
[id
]);
/*
 * kif_sys_start - system-dependent start routine of the kif protocol.
 * The argument is marked UNUSED.
 * NOTE(review): the body is missing from this listing.
 */
2097 kif_sys_start(struct kif_proto
*p UNUSED
)
/*
 * kif_sys_shutdown - system-dependent shutdown routine of the kif protocol.
 * The argument is marked UNUSED.
 * NOTE(review): the body is missing from this listing.
 */
2104 kif_sys_shutdown(struct kif_proto
*p UNUSED
)
2109 kif_update_sysdep_addr(struct iface
*i UNUSED
)