2 * BIRD -- Linux Netlink Interface
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
13 #include <sys/socket.h>
19 #include "nest/bird.h"
20 #include "nest/route.h"
21 #include "nest/protocol.h"
22 #include "nest/iface.h"
23 #include "lib/alloca.h"
24 #include "sysdep/unix/unix.h"
25 #include "sysdep/unix/krt.h"
26 #include "lib/socket.h"
27 #include "lib/string.h"
29 #include "conf/conf.h"
31 #include <asm/types.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
36 #ifdef HAVE_MPLS_KERNEL
37 #include <linux/lwtunnel.h>
40 #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
41 #define MSG_TRUNC 0x20
49 #define IFF_LOWER_UP 0x10000
64 #ifndef RTA_ENCAP_TYPE
65 #define RTA_ENCAP_TYPE 21
72 #define krt_ecmp6(p) ((p)->af == AF_INET6)
74 const int rt_default_ecmp
= 16;
/*
 * Structure nl_parse_state keeps the state of received route processing.
 * Ideally, we could just parse received Netlink messages independently and
 * immediately propagate received routes to the rest of BIRD, but older Linux
 * kernels (before version 4.11) represent and announce IPv6 ECMP routes not as
 * one route with multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as
 * a sequence of routes with the same prefix. More recent kernels behave as
 * with IPv4.
 *
 * Therefore, BIRD keeps the currently processed route in the nl_parse_state
 * structure and postpones its propagation until we expect it to be final;
 * i.e., when a non-matching route is received or when the scan ends. When
 * another matching route is received, it is merged with the already processed
 * route to form an ECMP route. Note that merging is done only for IPv6
 * (merge == 1), but the postponing is done in both cases (for simplicity).
 * All IPv4 routes, as well as IPv6 routes with RTA_MULTIPATH set, are just
 * considered non-matching.
 *
 * This is ignored for asynchronous notifications (every notification is
 * handled as a separate route). It is not an issue for our routes, as we
 * ignore such notifications anyway. But importing alien IPv6 ECMP routes does
 * not work properly with older kernels.
 *
 * In the sending direction, whatever the kernel version is, IPv6 ECMP routes
 * are sent to the kernel as multiple routes for the same prefix.
 */
101 struct nl_parse_state
103 struct linpool
*pool
;
109 struct krt_proto
*proto
;
118 * Synchronous Netlink interface
125 byte
*rx_buffer
; /* Receive buffer */
126 struct nlmsghdr
*last_hdr
; /* Recently received packet */
/* Size in bytes of the receive buffer allocated for each Netlink socket */
#define NL_RX_SIZE 8192

/*
 * Route operation codes. NL_OP_DELETE is zero; the others are NLM_F_* flag
 * combinations that get OR-ed into nlmsg_flags of the route request.
 */
#define NL_OP_DELETE	0
#define NL_OP_ADD	(NLM_F_CREATE|NLM_F_EXCL)
#define NL_OP_REPLACE	(NLM_F_CREATE|NLM_F_REPLACE)
#define NL_OP_APPEND	(NLM_F_CREATE|NLM_F_APPEND)
137 static linpool
*nl_linpool
;
139 static struct nl_sock nl_scan
= {.fd
= -1}; /* Netlink socket for synchronous scan */
140 static struct nl_sock nl_req
= {.fd
= -1}; /* Netlink socket for requests */
143 nl_open_sock(struct nl_sock
*nl
)
147 nl
->fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
149 die("Unable to open rtnetlink socket: %m");
150 nl
->seq
= (u32
) (current_time() TO_S
); /* Or perhaps random_u32() ? */
151 nl
->rx_buffer
= xmalloc(NL_RX_SIZE
);
160 nl_open_sock(&nl_scan
);
161 nl_open_sock(&nl_req
);
165 nl_send(struct nl_sock
*nl
, struct nlmsghdr
*nh
)
167 struct sockaddr_nl sa
;
169 memset(&sa
, 0, sizeof(sa
));
170 sa
.nl_family
= AF_NETLINK
;
172 nh
->nlmsg_seq
= ++(nl
->seq
);
173 if (sendto(nl
->fd
, nh
, nh
->nlmsg_len
, 0, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0)
174 die("rtnetlink sendto: %m");
179 nl_request_dump(int af
, int cmd
)
185 .nh
.nlmsg_type
= cmd
,
186 .nh
.nlmsg_len
= sizeof(req
),
187 .nh
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
,
190 nl_send(&nl_scan
, &req
.nh
);
193 static struct nlmsghdr
*
194 nl_get_reply(struct nl_sock
*nl
)
200 struct iovec iov
= { nl
->rx_buffer
, NL_RX_SIZE
};
201 struct sockaddr_nl sa
;
204 .msg_namelen
= sizeof(sa
),
208 int x
= recvmsg(nl
->fd
, &m
, 0);
210 die("nl_get_reply: %m");
211 if (sa
.nl_pid
) /* It isn't from the kernel */
213 DBG("Non-kernel packet\n");
217 nl
->last_hdr
= (void *) nl
->rx_buffer
;
218 if (m
.msg_flags
& MSG_TRUNC
)
219 bug("nl_get_reply: got truncated reply which should be impossible");
221 if (NLMSG_OK(nl
->last_hdr
, nl
->last_size
))
223 struct nlmsghdr
*h
= nl
->last_hdr
;
224 nl
->last_hdr
= NLMSG_NEXT(h
, nl
->last_size
);
225 if (h
->nlmsg_seq
!= nl
->seq
)
227 log(L_WARN
"nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
228 h
->nlmsg_seq
, nl
->seq
);
234 log(L_WARN
"nl_get_reply: Found packet remnant of size %d", nl
->last_size
);
239 static struct tbf rl_netlink_err
= TBF_DEFAULT_LOG_LIMITS
;
242 nl_error(struct nlmsghdr
*h
, int ignore_esrch
)
247 if (h
->nlmsg_len
< NLMSG_LENGTH(sizeof(struct nlmsgerr
)))
249 log(L_WARN
"Netlink: Truncated error message received");
252 e
= (struct nlmsgerr
*) NLMSG_DATA(h
);
254 if (ec
&& !(ignore_esrch
&& (ec
== ESRCH
)))
255 log_rl(&rl_netlink_err
, L_WARN
"Netlink: %s", strerror(ec
));
259 static struct nlmsghdr
*
262 struct nlmsghdr
*h
= nl_get_reply(&nl_scan
);
264 if (h
->nlmsg_type
== NLMSG_DONE
)
266 if (h
->nlmsg_type
== NLMSG_ERROR
)
275 nl_exchange(struct nlmsghdr
*pkt
, int ignore_esrch
)
279 nl_send(&nl_req
, pkt
);
282 h
= nl_get_reply(&nl_req
);
283 if (h
->nlmsg_type
== NLMSG_ERROR
)
285 log(L_WARN
"nl_exchange: Unexpected reply received");
287 return nl_error(h
, ignore_esrch
) ? -1 : 0;
294 static int nl_attr_len
;
297 nl_checkin(struct nlmsghdr
*h
, int lsize
)
299 nl_attr_len
= h
->nlmsg_len
- NLMSG_LENGTH(lsize
);
302 log(L_ERR
"nl_checkin: underrun by %d bytes", -nl_attr_len
);
305 return NLMSG_DATA(h
);
308 struct nl_want_attrs
{
315 #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
317 static struct nl_want_attrs ifla_attr_want
[BIRD_IFLA_MAX
] = {
318 [IFLA_IFNAME
] = { 1, 0, 0 },
319 [IFLA_MTU
] = { 1, 1, sizeof(u32
) },
320 [IFLA_MASTER
] = { 1, 1, sizeof(u32
) },
321 [IFLA_WIRELESS
] = { 1, 0, 0 },
325 #define BIRD_IFA_MAX (IFA_FLAGS+1)
327 static struct nl_want_attrs ifa_attr_want4
[BIRD_IFA_MAX
] = {
328 [IFA_ADDRESS
] = { 1, 1, sizeof(ip4_addr
) },
329 [IFA_LOCAL
] = { 1, 1, sizeof(ip4_addr
) },
330 [IFA_BROADCAST
] = { 1, 1, sizeof(ip4_addr
) },
331 [IFA_FLAGS
] = { 1, 1, sizeof(u32
) },
334 static struct nl_want_attrs ifa_attr_want6
[BIRD_IFA_MAX
] = {
335 [IFA_ADDRESS
] = { 1, 1, sizeof(ip6_addr
) },
336 [IFA_LOCAL
] = { 1, 1, sizeof(ip6_addr
) },
337 [IFA_FLAGS
] = { 1, 1, sizeof(u32
) },
341 #define BIRD_RTA_MAX (RTA_ENCAP+1)
343 static struct nl_want_attrs nexthop_attr_want4
[BIRD_RTA_MAX
] = {
344 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
345 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
346 [RTA_ENCAP
] = { 1, 0, 0 },
349 static struct nl_want_attrs nexthop_attr_want6
[BIRD_RTA_MAX
] = {
350 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
351 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
352 [RTA_ENCAP
] = { 1, 0, 0 },
355 #ifdef HAVE_MPLS_KERNEL
356 static struct nl_want_attrs encap_mpls_want
[BIRD_RTA_MAX
] = {
357 [RTA_DST
] = { 1, 0, 0 },
361 static struct nl_want_attrs rtm_attr_want4
[BIRD_RTA_MAX
] = {
362 [RTA_DST
] = { 1, 1, sizeof(ip4_addr
) },
363 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
364 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
365 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
366 [RTA_PREFSRC
] = { 1, 1, sizeof(ip4_addr
) },
367 [RTA_METRICS
] = { 1, 0, 0 },
368 [RTA_MULTIPATH
] = { 1, 0, 0 },
369 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
370 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
371 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
372 [RTA_ENCAP
] = { 1, 0, 0 },
375 static struct nl_want_attrs rtm_attr_want6
[BIRD_RTA_MAX
] = {
376 [RTA_DST
] = { 1, 1, sizeof(ip6_addr
) },
377 [RTA_SRC
] = { 1, 1, sizeof(ip6_addr
) },
378 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
379 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
380 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
381 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
382 [RTA_PREFSRC
] = { 1, 1, sizeof(ip6_addr
) },
383 [RTA_METRICS
] = { 1, 0, 0 },
384 [RTA_MULTIPATH
] = { 1, 0, 0 },
385 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
386 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
387 [RTA_ENCAP_TYPE
]= { 1, 1, sizeof(u16
) },
388 [RTA_ENCAP
] = { 1, 0, 0 },
391 #ifdef HAVE_MPLS_KERNEL
392 static struct nl_want_attrs rtm_attr_want_mpls
[BIRD_RTA_MAX
] = {
393 [RTA_DST
] = { 1, 1, sizeof(u32
) },
394 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
395 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
396 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
397 [RTA_METRICS
] = { 1, 0, 0 },
398 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
399 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
400 [RTA_VIA
] = { 1, 0, 0 },
401 [RTA_NEWDST
] = { 1, 0, 0 },
407 nl_parse_attrs(struct rtattr
*a
, struct nl_want_attrs
*want
, struct rtattr
**k
, int ksize
)
409 int max
= ksize
/ sizeof(struct rtattr
*);
412 for ( ; RTA_OK(a
, nl_attr_len
); a
= RTA_NEXT(a
, nl_attr_len
))
414 if ((a
->rta_type
>= max
) || !want
[a
->rta_type
].defined
)
417 if (want
[a
->rta_type
].checksize
&& (RTA_PAYLOAD(a
) != want
[a
->rta_type
].size
))
419 log(L_ERR
"nl_parse_attrs: Malformed attribute received");
428 log(L_ERR
"nl_parse_attrs: remnant of size %d", nl_attr_len
);
435 static inline u16
rta_get_u16(struct rtattr
*a
)
436 { return *(u16
*) RTA_DATA(a
); }
438 static inline u32
rta_get_u32(struct rtattr
*a
)
439 { return *(u32
*) RTA_DATA(a
); }
441 static inline ip4_addr
rta_get_ip4(struct rtattr
*a
)
442 { return ip4_ntoh(*(ip4_addr
*) RTA_DATA(a
)); }
444 static inline ip6_addr
rta_get_ip6(struct rtattr
*a
)
445 { return ip6_ntoh(*(ip6_addr
*) RTA_DATA(a
)); }
447 static inline ip_addr
rta_get_ipa(struct rtattr
*a
)
449 if (RTA_PAYLOAD(a
) == sizeof(ip4_addr
))
450 return ipa_from_ip4(rta_get_ip4(a
));
452 return ipa_from_ip6(rta_get_ip6(a
));
455 #ifdef HAVE_MPLS_KERNEL
456 static inline ip_addr
rta_get_via(struct rtattr
*a
)
458 struct rtvia
*v
= RTA_DATA(a
);
459 switch(v
->rtvia_family
) {
460 case AF_INET
: return ipa_from_ip4(ip4_ntoh(*(ip4_addr
*) v
->rtvia_addr
));
461 case AF_INET6
: return ipa_from_ip6(ip6_ntoh(*(ip6_addr
*) v
->rtvia_addr
));
466 static u32 rta_mpls_stack
[MPLS_MAX_LABEL_STACK
];
467 static inline int rta_get_mpls(struct rtattr
*a
, u32
*stack
)
469 if (RTA_PAYLOAD(a
) % 4)
470 log(L_WARN
"KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a
));
472 return mpls_get(RTA_DATA(a
), RTA_PAYLOAD(a
) & ~0x3, stack
);
477 nl_add_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
, const void *data
, uint dlen
)
479 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
480 uint len
= RTA_LENGTH(dlen
);
482 if (pos
+ len
> bufsize
)
483 bug("nl_add_attr: packet buffer overflow");
485 struct rtattr
*a
= (struct rtattr
*)((char *)h
+ pos
);
488 h
->nlmsg_len
= pos
+ len
;
491 memcpy(RTA_DATA(a
), data
, dlen
);
496 static inline struct rtattr
*
497 nl_open_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
)
499 return nl_add_attr(h
, bufsize
, code
, NULL
, 0);
503 nl_close_attr(struct nlmsghdr
*h
, struct rtattr
*a
)
505 a
->rta_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)a
;
509 nl_add_attr_u16(struct nlmsghdr
*h
, uint bufsize
, int code
, u16 data
)
511 nl_add_attr(h
, bufsize
, code
, &data
, 2);
515 nl_add_attr_u32(struct nlmsghdr
*h
, uint bufsize
, int code
, u32 data
)
517 nl_add_attr(h
, bufsize
, code
, &data
, 4);
521 nl_add_attr_ip4(struct nlmsghdr
*h
, uint bufsize
, int code
, ip4_addr ip4
)
524 nl_add_attr(h
, bufsize
, code
, &ip4
, sizeof(ip4
));
528 nl_add_attr_ip6(struct nlmsghdr
*h
, uint bufsize
, int code
, ip6_addr ip6
)
531 nl_add_attr(h
, bufsize
, code
, &ip6
, sizeof(ip6
));
535 nl_add_attr_ipa(struct nlmsghdr
*h
, uint bufsize
, int code
, ip_addr ipa
)
538 nl_add_attr_ip4(h
, bufsize
, code
, ipa_to_ip4(ipa
));
540 nl_add_attr_ip6(h
, bufsize
, code
, ipa_to_ip6(ipa
));
543 #ifdef HAVE_MPLS_KERNEL
545 nl_add_attr_mpls(struct nlmsghdr
*h
, uint bufsize
, int code
, int len
, u32
*stack
)
548 mpls_put(buf
, len
, stack
);
549 nl_add_attr(h
, bufsize
, code
, buf
, len
*4);
553 nl_add_attr_mpls_encap(struct nlmsghdr
*h
, uint bufsize
, int len
, u32
*stack
)
555 nl_add_attr_u16(h
, bufsize
, RTA_ENCAP_TYPE
, LWTUNNEL_ENCAP_MPLS
);
557 struct rtattr
*nest
= nl_open_attr(h
, bufsize
, RTA_ENCAP
);
558 nl_add_attr_mpls(h
, bufsize
, RTA_DST
, len
, stack
);
559 nl_close_attr(h
, nest
);
563 nl_add_attr_via(struct nlmsghdr
*h
, uint bufsize
, ip_addr ipa
)
565 struct rtvia
*via
= alloca(sizeof(struct rtvia
) + 16);
569 via
->rtvia_family
= AF_INET
;
570 put_ip4(via
->rtvia_addr
, ipa_to_ip4(ipa
));
571 nl_add_attr(h
, bufsize
, RTA_VIA
, via
, sizeof(struct rtvia
) + 4);
575 via
->rtvia_family
= AF_INET6
;
576 put_ip6(via
->rtvia_addr
, ipa_to_ip6(ipa
));
577 nl_add_attr(h
, bufsize
, RTA_VIA
, via
, sizeof(struct rtvia
) + 16);
582 static inline struct rtnexthop
*
583 nl_open_nexthop(struct nlmsghdr
*h
, uint bufsize
)
585 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
586 uint len
= RTNH_LENGTH(0);
588 if (pos
+ len
> bufsize
)
589 bug("nl_open_nexthop: packet buffer overflow");
591 h
->nlmsg_len
= pos
+ len
;
593 return (void *)h
+ pos
;
597 nl_close_nexthop(struct nlmsghdr
*h
, struct rtnexthop
*nh
)
599 nh
->rtnh_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)nh
;
603 nl_add_nexthop(struct nlmsghdr
*h
, uint bufsize
, struct nexthop
*nh
, int af UNUSED
)
605 #ifdef HAVE_MPLS_KERNEL
608 nl_add_attr_mpls(h
, bufsize
, RTA_NEWDST
, nh
->labels
, nh
->label
);
610 nl_add_attr_mpls_encap(h
, bufsize
, nh
->labels
, nh
->label
);
612 if (ipa_nonzero(nh
->gw
))
614 nl_add_attr_via(h
, bufsize
, nh
->gw
);
616 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
619 if (ipa_nonzero(nh
->gw
))
620 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
625 nl_add_multipath(struct nlmsghdr
*h
, uint bufsize
, struct nexthop
*nh
, int af
)
627 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_MULTIPATH
);
629 for (; nh
; nh
= nh
->next
)
631 struct rtnexthop
*rtnh
= nl_open_nexthop(h
, bufsize
);
633 rtnh
->rtnh_flags
= 0;
634 rtnh
->rtnh_hops
= nh
->weight
;
635 rtnh
->rtnh_ifindex
= nh
->iface
->index
;
637 nl_add_nexthop(h
, bufsize
, nh
, af
);
639 if (nh
->flags
& RNF_ONLINK
)
640 rtnh
->rtnh_flags
|= RTNH_F_ONLINK
;
642 nl_close_nexthop(h
, rtnh
);
648 static struct nexthop
*
649 nl_parse_multipath(struct nl_parse_state
*s
, struct krt_proto
*p
, struct rtattr
*ra
, int af
)
651 struct rtattr
*a
[BIRD_RTA_MAX
];
652 struct rtnexthop
*nh
= RTA_DATA(ra
);
653 struct nexthop
*rv
, *first
, **last
;
654 unsigned len
= RTA_PAYLOAD(ra
);
661 /* Use RTNH_OK(nh,len) ?? */
662 if ((len
< sizeof(*nh
)) || (len
< nh
->rtnh_len
))
665 *last
= rv
= lp_allocz(s
->pool
, NEXTHOP_MAX_SIZE
);
668 rv
->weight
= nh
->rtnh_hops
;
669 rv
->iface
= if_find_by_index(nh
->rtnh_ifindex
);
673 /* Nonexistent RTNH_PAYLOAD ?? */
674 nl_attr_len
= nh
->rtnh_len
- RTNH_LENGTH(0);
678 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want4
, a
, sizeof(a
)))
683 if (!nl_parse_attrs(RTNH_DATA(nh
), nexthop_attr_want6
, a
, sizeof(a
)))
693 rv
->gw
= rta_get_ipa(a
[RTA_GATEWAY
]);
695 if (nh
->rtnh_flags
& RTNH_F_ONLINK
)
696 rv
->flags
|= RNF_ONLINK
;
699 nbr
= neigh_find(&p
->p
, rv
->gw
, rv
->iface
,
700 (rv
->flags
& RNF_ONLINK
) ? NEF_ONLINK
: 0);
701 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
707 #ifdef HAVE_MPLS_KERNEL
708 if (a
[RTA_ENCAP_TYPE
])
710 if (rta_get_u16(a
[RTA_ENCAP_TYPE
]) != LWTUNNEL_ENCAP_MPLS
) {
711 log(L_WARN
"KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a
[RTA_ENCAP_TYPE
]));
715 struct rtattr
*enca
[BIRD_RTA_MAX
];
716 nl_attr_len
= RTA_PAYLOAD(a
[RTA_ENCAP
]);
717 nl_parse_attrs(RTA_DATA(a
[RTA_ENCAP
]), encap_mpls_want
, enca
, sizeof(enca
));
718 rv
->labels
= rta_get_mpls(enca
[RTA_DST
], rv
->label
);
724 len
-= NLMSG_ALIGN(nh
->rtnh_len
);
732 nl_add_metrics(struct nlmsghdr
*h
, uint bufsize
, u32
*metrics
, int max
)
734 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_METRICS
);
737 for (t
= 1; t
< max
; t
++)
738 if (metrics
[0] & (1 << t
))
739 nl_add_attr_u32(h
, bufsize
, t
, metrics
[t
]);
745 nl_parse_metrics(struct rtattr
*hdr
, u32
*metrics
, int max
)
747 struct rtattr
*a
= RTA_DATA(hdr
);
748 int len
= RTA_PAYLOAD(hdr
);
751 for (; RTA_OK(a
, len
); a
= RTA_NEXT(a
, len
))
753 if (a
->rta_type
== RTA_UNSPEC
)
756 if (a
->rta_type
>= max
)
759 if (RTA_PAYLOAD(a
) != 4)
762 metrics
[0] |= 1 << a
->rta_type
;
763 metrics
[a
->rta_type
] = rta_get_u32(a
);
774 * Scanning of interfaces
778 nl_parse_link(struct nlmsghdr
*h
, int scan
)
781 struct rtattr
*a
[BIRD_IFLA_MAX
];
782 int new = h
->nlmsg_type
== RTM_NEWLINK
;
789 if (!(i
= nl_checkin(h
, sizeof(*i
))) || !nl_parse_attrs(IFLA_RTA(i
), ifla_attr_want
, a
, sizeof(a
)))
791 if (!a
[IFLA_IFNAME
] || (RTA_PAYLOAD(a
[IFLA_IFNAME
]) < 2) || !a
[IFLA_MTU
])
794 * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
795 * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
796 * We simply ignore all such messages with IFLA_WIRELESS without notice.
799 if (a
[IFLA_WIRELESS
])
802 log(L_ERR
"KIF: Malformed message received");
806 name
= RTA_DATA(a
[IFLA_IFNAME
]);
807 mtu
= rta_get_u32(a
[IFLA_MTU
]);
810 master
= rta_get_u32(a
[IFLA_MASTER
]);
812 ifi
= if_find_by_index(i
->ifi_index
);
815 DBG("KIF: IF%d(%s) goes down\n", i
->ifi_index
, name
);
823 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i
->ifi_index
, name
, mtu
, i
->ifi_flags
);
824 if (ifi
&& strncmp(ifi
->name
, name
, sizeof(ifi
->name
)-1))
827 strncpy(f
.name
, name
, sizeof(f
.name
)-1);
828 f
.index
= i
->ifi_index
;
831 f
.master_index
= master
;
832 f
.master
= if_find_by_index(master
);
836 f
.flags
|= IF_ADMIN_UP
;
837 if (fl
& IFF_LOWER_UP
)
838 f
.flags
|= IF_LINK_UP
;
839 if (fl
& IFF_LOOPBACK
) /* Loopback */
840 f
.flags
|= IF_MULTIACCESS
| IF_LOOPBACK
| IF_IGNORE
;
841 else if (fl
& IFF_POINTOPOINT
) /* PtP */
842 f
.flags
|= IF_MULTICAST
;
843 else if (fl
& IFF_BROADCAST
) /* Broadcast */
844 f
.flags
|= IF_MULTIACCESS
| IF_BROADCAST
| IF_MULTICAST
;
846 f
.flags
|= IF_MULTIACCESS
; /* NBMA */
848 if (fl
& IFF_MULTICAST
)
849 f
.flags
|= IF_MULTICAST
;
854 if_end_partial_update(ifi
);
859 nl_parse_addr4(struct ifaddrmsg
*i
, int scan
, int new)
861 struct rtattr
*a
[BIRD_IFA_MAX
];
866 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want4
, a
, sizeof(a
)))
871 log(L_ERR
"KIF: Malformed message received (missing IFA_LOCAL)");
876 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
880 ifi
= if_find_by_index(i
->ifa_index
);
883 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
888 ifa_flags
= rta_get_u32(a
[IFA_FLAGS
]);
890 ifa_flags
= i
->ifa_flags
;
893 bzero(&ifa
, sizeof(ifa
));
895 if (ifa_flags
& IFA_F_SECONDARY
)
896 ifa
.flags
|= IA_SECONDARY
;
898 ifa
.ip
= rta_get_ipa(a
[IFA_LOCAL
]);
900 if (i
->ifa_prefixlen
> IP4_MAX_PREFIX_LENGTH
)
902 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
905 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
)
907 ifa
.brd
= rta_get_ipa(a
[IFA_ADDRESS
]);
908 net_fill_ip4(&ifa
.prefix
, rta_get_ip4(a
[IFA_ADDRESS
]), i
->ifa_prefixlen
);
910 /* It is either a host address or a peer address */
911 if (ipa_equal(ifa
.ip
, ifa
.brd
))
912 ifa
.flags
|= IA_HOST
;
915 ifa
.flags
|= IA_PEER
;
916 ifa
.opposite
= ifa
.brd
;
921 net_fill_ip4(&ifa
.prefix
, ipa_to_ip4(ifa
.ip
), i
->ifa_prefixlen
);
922 net_normalize(&ifa
.prefix
);
924 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
- 1)
925 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
927 if (i
->ifa_prefixlen
== IP4_MAX_PREFIX_LENGTH
- 2)
928 ifa
.opposite
= ipa_opposite_m2(ifa
.ip
);
930 if ((ifi
->flags
& IF_BROADCAST
) && a
[IFA_BROADCAST
])
932 ip4_addr xbrd
= rta_get_ip4(a
[IFA_BROADCAST
]);
933 ip4_addr ybrd
= ip4_or(ipa_to_ip4(ifa
.ip
), ip4_not(ip4_mkmask(i
->ifa_prefixlen
)));
935 if (ip4_equal(xbrd
, net4_prefix(&ifa
.prefix
)) || ip4_equal(xbrd
, ybrd
))
936 ifa
.brd
= ipa_from_ip4(xbrd
);
937 else if (ifi
->flags
& IF_TMP_DOWN
) /* Complain only during the first scan */
939 log(L_ERR
"KIF: Invalid broadcast address %I4 for %s", xbrd
, ifi
->name
);
940 ifa
.brd
= ipa_from_ip4(ybrd
);
945 scope
= ipa_classify(ifa
.ip
);
948 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
951 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
953 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
954 ifi
->index
, ifi
->name
,
955 new ? "added" : "removed",
956 ifa
.ip
, ifa
.flags
, ifa
.prefix
, ifa
.brd
, ifa
.opposite
);
964 if_end_partial_update(ifi
);
968 nl_parse_addr6(struct ifaddrmsg
*i
, int scan
, int new)
970 struct rtattr
*a
[BIRD_IFA_MAX
];
975 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want6
, a
, sizeof(a
)))
980 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
984 ifi
= if_find_by_index(i
->ifa_index
);
987 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
992 ifa_flags
= rta_get_u32(a
[IFA_FLAGS
]);
994 ifa_flags
= i
->ifa_flags
;
997 bzero(&ifa
, sizeof(ifa
));
999 if (ifa_flags
& IFA_F_SECONDARY
)
1000 ifa
.flags
|= IA_SECONDARY
;
1002 /* Ignore tentative addresses silently */
1003 if (ifa_flags
& IFA_F_TENTATIVE
)
1006 /* IFA_LOCAL can be unset for IPv6 interfaces */
1007 ifa
.ip
= rta_get_ipa(a
[IFA_LOCAL
] ? : a
[IFA_ADDRESS
]);
1009 if (i
->ifa_prefixlen
> IP6_MAX_PREFIX_LENGTH
)
1011 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
1014 if (i
->ifa_prefixlen
== IP6_MAX_PREFIX_LENGTH
)
1016 ifa
.brd
= rta_get_ipa(a
[IFA_ADDRESS
]);
1017 net_fill_ip6(&ifa
.prefix
, rta_get_ip6(a
[IFA_ADDRESS
]), i
->ifa_prefixlen
);
1019 /* It is either a host address or a peer address */
1020 if (ipa_equal(ifa
.ip
, ifa
.brd
))
1021 ifa
.flags
|= IA_HOST
;
1024 ifa
.flags
|= IA_PEER
;
1025 ifa
.opposite
= ifa
.brd
;
1030 net_fill_ip6(&ifa
.prefix
, ipa_to_ip6(ifa
.ip
), i
->ifa_prefixlen
);
1031 net_normalize(&ifa
.prefix
);
1033 if (i
->ifa_prefixlen
== IP6_MAX_PREFIX_LENGTH
- 1)
1034 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
1037 scope
= ipa_classify(ifa
.ip
);
1040 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
1043 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
1045 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1046 ifi
->index
, ifi
->name
,
1047 new ? "added" : "removed",
1048 ifa
.ip
, ifa
.flags
, ifa
.prefix
, ifa
.brd
, ifa
.opposite
);
1056 if_end_partial_update(ifi
);
1060 nl_parse_addr(struct nlmsghdr
*h
, int scan
)
1062 struct ifaddrmsg
*i
;
1064 if (!(i
= nl_checkin(h
, sizeof(*i
))))
1067 int new = (h
->nlmsg_type
== RTM_NEWADDR
);
1069 switch (i
->ifa_family
)
1072 return nl_parse_addr4(i
, scan
, new);
1075 return nl_parse_addr6(i
, scan
, new);
1080 kif_do_scan(struct kif_proto
*p UNUSED
)
1086 nl_request_dump(AF_UNSPEC
, RTM_GETLINK
);
1087 while (h
= nl_get_scan())
1088 if (h
->nlmsg_type
== RTM_NEWLINK
|| h
->nlmsg_type
== RTM_DELLINK
)
1089 nl_parse_link(h
, 1);
1091 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1093 /* Re-resolve master interface for slaves */
1095 WALK_LIST(i
, iface_list
)
1096 if (i
->master_index
)
1102 .master_index
= i
->master_index
,
1103 .master
= if_find_by_index(i
->master_index
)
1106 if (f
.master
!= i
->master
)
1108 memcpy(f
.name
, i
->name
, sizeof(f
.name
));
1113 nl_request_dump(AF_INET
, RTM_GETADDR
);
1114 while (h
= nl_get_scan())
1115 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
1116 nl_parse_addr(h
, 1);
1118 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1120 nl_request_dump(AF_INET6
, RTM_GETADDR
);
1121 while (h
= nl_get_scan())
1122 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
1123 nl_parse_addr(h
, 1);
1125 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
1135 krt_table_id(struct krt_proto
*p
)
1137 return KRT_CF
->sys
.table_id
;
1140 static HASH(struct krt_proto
) nl_table_map
;
1142 #define RTH_KEY(p) p->af, krt_table_id(p)
1143 #define RTH_NEXT(p) p->sys.hash_next
1144 #define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1145 #define RTH_FN(a,i) a ^ u32_hash(i)
1147 #define RTH_REHASH rth_rehash
1148 #define RTH_PARAMS /8, *2, 2, 2, 6, 20
1150 HASH_DEFINE_REHASH_FN(RTH
, struct krt_proto
)
1161 case RTD_UNREACHABLE
:
1171 nh_bufsize(struct nexthop
*nh
)
1174 for (; nh
!= NULL
; nh
= nh
->next
)
1175 rv
+= RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr
)));
1180 nl_send_route(struct krt_proto
*p
, rte
*e
, int op
, int dest
, struct nexthop
*nh
)
1185 ea_list
*eattrs
= a
->eattrs
;
1186 int bufsize
= 128 + KRT_METRICS_MAX
*8 + nh_bufsize(&(a
->nh
));
1195 int rsize
= sizeof(*r
) + bufsize
;
1198 DBG("nl_send_route(%N,op=%x)\n", net
->n
.addr
, op
);
1200 bzero(&r
->h
, sizeof(r
->h
));
1201 bzero(&r
->r
, sizeof(r
->r
));
1202 r
->h
.nlmsg_type
= op
? RTM_NEWROUTE
: RTM_DELROUTE
;
1203 r
->h
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
1204 r
->h
.nlmsg_flags
= op
| NLM_F_REQUEST
| NLM_F_ACK
;
1206 r
->r
.rtm_family
= p
->af
;
1207 r
->r
.rtm_dst_len
= net_pxlen(net
->n
.addr
);
1208 r
->r
.rtm_protocol
= RTPROT_BIRD
;
1209 r
->r
.rtm_scope
= RT_SCOPE_NOWHERE
;
1210 #ifdef HAVE_MPLS_KERNEL
1211 if (p
->af
== AF_MPLS
)
1214 * Kernel MPLS code is a bit picky. We must:
1215 * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1216 * 2) Never use RTA_PRIORITY
1219 u32 label
= net_mpls(net
->n
.addr
);
1220 nl_add_attr_mpls(&r
->h
, rsize
, RTA_DST
, 1, &label
);
1221 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1222 r
->r
.rtm_type
= RTN_UNICAST
;
1227 nl_add_attr_ipa(&r
->h
, rsize
, RTA_DST
, net_prefix(net
->n
.addr
));
1229 /* Add source address for IPv6 SADR routes */
1230 if (net
->n
.addr
->type
== NET_IP6_SADR
)
1232 net_addr_ip6_sadr
*a
= (void *) &net
->n
.addr
;
1233 nl_add_attr_ip6(&r
->h
, rsize
, RTA_SRC
, a
->src_prefix
);
1234 r
->r
.rtm_src_len
= a
->src_pxlen
;
1239 * Strange behavior for RTM_DELROUTE:
1240 * 1) rtm_family is ignored in IPv6, works for IPv4
1241 * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1242 * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1245 if (krt_table_id(p
) < 256)
1246 r
->r
.rtm_table
= krt_table_id(p
);
1248 nl_add_attr_u32(&r
->h
, rsize
, RTA_TABLE
, krt_table_id(p
));
1250 if (p
->af
== AF_MPLS
)
1252 else if (a
->source
== RTS_DUMMY
)
1253 priority
= e
->u
.krt
.metric
;
1254 else if (KRT_CF
->sys
.metric
)
1255 priority
= KRT_CF
->sys
.metric
;
1256 else if ((op
!= NL_OP_DELETE
) && (ea
= ea_find(eattrs
, EA_KRT_METRIC
)))
1257 priority
= ea
->u
.data
;
1260 nl_add_attr_u32(&r
->h
, rsize
, RTA_PRIORITY
, priority
);
1262 /* For route delete, we do not specify remaining route attributes */
1263 if (op
== NL_OP_DELETE
)
1266 /* Default scope is LINK for device routes, UNIVERSE otherwise */
1267 if (p
->af
== AF_MPLS
)
1268 r
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
1269 else if (ea
= ea_find(eattrs
, EA_KRT_SCOPE
))
1270 r
->r
.rtm_scope
= ea
->u
.data
;
1272 r
->r
.rtm_scope
= (dest
== RTD_UNICAST
&& ipa_zero(nh
->gw
)) ? RT_SCOPE_LINK
: RT_SCOPE_UNIVERSE
;
1274 if (ea
= ea_find(eattrs
, EA_KRT_PREFSRC
))
1275 nl_add_attr_ipa(&r
->h
, rsize
, RTA_PREFSRC
, *(ip_addr
*)ea
->u
.ptr
->data
);
1277 if (ea
= ea_find(eattrs
, EA_KRT_REALM
))
1278 nl_add_attr_u32(&r
->h
, rsize
, RTA_FLOW
, ea
->u
.data
);
1281 u32 metrics
[KRT_METRICS_MAX
];
1284 struct ea_walk_state ews
= { .eattrs
= eattrs
};
1285 while (ea
= ea_walk(&ews
, EA_KRT_METRICS
, KRT_METRICS_MAX
))
1287 int id
= ea
->id
- EA_KRT_METRICS
;
1288 metrics
[0] |= 1 << id
;
1289 metrics
[id
] = ea
->u
.data
;
1293 nl_add_metrics(&r
->h
, rsize
, metrics
, KRT_METRICS_MAX
);
1300 r
->r
.rtm_type
= RTN_UNICAST
;
1301 if (nh
->next
&& !krt_ecmp6(p
))
1302 nl_add_multipath(&r
->h
, rsize
, nh
, p
->af
);
1305 nl_add_attr_u32(&r
->h
, rsize
, RTA_OIF
, nh
->iface
->index
);
1306 nl_add_nexthop(&r
->h
, rsize
, nh
, p
->af
);
1308 if (nh
->flags
& RNF_ONLINK
)
1309 r
->r
.rtm_flags
|= RTNH_F_ONLINK
;
1313 r
->r
.rtm_type
= RTN_BLACKHOLE
;
1315 case RTD_UNREACHABLE
:
1316 r
->r
.rtm_type
= RTN_UNREACHABLE
;
1319 r
->r
.rtm_type
= RTN_PROHIBIT
;
1324 bug("krt_capable inconsistent with nl_send_route");
1327 /* Ignore missing for DELETE */
1328 return nl_exchange(&r
->h
, (op
== NL_OP_DELETE
));
1332 nl_add_rte(struct krt_proto
*p
, rte
*e
)
1337 if (krt_ecmp6(p
) && a
->nh
.next
)
1339 struct nexthop
*nh
= &(a
->nh
);
1341 err
= nl_send_route(p
, e
, NL_OP_ADD
, RTD_UNICAST
, nh
);
1345 for (nh
= nh
->next
; nh
; nh
= nh
->next
)
1346 err
+= nl_send_route(p
, e
, NL_OP_APPEND
, RTD_UNICAST
, nh
);
1351 return nl_send_route(p
, e
, NL_OP_ADD
, a
->dest
, &(a
->nh
));
1355 nl_delete_rte(struct krt_proto
*p
, rte
*e
)
1359 /* For IPv6, we just repeatedly request DELETE until we get error */
1361 err
= nl_send_route(p
, e
, NL_OP_DELETE
, RTD_NONE
, NULL
);
1362 while (krt_ecmp6(p
) && !err
);
1368 krt_replace_rte(struct krt_proto
*p
, net
*n
, rte
*new, rte
*old
)
1373 * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1375 * 1) Does not check for matching rtm_protocol
1376 * 2) Has broken semantics for IPv6 ECMP
1377 * 3) Crashes some kernel version when used for IPv6 ECMP
1379 * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1380 * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1384 nl_delete_rte(p
, old
);
1387 err
= nl_add_rte(p
, new);
1390 n
->n
.flags
|= KRF_SYNC_ERROR
;
1392 n
->n
.flags
&= ~KRF_SYNC_ERROR
;
1396 nl_mergable_route(struct nl_parse_state
*s
, net
*net
, struct krt_proto
*p
, uint priority
, uint krt_type
)
1398 /* Route merging must be active */
1402 /* Saved and new route must have same network, proto/table, and priority */
1403 if ((s
->net
!= net
) || (s
->proto
!= p
) || (s
->krt_metric
!= priority
))
1406 /* Both must be regular unicast routes */
1407 if ((s
->krt_type
!= RTN_UNICAST
) || (krt_type
!= RTN_UNICAST
))
1414 nl_announce_route(struct nl_parse_state
*s
)
1416 rte
*e
= rte_get_temp(s
->attrs
);
1418 e
->u
.krt
.src
= s
->krt_src
;
1419 e
->u
.krt
.proto
= s
->krt_proto
;
1422 e
->u
.krt
.metric
= s
->krt_metric
;
1425 krt_got_route(s
->proto
, e
);
1427 krt_got_route_async(s
->proto
, e
, s
->new);
1436 nl_parse_begin(struct nl_parse_state
*s
, int scan
, int merge
)
1438 memset(s
, 0, sizeof (struct nl_parse_state
));
1439 s
->pool
= nl_linpool
;
1445 nl_parse_end(struct nl_parse_state
*s
)
1448 nl_announce_route(s
);
/*
 * Emit a debug message prefixed with "KRT: Ignoring route - " and return from
 * the enclosing (void) function. The first argument must be a string literal,
 * as it is concatenated with the prefix at compile time.
 */
#define SKIP(...) do { DBG("KRT: Ignoring route - " __VA_ARGS__); return; } while(0)
1455 nl_parse_route(struct nl_parse_state
*s
, struct nlmsghdr
*h
)
1457 struct krt_proto
*p
;
1459 struct rtattr
*a
[BIRD_RTA_MAX
];
1460 int new = h
->nlmsg_type
== RTM_NEWROUTE
;
1462 net_addr dst
, src
= {};
1466 u32 def_scope
= RT_SCOPE_UNIVERSE
;
1469 if (!(i
= nl_checkin(h
, sizeof(*i
))))
1472 switch (i
->rtm_family
)
1475 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want4
, a
, sizeof(a
)))
1479 net_fill_ip4(&dst
, rta_get_ip4(a
[RTA_DST
]), i
->rtm_dst_len
);
1481 net_fill_ip4(&dst
, IP4_NONE
, 0);
1485 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want6
, a
, sizeof(a
)))
1489 net_fill_ip6(&dst
, rta_get_ip6(a
[RTA_DST
]), i
->rtm_dst_len
);
1491 net_fill_ip6(&dst
, IP6_NONE
, 0);
1494 net_fill_ip6(&src
, rta_get_ip6(a
[RTA_SRC
]), i
->rtm_src_len
);
1496 net_fill_ip6(&src
, IP6_NONE
, 0);
1499 #ifdef HAVE_MPLS_KERNEL
1501 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want_mpls
, a
, sizeof(a
)))
1505 SKIP("MPLS route without RTA_DST");
1507 if (rta_get_mpls(a
[RTA_DST
], rta_mpls_stack
) != 1)
1508 SKIP("MPLS route with multi-label RTA_DST");
1510 net_fill_mpls(&dst
, rta_mpls_stack
[0]);
1519 oif
= rta_get_u32(a
[RTA_OIF
]);
1522 table_id
= rta_get_u32(a
[RTA_TABLE
]);
1524 table_id
= i
->rtm_table
;
1526 /* Do we know this table? */
1527 p
= HASH_FIND(nl_table_map
, RTH
, i
->rtm_family
, table_id
);
1529 SKIP("unknown table %d\n", table
);
1531 if (a
[RTA_SRC
] && (p
->p
.net_type
!= NET_IP6_SADR
))
1532 SKIP("src prefix for non-SADR channel\n");
1537 if (i
->rtm_tos
!= 0) /* We don't support TOS */
1538 SKIP("TOS %02x\n", i
->rtm_tos
);
1540 if (s
->scan
&& !new)
1541 SKIP("RTM_DELROUTE in scan\n");
1543 if (a
[RTA_PRIORITY
])
1544 priority
= rta_get_u32(a
[RTA_PRIORITY
]);
1546 int c
= net_classify(&dst
);
1547 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
1548 SKIP("strange class/scope\n");
1550 switch (i
->rtm_protocol
)
1553 SKIP("proto unspec\n");
1555 case RTPROT_REDIRECT
:
1556 krt_src
= KRT_SRC_REDIRECT
;
1560 krt_src
= KRT_SRC_KERNEL
;
1566 krt_src
= KRT_SRC_BIRD
;
1571 krt_src
= KRT_SRC_ALIEN
;
1575 if (p
->p
.net_type
== NET_IP6_SADR
)
1577 n
= alloca(sizeof(net_addr_ip6_sadr
));
1578 net_fill_ip6_sadr(n
, net6_prefix(&dst
), net6_pxlen(&dst
),
1579 net6_prefix(&src
), net6_pxlen(&src
));
1582 net
*net
= net_get(p
->p
.main_channel
->table
, n
);
1584 if (s
->net
&& !nl_mergable_route(s
, net
, p
, priority
, i
->rtm_type
))
1585 nl_announce_route(s
);
1587 rta
*ra
= lp_allocz(s
->pool
, RTA_MAX_SIZE
);
1588 ra
->src
= p
->p
.main_source
;
1589 ra
->source
= RTS_INHERIT
;
1590 ra
->scope
= SCOPE_UNIVERSE
;
1592 switch (i
->rtm_type
)
1595 ra
->dest
= RTD_UNICAST
;
1597 if (a
[RTA_MULTIPATH
])
1599 struct nexthop
*nh
= nl_parse_multipath(s
, p
, a
[RTA_MULTIPATH
], i
->rtm_family
);
1602 log(L_ERR
"KRT: Received strange multipath route %N", net
->n
.addr
);
1610 ra
->nh
.iface
= if_find_by_index(oif
);
1613 log(L_ERR
"KRT: Received route %N with unknown ifindex %u", net
->n
.addr
, oif
);
1617 if ((i
->rtm_family
!= AF_MPLS
) && a
[RTA_GATEWAY
]
1618 #ifdef HAVE_MPLS_KERNEL
1619 || (i
->rtm_family
== AF_MPLS
) && a
[RTA_VIA
]
1623 #ifdef HAVE_MPLS_KERNEL
1624 if (i
->rtm_family
== AF_MPLS
)
1625 ra
->nh
.gw
= rta_get_via(a
[RTA_VIA
]);
1628 ra
->nh
.gw
= rta_get_ipa(a
[RTA_GATEWAY
]);
1630 /* Silently skip strange 6to4 routes */
1631 const net_addr_ip6 sit
= NET_ADDR_IP6(IP6_NONE
, 96);
1632 if ((i
->rtm_family
== AF_INET6
) && ipa_in_netX(ra
->nh
.gw
, (net_addr
*) &sit
))
1635 if (i
->rtm_flags
& RTNH_F_ONLINK
)
1636 ra
->nh
.flags
|= RNF_ONLINK
;
1639 nbr
= neigh_find(&p
->p
, ra
->nh
.gw
, ra
->nh
.iface
,
1640 (ra
->nh
.flags
& RNF_ONLINK
) ? NEF_ONLINK
: 0);
1641 if (!nbr
|| (nbr
->scope
== SCOPE_HOST
))
1643 log(L_ERR
"KRT: Received route %N with strange next-hop %I", net
->n
.addr
,
1651 ra
->dest
= RTD_BLACKHOLE
;
1653 case RTN_UNREACHABLE
:
1654 ra
->dest
= RTD_UNREACHABLE
;
1657 ra
->dest
= RTD_PROHIBIT
;
1659 /* FIXME: What about RTN_THROW? */
1661 SKIP("type %d\n", i
->rtm_type
);
1665 #ifdef HAVE_MPLS_KERNEL
1667 if ((i
->rtm_family
== AF_MPLS
) && a
[RTA_NEWDST
] && !ra
->nh
.next
)
1668 labels
= rta_get_mpls(a
[RTA_NEWDST
], ra
->nh
.label
);
1670 if (a
[RTA_ENCAP
] && a
[RTA_ENCAP_TYPE
] && !ra
->nh
.next
)
1672 switch (rta_get_u16(a
[RTA_ENCAP_TYPE
]))
1674 case LWTUNNEL_ENCAP_MPLS
:
1676 struct rtattr
*enca
[BIRD_RTA_MAX
];
1677 nl_attr_len
= RTA_PAYLOAD(a
[RTA_ENCAP
]);
1678 nl_parse_attrs(RTA_DATA(a
[RTA_ENCAP
]), encap_mpls_want
, enca
, sizeof(enca
));
1679 labels
= rta_get_mpls(enca
[RTA_DST
], ra
->nh
.label
);
1683 SKIP("unknown encapsulation method %d\n", rta_get_u16(a
[RTA_ENCAP_TYPE
]));
1690 log(L_WARN
"KRT: Too long MPLS stack received, ignoring.");
1694 ra
->nh
.labels
= labels
;
1697 if (i
->rtm_scope
!= def_scope
)
1699 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1700 ea
->next
= ra
->eattrs
;
1702 ea
->flags
= EALF_SORTED
;
1704 ea
->attrs
[0].id
= EA_KRT_SCOPE
;
1705 ea
->attrs
[0].flags
= 0;
1706 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1707 ea
->attrs
[0].u
.data
= i
->rtm_scope
;
1712 ip_addr ps
= rta_get_ipa(a
[RTA_PREFSRC
]);
1714 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1715 ea
->next
= ra
->eattrs
;
1717 ea
->flags
= EALF_SORTED
;
1719 ea
->attrs
[0].id
= EA_KRT_PREFSRC
;
1720 ea
->attrs
[0].flags
= 0;
1721 ea
->attrs
[0].type
= EAF_TYPE_IP_ADDRESS
;
1722 ea
->attrs
[0].u
.ptr
= lp_alloc(s
->pool
, sizeof(struct adata
) + sizeof(ps
));
1723 ea
->attrs
[0].u
.ptr
->length
= sizeof(ps
);
1724 memcpy(ea
->attrs
[0].u
.ptr
->data
, &ps
, sizeof(ps
));
1729 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + sizeof(eattr
));
1730 ea
->next
= ra
->eattrs
;
1732 ea
->flags
= EALF_SORTED
;
1734 ea
->attrs
[0].id
= EA_KRT_REALM
;
1735 ea
->attrs
[0].flags
= 0;
1736 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1737 ea
->attrs
[0].u
.data
= rta_get_u32(a
[RTA_FLOW
]);
1742 u32 metrics
[KRT_METRICS_MAX
];
1743 ea_list
*ea
= lp_alloc(s
->pool
, sizeof(ea_list
) + KRT_METRICS_MAX
* sizeof(eattr
));
1746 if (nl_parse_metrics(a
[RTA_METRICS
], metrics
, ARRAY_SIZE(metrics
)) < 0)
1748 log(L_ERR
"KRT: Received route %N with strange RTA_METRICS attribute", net
->n
.addr
);
1752 for (t
= 1; t
< KRT_METRICS_MAX
; t
++)
1753 if (metrics
[0] & (1 << t
))
1755 ea
->attrs
[n
].id
= EA_CODE(PROTOCOL_KERNEL
, KRT_METRICS_OFFSET
+ t
);
1756 ea
->attrs
[n
].flags
= 0;
1757 ea
->attrs
[n
].type
= EAF_TYPE_INT
; /* FIXME: Some are EAF_TYPE_BITFIELD */
1758 ea
->attrs
[n
].u
.data
= metrics
[t
];
1764 ea
->next
= ra
->eattrs
;
1765 ea
->flags
= EALF_SORTED
;
1772 * Ideally, now we would send the received route to the rest of kernel code.
1773 * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1774 * postpone it and merge next hops until the end of the sequence. Note that
1775 * when doing merging of next hops, we expect the new route to be unipath.
1776 * Otherwise, we ignore additional next hops in nexthop_insert().
1781 /* Store the new route */
1786 s
->krt_src
= krt_src
;
1787 s
->krt_type
= i
->rtm_type
;
1788 s
->krt_proto
= i
->rtm_protocol
;
1789 s
->krt_metric
= priority
;
1793 /* Merge next hops with the stored route */
1796 struct nexthop
*nhs
= &oa
->nh
;
1797 nexthop_insert(&nhs
, &ra
->nh
);
1799 /* Perhaps new nexthop is inserted at the first position */
1805 /* Keep old eattrs */
1806 ra
->eattrs
= oa
->eattrs
;
/*
 * krt_do_scan - scan the kernel routing tables.
 *
 * A route dump is requested for AF_INET, then AF_INET6 and, when
 * HAVE_MPLS_KERNEL is defined, AF_MPLS. For each dump, every message
 * returned by nl_get_scan() that is RTM_NEWROUTE or RTM_DELROUTE goes to
 * nl_parse_route(); the log() call for other message types follows each loop.
 *
 * The parse state is re-initialized before each dump with scan = 1; merging
 * is disabled (0) for the IPv4 dump and enabled (1) for the IPv6 and MPLS
 * dumps.
 */
1812 krt_do_scan(struct krt_proto
*p UNUSED
) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1815 struct nl_parse_state s
;
/* IPv4 routes */
1817 nl_parse_begin(&s
, 1, 0);
1818 nl_request_dump(AF_INET
, RTM_GETROUTE
);
/* The assignment is intentional: loop until nl_get_scan() returns NULL */
1819 while (h
= nl_get_scan())
1820 if (h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)
1821 nl_parse_route(&s
, h
);
1823 log(L_DEBUG
"nl_scan_fire: Unknown packet received (type=%d)", h
->nlmsg_type
);
/* IPv6 routes, with next hop merging enabled */
1826 nl_parse_begin(&s
, 1, 1);
1827 nl_request_dump(AF_INET6
, RTM_GETROUTE
);
1828 while (h
= nl_get_scan())
1829 if (h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)
1830 nl_parse_route(&s
, h
);
1832 log(L_DEBUG
"nl_scan_fire: Unknown packet received (type=%d)", h
->nlmsg_type
);
/* MPLS routes, only when the kernel supports them */
1835 #ifdef HAVE_MPLS_KERNEL
1836 nl_parse_begin(&s
, 1, 1);
1837 nl_request_dump(AF_MPLS
, RTM_GETROUTE
);
1838 while (h
= nl_get_scan())
1839 if (h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)
1840 nl_parse_route(&s
, h
);
1842 log(L_DEBUG
"nl_scan_fire: Unknown packet received (type=%d)", h
->nlmsg_type
);
1848 * Asynchronous Netlink interface
1851 static sock
*nl_async_sk
; /* BIRD socket for asynchronous notifications */
1852 static byte
*nl_async_rx_buffer
; /* Receive buffer */
/*
 * nl_async_msg - dispatch one asynchronous Netlink message @h by its
 * nlmsg_type.
 *
 * The debug messages indicate four categories: route notifications are
 * parsed by nl_parse_route() with a fresh parse state (scan = 0, merge = 0),
 * link notifications by nl_parse_link(h, 0), address notifications by
 * nl_parse_addr(h, 0); anything else is only reported through DBG.
 *
 * NOTE(review): the case labels are not visible in this excerpt, so the exact
 * message types belonging to each category must be confirmed in the full file.
 */
1855 nl_async_msg(struct nlmsghdr
*h
)
1857 struct nl_parse_state s
;
1859 switch (h
->nlmsg_type
)
/* Route notifications: each one is handled as a separate route */
1863 DBG("KRT: Received async route notification (%d)\n", h
->nlmsg_type
);
1864 nl_parse_begin(&s
, 0, 0);
1865 nl_parse_route(&s
, h
);
/* Link (interface) notifications */
1870 DBG("KRT: Received async link notification (%d)\n", h
->nlmsg_type
);
1872 nl_parse_link(h
, 0);
/* Address notifications */
1876 DBG("KRT: Received async address notification (%d)\n", h
->nlmsg_type
);
1878 nl_parse_addr(h
, 0);
/* Everything else is ignored */
1881 DBG("KRT: Received unknown async notification (%d)\n", h
->nlmsg_type
);
/*
 * nl_async_hook - receive hook of the asynchronous Netlink socket @sk.
 *
 * One datagram is read with recvmsg() into nl_async_rx_buffer (NL_RX_SIZE
 * bytes). ENOBUFS is logged as a warning and 1 is returned; other errors
 * except EWOULDBLOCK are logged as errors. Datagrams with a non-zero sender
 * nl_pid are treated as not coming from the kernel. A truncated datagram
 * (MSG_TRUNC) is reported. Otherwise the buffer is walked with NLMSG_OK() /
 * NLMSG_NEXT(), and a warning is logged about a leftover of @len bytes.
 *
 * NOTE(review): the per-message processing inside the loop and most return
 * statements are not visible in this excerpt.
 */
1886 nl_async_hook(sock
*sk
, uint size UNUSED
)
1888 struct iovec iov
= { nl_async_rx_buffer
, NL_RX_SIZE
};
1889 struct sockaddr_nl sa
;
1892 .msg_namelen
= sizeof(sa
),
1900 x
= recvmsg(sk
->fd
, &m
, 0);
1903 if (errno
== ENOBUFS
)
1906 * Netlink reports some packets have been thrown away.
1907 * One day we might react to it by asking for route table
1908 * scan in near future.
1910 log(L_WARN
"Kernel dropped some netlink messages, will resync on next scan.");
1911 return 1; /* More data are likely to be ready */
/* EWOULDBLOCK is not reported as an error */
1913 else if (errno
!= EWOULDBLOCK
)
1914 log(L_ERR
"Netlink recvmsg: %m");
1917 if (sa
.nl_pid
) /* It isn't from the kernel */
1919 DBG("Non-kernel packet\n");
1922 h
= (void *) nl_async_rx_buffer
;
/* The datagram did not fit into the receive buffer */
1924 if (m
.msg_flags
& MSG_TRUNC
)
1926 log(L_WARN
"Netlink got truncated asynchronous message");
/* Walk all Netlink messages contained in the datagram */
1929 while (NLMSG_OK(h
, len
))
1932 h
= NLMSG_NEXT(h
, len
);
1935 log(L_WARN
"nl_async_hook: Found packet remnant of size %d", len
);
/*
 * nl_async_err_hook - error hook of the asynchronous Netlink socket.
 * The error code @e is ignored; the receive hook nl_async_hook() is called
 * with size 0 instead.
 */
1940 nl_async_err_hook(sock
*sk
, int e UNUSED
)
1942 nl_async_hook(sk
, 0);
/*
 * Set-up of the asynchronous Netlink socket (the function header is not
 * visible in this excerpt).
 *
 * A raw NETLINK_ROUTE socket is created and bound to the link, IPv4/IPv6
 * address and IPv4/IPv6 route multicast groups. The receive buffer
 * nl_async_rx_buffer is allocated and a BIRD socket object (nl_async_sk) of
 * type SK_MAGIC is created from krt_pool with nl_async_hook() as receive
 * hook and nl_async_err_hook() as error hook. Failures of socket() and
 * bind() are logged; a failure of sk_open() is treated as a bug.
 *
 * NOTE(review): no MPLS multicast group appears in the visible nl_groups
 * mask; how the descriptor is attached to the BIRD socket is not visible.
 */
1949 struct sockaddr_nl sa
;
1955 DBG("KRT: Opening async netlink socket\n");
1957 fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
1960 log(L_ERR
"Unable to open asynchronous rtnetlink socket: %m");
/* Subscribe to link, address and route notifications */
1964 bzero(&sa
, sizeof(sa
));
1965 sa
.nl_family
= AF_NETLINK
;
1966 sa
.nl_groups
= RTMGRP_LINK
|
1967 RTMGRP_IPV4_IFADDR
| RTMGRP_IPV4_ROUTE
|
1968 RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_ROUTE
;
1970 if (bind(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) < 0)
1972 log(L_ERR
"Unable to bind asynchronous rtnetlink socket: %m");
1977 nl_async_rx_buffer
= xmalloc(NL_RX_SIZE
);
/* Wrap the descriptor in a BIRD socket with our receive and error hooks */
1979 sk
= nl_async_sk
= sk_new(krt_pool
);
1980 sk
->type
= SK_MAGIC
;
1981 sk
->rx_hook
= nl_async_hook
;
1982 sk
->err_hook
= nl_async_err_hook
;
1984 if (sk_open(sk
) < 0)
1985 bug("Netlink: sk_open failed");
1990 * Interface to the UNIX krt module
/*
 * krt_sys_io_init - one-time initialization of the Netlink backend: creates
 * the linear pool nl_linpool and initializes the nl_table_map hash, both
 * from krt_pool.
 */
1994 krt_sys_io_init(void)
1996 nl_linpool
= lp_new_default(krt_pool
);
1997 HASH_INIT(nl_table_map
, krt_pool
, 6);
/*
 * krt_sys_start - register kernel protocol instance @p in nl_table_map.
 *
 * The map is first searched for an instance with the same address family
 * and kernel table id; the error message below reports such a conflict
 * together with the name of the instance that already owns the table.
 * Otherwise @p is inserted into the map.
 *
 * NOTE(review): the test of @old and the return values are not visible in
 * this excerpt.
 */
2001 krt_sys_start(struct krt_proto
*p
)
2003 struct krt_proto
*old
= HASH_FIND(nl_table_map
, RTH
, p
->af
, krt_table_id(p
));
2007 log(L_ERR
"%s: Kernel table %u already registered by %s",
2008 p
->p
.name
, krt_table_id(p
), old
->p
.name
);
2012 HASH_INSERT2(nl_table_map
, RTH
, krt_pool
, p
);
/*
 * krt_sys_shutdown - remove kernel protocol instance @p from nl_table_map,
 * undoing the registration made in krt_sys_start().
 */
2021 krt_sys_shutdown(struct krt_proto
*p
)
2023 HASH_REMOVE2(nl_table_map
, RTH
, krt_pool
, p
);
/*
 * krt_sys_reconfigure - compare the system-dependent parts of the new (@n)
 * and old (@o) configuration. Returns non-zero only when both the kernel
 * table id and the metric are unchanged; @p is not used.
 */
2027 krt_sys_reconfigure(struct krt_proto
*p UNUSED
, struct krt_config
*n
, struct krt_config
*o
)
2029 return (n
->sys
.table_id
== o
->sys
.table_id
) && (n
->sys
.metric
== o
->sys
.metric
);
/*
 * krt_sys_init_config - set defaults of the system-dependent configuration:
 * kernel table RT_TABLE_MAIN and metric 32.
 */
2033 krt_sys_init_config(struct krt_config
*cf
)
2035 cf
->sys
.table_id
= RT_TABLE_MAIN
;
2036 cf
->sys
.metric
= 32;
/*
 * krt_sys_copy_config - copy the system-dependent configuration (kernel
 * table id and metric) from @s to @d.
 */
2040 krt_sys_copy_config(struct krt_config
*d
, struct krt_config
*s
)
2042 d
->sys
.table_id
= s
->sys
.table_id
;
2043 d
->sys
.metric
= s
->sys
.metric
;
/*
 * Names of kernel route metrics. krt_sys_get_attr() indexes this table with
 * the attribute id minus KRT_METRICS_OFFSET (valid indices are above 0) and
 * also passes it to ea_format_bitfield() for the "lock" attribute.
 * Index 0 has no name.
 */
2046 static const char *krt_metrics_names
[KRT_METRICS_MAX
] = {
2047 NULL
, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2048 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
/*
 * Names of the bits of the "features" attribute, passed to
 * ea_format_bitfield() by krt_sys_get_attr(). NULL entries have no name.
 */
2051 static const char *krt_features_names
[KRT_FEATURES_MAX
] = {
2052 "ecn", NULL
, NULL
, "allfrag"
/*
 * krt_sys_get_attr - format the name (or the whole value) of kernel-specific
 * extended attribute @a into @buf.
 *
 * Plain names "prefsrc", "realm" and "scope" are written for the respective
 * attributes. The lock and features attributes are written as a prefix
 * followed by the bit names produced by ea_format_bitfield(). Any other
 * attribute is treated as a metric: its id minus KRT_METRICS_OFFSET selects
 * a name from krt_metrics_names when it lies in 1 .. KRT_METRICS_MAX - 1.
 *
 * NOTE(review): @buflen is marked UNUSED although it is passed to
 * ea_format_bitfield(), and it is passed unchanged after @buf has been
 * advanced past the prefix, so the callee sees a size larger than the space
 * that is left; confirm whether the callee's safety margin covers this.
 * NOTE(review): case labels other than the two shown and all return values
 * are not visible in this excerpt.
 */
2056 krt_sys_get_attr(eattr
*a
, byte
*buf
, int buflen UNUSED
)
2060 case EA_KRT_PREFSRC
:
2061 bsprintf(buf
, "prefsrc");
2065 bsprintf(buf
, "realm");
2069 bsprintf(buf
, "scope");
/* Bitfield attributes: prefix first, then the names of the set bits */
2073 buf
+= bsprintf(buf
, "lock:");
2074 ea_format_bitfield(a
, buf
, buflen
, krt_metrics_names
, 2, KRT_METRICS_MAX
);
2077 case EA_KRT_FEATURES
:
2078 buf
+= bsprintf(buf
, "features:");
2079 ea_format_bitfield(a
, buf
, buflen
, krt_features_names
, 0, KRT_FEATURES_MAX
);
/* Remaining attributes: look the metric name up by its index */
2083 int id
= (int)EA_ID(a
->id
) - KRT_METRICS_OFFSET
;
2084 if (id
> 0 && id
< KRT_METRICS_MAX
)
2086 bsprintf(buf
, "%s", krt_metrics_names
[id
]);
2097 kif_sys_start(struct kif_proto
*p UNUSED
)
2104 kif_sys_shutdown(struct kif_proto
*p UNUSED
)
2109 kif_update_sysdep_addr(struct iface
*i UNUSED
)