2 * BIRD -- Linux Netlink Interface
4 * (c) 1999--2000 Martin Mares <mj@ucw.cz>
6 * Can be freely distributed and used under the terms of the GNU GPL.
12 #include <sys/socket.h>
18 #include "nest/bird.h"
19 #include "nest/route.h"
20 #include "nest/protocol.h"
21 #include "nest/iface.h"
22 #include "lib/alloca.h"
23 #include "lib/timer.h"
26 #include "lib/socket.h"
27 #include "lib/string.h"
29 #include "conf/conf.h"
31 #include <asm/types.h>
33 #include <linux/netlink.h>
34 #include <linux/rtnetlink.h>
37 #ifndef MSG_TRUNC /* Hack: Several versions of glibc miss this one :( */
38 #define MSG_TRUNC 0x20
42 #define IFF_LOWER_UP 0x10000
51 * Synchronous Netlink interface
58 byte
*rx_buffer
; /* Receive buffer */
59 struct nlmsghdr
*last_hdr
; /* Recently received packet */
/* Size in bytes of a netlink receive buffer: used both when allocating the
 * rx buffers with xmalloc() and as the iovec length handed to recvmsg(). */
63 #define NL_RX_SIZE 8192
65 static struct nl_sock nl_scan
= {.fd
= -1}; /* Netlink socket for synchronous scan */
66 static struct nl_sock nl_req
= {.fd
= -1}; /* Netlink socket for requests */
69 nl_open_sock(struct nl_sock
*nl
)
73 nl
->fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
75 die("Unable to open rtnetlink socket: %m");
77 nl
->rx_buffer
= xmalloc(NL_RX_SIZE
);
86 nl_open_sock(&nl_scan
);
87 nl_open_sock(&nl_req
);
91 nl_send(struct nl_sock
*nl
, struct nlmsghdr
*nh
)
93 struct sockaddr_nl sa
;
95 memset(&sa
, 0, sizeof(sa
));
96 sa
.nl_family
= AF_NETLINK
;
98 nh
->nlmsg_seq
= ++(nl
->seq
);
99 if (sendto(nl
->fd
, nh
, nh
->nlmsg_len
, 0, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0)
100 die("rtnetlink sendto: %m");
105 nl_request_dump(int af
, int cmd
)
111 .nh
.nlmsg_type
= cmd
,
112 .nh
.nlmsg_len
= sizeof(req
),
113 .nh
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
,
116 nl_send(&nl_scan
, &req
.nh
);
119 static struct nlmsghdr
*
120 nl_get_reply(struct nl_sock
*nl
)
126 struct iovec iov
= { nl
->rx_buffer
, NL_RX_SIZE
};
127 struct sockaddr_nl sa
;
128 struct msghdr m
= { (struct sockaddr
*) &sa
, sizeof(sa
), &iov
, 1, NULL
, 0, 0 };
129 int x
= recvmsg(nl
->fd
, &m
, 0);
131 die("nl_get_reply: %m");
132 if (sa
.nl_pid
) /* It isn't from the kernel */
134 DBG("Non-kernel packet\n");
138 nl
->last_hdr
= (void *) nl
->rx_buffer
;
139 if (m
.msg_flags
& MSG_TRUNC
)
140 bug("nl_get_reply: got truncated reply which should be impossible");
142 if (NLMSG_OK(nl
->last_hdr
, nl
->last_size
))
144 struct nlmsghdr
*h
= nl
->last_hdr
;
145 nl
->last_hdr
= NLMSG_NEXT(h
, nl
->last_size
);
146 if (h
->nlmsg_seq
!= nl
->seq
)
148 log(L_WARN
"nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
149 h
->nlmsg_seq
, nl
->seq
);
155 log(L_WARN
"nl_get_reply: Found packet remnant of size %d", nl
->last_size
);
/* State passed to log_rl() when nl_error() reports a "Netlink: %s" warning;
 * presumably rate-limits those messages (initialised from the default log
 * limits) -- confirm against the tbf/log_rl implementation. */
160 static struct tbf rl_netlink_err
= TBF_DEFAULT_LOG_LIMITS
;
163 nl_error(struct nlmsghdr
*h
)
168 if (h
->nlmsg_len
< NLMSG_LENGTH(sizeof(struct nlmsgerr
)))
170 log(L_WARN
"Netlink: Truncated error message received");
173 e
= (struct nlmsgerr
*) NLMSG_DATA(h
);
176 log_rl(&rl_netlink_err
, L_WARN
"Netlink: %s", strerror(ec
));
180 static struct nlmsghdr
*
183 struct nlmsghdr
*h
= nl_get_reply(&nl_scan
);
185 if (h
->nlmsg_type
== NLMSG_DONE
)
187 if (h
->nlmsg_type
== NLMSG_ERROR
)
196 nl_exchange(struct nlmsghdr
*pkt
)
200 nl_send(&nl_req
, pkt
);
203 h
= nl_get_reply(&nl_req
);
204 if (h
->nlmsg_type
== NLMSG_ERROR
)
206 log(L_WARN
"nl_exchange: Unexpected reply received");
208 return nl_error(h
) ? -1 : 0;
/* Number of attribute bytes still to be parsed in the current message.
 * Set by nl_checkin() (message length minus the fixed header) and by
 * nl_parse_multipath() (per-nexthop length), then consumed and decremented
 * by the RTA_OK()/RTA_NEXT() walk in nl_parse_attrs(). File-level state, so
 * the setter and the parser must be called back to back. */
215 static int nl_attr_len
;
218 nl_checkin(struct nlmsghdr
*h
, int lsize
)
220 nl_attr_len
= h
->nlmsg_len
- NLMSG_LENGTH(lsize
);
223 log(L_ERR
"nl_checkin: underrun by %d bytes", -nl_attr_len
);
226 return NLMSG_DATA(h
);
229 struct nl_want_attrs
{
236 #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
238 static struct nl_want_attrs ifla_attr_want
[BIRD_IFLA_MAX
] = {
239 [IFLA_IFNAME
] = { 1, 0, 0 },
240 [IFLA_MTU
] = { 1, 1, sizeof(u32
) },
241 [IFLA_WIRELESS
] = { 1, 0, 0 },
245 #define BIRD_IFA_MAX (IFA_ANYCAST+1)
248 static struct nl_want_attrs ifa_attr_want4
[BIRD_IFA_MAX
] = {
249 [IFA_ADDRESS
] = { 1, 1, sizeof(ip4_addr
) },
250 [IFA_LOCAL
] = { 1, 1, sizeof(ip4_addr
) },
251 [IFA_BROADCAST
] = { 1, 1, sizeof(ip4_addr
) },
254 static struct nl_want_attrs ifa_attr_want6
[BIRD_IFA_MAX
] = {
255 [IFA_ADDRESS
] = { 1, 1, sizeof(ip6_addr
) },
256 [IFA_LOCAL
] = { 1, 1, sizeof(ip6_addr
) },
261 #define BIRD_RTA_MAX (RTA_TABLE+1)
263 static struct nl_want_attrs mpnh_attr_want4
[BIRD_RTA_MAX
] = {
264 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
268 static struct nl_want_attrs rtm_attr_want4
[BIRD_RTA_MAX
] = {
269 [RTA_DST
] = { 1, 1, sizeof(ip4_addr
) },
270 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
271 [RTA_GATEWAY
] = { 1, 1, sizeof(ip4_addr
) },
272 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
273 [RTA_PREFSRC
] = { 1, 1, sizeof(ip4_addr
) },
274 [RTA_METRICS
] = { 1, 0, 0 },
275 [RTA_MULTIPATH
] = { 1, 0, 0 },
276 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
277 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
280 static struct nl_want_attrs rtm_attr_want6
[BIRD_RTA_MAX
] = {
281 [RTA_DST
] = { 1, 1, sizeof(ip6_addr
) },
282 [RTA_IIF
] = { 1, 1, sizeof(u32
) },
283 [RTA_OIF
] = { 1, 1, sizeof(u32
) },
284 [RTA_GATEWAY
] = { 1, 1, sizeof(ip6_addr
) },
285 [RTA_PRIORITY
] = { 1, 1, sizeof(u32
) },
286 [RTA_PREFSRC
] = { 1, 1, sizeof(ip6_addr
) },
287 [RTA_METRICS
] = { 1, 0, 0 },
288 [RTA_FLOW
] = { 1, 1, sizeof(u32
) },
289 [RTA_TABLE
] = { 1, 1, sizeof(u32
) },
295 nl_parse_attrs(struct rtattr
*a
, struct nl_want_attrs
*want
, struct rtattr
**k
, int ksize
)
297 int max
= ksize
/ sizeof(struct rtattr
*);
300 for ( ; RTA_OK(a
, nl_attr_len
); a
= RTA_NEXT(a
, nl_attr_len
))
302 if ((a
->rta_type
>= max
) || !want
[a
->rta_type
].defined
)
305 if (want
[a
->rta_type
].checksize
&& (RTA_PAYLOAD(a
) != want
[a
->rta_type
].size
))
307 log(L_ERR
"nl_parse_attrs: Malformed message received");
316 log(L_ERR
"nl_parse_attrs: remnant of size %d", nl_attr_len
);
323 static inline u32
rta_get_u32(struct rtattr
*a
)
324 { return *(u32
*) RTA_DATA(a
); }
326 static inline ip4_addr
rta_get_ip4(struct rtattr
*a
)
327 { return ip4_ntoh(*(ip4_addr
*) RTA_DATA(a
)); }
329 static inline ip6_addr
rta_get_ip6(struct rtattr
*a
)
330 { return ip6_ntoh(*(ip6_addr
*) RTA_DATA(a
)); }
334 nl_add_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
, const void *data
, uint dlen
)
336 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
337 uint len
= RTA_LENGTH(dlen
);
339 if (pos
+ len
> bufsize
)
340 bug("nl_add_attr: packet buffer overflow");
342 struct rtattr
*a
= (struct rtattr
*)((char *)h
+ pos
);
345 h
->nlmsg_len
= pos
+ len
;
348 memcpy(RTA_DATA(a
), data
, dlen
);
354 nl_add_attr_u32(struct nlmsghdr
*h
, unsigned bufsize
, int code
, u32 data
)
356 nl_add_attr(h
, bufsize
, code
, &data
, 4);
360 nl_add_attr_ipa(struct nlmsghdr
*h
, unsigned bufsize
, int code
, ip_addr ipa
)
363 nl_add_attr(h
, bufsize
, code
, &ipa
, sizeof(ipa
));
366 static inline struct rtattr
*
367 nl_open_attr(struct nlmsghdr
*h
, uint bufsize
, uint code
)
369 return nl_add_attr(h
, bufsize
, code
, NULL
, 0);
373 nl_close_attr(struct nlmsghdr
*h
, struct rtattr
*a
)
375 a
->rta_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)a
;
378 static inline struct rtnexthop
*
379 nl_open_nexthop(struct nlmsghdr
*h
, uint bufsize
)
381 uint pos
= NLMSG_ALIGN(h
->nlmsg_len
);
382 uint len
= RTNH_LENGTH(0);
384 if (pos
+ len
> bufsize
)
385 bug("nl_open_nexthop: packet buffer overflow");
387 h
->nlmsg_len
= pos
+ len
;
389 return (void *)h
+ pos
;
393 nl_close_nexthop(struct nlmsghdr
*h
, struct rtnexthop
*nh
)
395 nh
->rtnh_len
= (void *)h
+ NLMSG_ALIGN(h
->nlmsg_len
) - (void *)nh
;
399 nl_add_multipath(struct nlmsghdr
*h
, unsigned bufsize
, struct mpnh
*nh
)
401 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_MULTIPATH
);
403 for (; nh
; nh
= nh
->next
)
405 struct rtnexthop
*rtnh
= nl_open_nexthop(h
, bufsize
);
407 rtnh
->rtnh_flags
= 0;
408 rtnh
->rtnh_hops
= nh
->weight
;
409 rtnh
->rtnh_ifindex
= nh
->iface
->index
;
411 nl_add_attr_ipa(h
, bufsize
, RTA_GATEWAY
, nh
->gw
);
413 nl_close_nexthop(h
, rtnh
);
420 nl_parse_multipath(struct krt_proto
*p
, struct rtattr
*ra
)
422 /* Temporary buffer for multipath nexthops */
423 static struct mpnh
*nh_buffer
;
424 static int nh_buf_size
; /* in number of structures */
425 static int nh_buf_used
;
427 struct rtattr
*a
[BIRD_RTA_MAX
];
428 struct rtnexthop
*nh
= RTA_DATA(ra
);
429 struct mpnh
*rv
, *first
, **last
;
430 int len
= RTA_PAYLOAD(ra
);
438 /* Use RTNH_OK(nh,len) ?? */
439 if ((len
< sizeof(*nh
)) || (len
< nh
->rtnh_len
))
442 if (nh_buf_used
== nh_buf_size
)
444 nh_buf_size
= nh_buf_size
? (nh_buf_size
* 2) : 4;
445 nh_buffer
= xrealloc(nh_buffer
, nh_buf_size
* sizeof(struct mpnh
));
447 *last
= rv
= nh_buffer
+ nh_buf_used
++;
451 rv
->weight
= nh
->rtnh_hops
;
452 rv
->iface
= if_find_by_index(nh
->rtnh_ifindex
);
456 /* Nonexistent RTNH_PAYLOAD ?? */
457 nl_attr_len
= nh
->rtnh_len
- RTNH_LENGTH(0);
458 nl_parse_attrs(RTNH_DATA(nh
), mpnh_attr_want4
, a
, sizeof(a
));
461 memcpy(&rv
->gw
, RTA_DATA(a
[RTA_GATEWAY
]), sizeof(ip_addr
));
464 neighbor
*ng
= neigh_find2(&p
->p
, &rv
->gw
, rv
->iface
,
465 (nh
->rtnh_flags
& RTNH_F_ONLINK
) ? NEF_ONLINK
: 0);
466 if (!ng
|| (ng
->scope
== SCOPE_HOST
))
472 len
-= NLMSG_ALIGN(nh
->rtnh_len
);
480 nl_add_metrics(struct nlmsghdr
*h
, uint bufsize
, u32
*metrics
, int max
)
482 struct rtattr
*a
= nl_open_attr(h
, bufsize
, RTA_METRICS
);
485 for (t
= 1; t
< max
; t
++)
486 if (metrics
[0] & (1 << t
))
487 nl_add_attr_u32(h
, bufsize
, t
, metrics
[t
]);
493 nl_parse_metrics(struct rtattr
*hdr
, u32
*metrics
, int max
)
495 struct rtattr
*a
= RTA_DATA(hdr
);
496 int len
= RTA_PAYLOAD(hdr
);
499 for (; RTA_OK(a
, len
); a
= RTA_NEXT(a
, len
))
501 if (a
->rta_type
== RTA_UNSPEC
)
504 if (a
->rta_type
>= max
)
507 if (RTA_PAYLOAD(a
) != 4)
510 metrics
[0] |= 1 << a
->rta_type
;
511 metrics
[a
->rta_type
] = rta_get_u32(a
);
522 * Scanning of interfaces
526 nl_parse_link(struct nlmsghdr
*h
, int scan
)
529 struct rtattr
*a
[BIRD_IFLA_MAX
];
530 int new = h
->nlmsg_type
== RTM_NEWLINK
;
537 if (!(i
= nl_checkin(h
, sizeof(*i
))) || !nl_parse_attrs(IFLA_RTA(i
), ifla_attr_want
, a
, sizeof(a
)))
539 if (!a
[IFLA_IFNAME
] || (RTA_PAYLOAD(a
[IFLA_IFNAME
]) < 2) || !a
[IFLA_MTU
])
542 * IFLA_IFNAME and IFLA_MTU are in fact required, but a message with
543 * IFLA_WIRELESS set may also arrive, in which (e.g.) no IFLA_IFNAME exists.
544 * We silently ignore all such IFLA_WIRELESS messages.
547 if (a
[IFLA_WIRELESS
])
550 log(L_ERR
"KIF: Malformed message received");
554 name
= RTA_DATA(a
[IFLA_IFNAME
]);
555 mtu
= rta_get_u32(a
[IFLA_MTU
]);
557 ifi
= if_find_by_index(i
->ifi_index
);
560 DBG("KIF: IF%d(%s) goes down\n", i
->ifi_index
, name
);
568 DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i
->ifi_index
, name
, mtu
, i
->ifi_flags
);
569 if (ifi
&& strncmp(ifi
->name
, name
, sizeof(ifi
->name
)-1))
572 strncpy(f
.name
, name
, sizeof(f
.name
)-1);
573 f
.index
= i
->ifi_index
;
578 f
.flags
|= IF_ADMIN_UP
;
579 if (fl
& IFF_LOWER_UP
)
580 f
.flags
|= IF_LINK_UP
;
581 if (fl
& IFF_LOOPBACK
) /* Loopback */
582 f
.flags
|= IF_MULTIACCESS
| IF_LOOPBACK
| IF_IGNORE
;
583 else if (fl
& IFF_POINTOPOINT
) /* PtP */
584 f
.flags
|= IF_MULTICAST
;
585 else if (fl
& IFF_BROADCAST
) /* Broadcast */
586 f
.flags
|= IF_MULTIACCESS
| IF_BROADCAST
| IF_MULTICAST
;
588 f
.flags
|= IF_MULTIACCESS
; /* NBMA */
590 if (fl
& IFF_MULTICAST
)
591 f
.flags
|= IF_MULTICAST
;
596 if_end_partial_update(ifi
);
601 nl_parse_addr(struct nlmsghdr
*h
, int scan
)
604 struct rtattr
*a
[BIRD_IFA_MAX
];
605 int new = h
->nlmsg_type
== RTM_NEWADDR
;
610 if (!(i
= nl_checkin(h
, sizeof(*i
))))
613 switch (i
->ifa_family
)
617 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want4
, a
, sizeof(a
)))
621 log(L_ERR
"KIF: Malformed message received (missing IFA_LOCAL)");
627 if (!nl_parse_attrs(IFA_RTA(i
), ifa_attr_want6
, a
, sizeof(a
)))
637 log(L_ERR
"KIF: Malformed message received (missing IFA_ADDRESS)");
641 ifi
= if_find_by_index(i
->ifa_index
);
644 log(L_ERR
"KIF: Received address message for unknown interface %d", i
->ifa_index
);
648 bzero(&ifa
, sizeof(ifa
));
650 if (i
->ifa_flags
& IFA_F_SECONDARY
)
651 ifa
.flags
|= IA_SECONDARY
;
653 /* IFA_LOCAL can be unset for IPv6 interfaces */
654 memcpy(&ifa
.ip
, RTA_DATA(a
[IFA_LOCAL
] ? : a
[IFA_ADDRESS
]), sizeof(ifa
.ip
));
656 ifa
.pxlen
= i
->ifa_prefixlen
;
657 if (i
->ifa_prefixlen
> BITS_PER_IP_ADDRESS
)
659 log(L_ERR
"KIF: Invalid prefix length for interface %s: %d", ifi
->name
, i
->ifa_prefixlen
);
662 if (i
->ifa_prefixlen
== BITS_PER_IP_ADDRESS
)
665 memcpy(&addr
, RTA_DATA(a
[IFA_ADDRESS
]), sizeof(addr
));
667 ifa
.prefix
= ifa
.brd
= addr
;
669 /* It is either a host address or a peer address */
670 if (ipa_equal(ifa
.ip
, addr
))
671 ifa
.flags
|= IA_HOST
;
674 ifa
.flags
|= IA_PEER
;
680 ip_addr netmask
= ipa_mkmask(ifa
.pxlen
);
681 ifa
.prefix
= ipa_and(ifa
.ip
, netmask
);
682 ifa
.brd
= ipa_or(ifa
.ip
, ipa_not(netmask
));
683 if (i
->ifa_prefixlen
== BITS_PER_IP_ADDRESS
- 1)
684 ifa
.opposite
= ipa_opposite_m1(ifa
.ip
);
687 if (i
->ifa_prefixlen
== BITS_PER_IP_ADDRESS
- 2)
688 ifa
.opposite
= ipa_opposite_m2(ifa
.ip
);
690 if ((ifi
->flags
& IF_BROADCAST
) && a
[IFA_BROADCAST
])
693 memcpy(&xbrd
, RTA_DATA(a
[IFA_BROADCAST
]), sizeof(xbrd
));
695 if (ipa_equal(xbrd
, ifa
.prefix
) || ipa_equal(xbrd
, ifa
.brd
))
697 else if (ifi
->flags
& IF_TMP_DOWN
) /* Complain only during the first scan */
698 log(L_ERR
"KIF: Invalid broadcast address %I for %s", xbrd
, ifi
->name
);
703 scope
= ipa_classify(ifa
.ip
);
706 log(L_ERR
"KIF: Invalid interface address %I for %s", ifa
.ip
, ifi
->name
);
709 ifa
.scope
= scope
& IADDR_SCOPE_MASK
;
711 DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
712 ifi
->index
, ifi
->name
,
713 new ? "added" : "removed",
714 ifa
.ip
, ifa
.flags
, ifa
.prefix
, ifa
.pxlen
, ifa
.brd
, ifa
.opposite
);
722 if_end_partial_update(ifi
);
726 kif_do_scan(struct kif_proto
*p UNUSED
)
732 nl_request_dump(AF_UNSPEC
, RTM_GETLINK
);
733 while (h
= nl_get_scan())
734 if (h
->nlmsg_type
== RTM_NEWLINK
|| h
->nlmsg_type
== RTM_DELLINK
)
737 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
739 nl_request_dump(BIRD_AF
, RTM_GETADDR
);
740 while (h
= nl_get_scan())
741 if (h
->nlmsg_type
== RTM_NEWADDR
|| h
->nlmsg_type
== RTM_DELADDR
)
744 log(L_DEBUG
"nl_scan_ifaces: Unknown packet received (type=%d)", h
->nlmsg_type
);
754 krt_table_id(struct krt_proto
*p
)
756 return KRT_CF
->sys
.table_id
;
759 static HASH(struct krt_proto
) nl_table_map
;
/*
 * Parameter macros for the nl_table_map hash (macro family prefix RTH).
 * Entries are struct krt_proto, keyed by the kernel table id returned by
 * krt_table_id() and chained through sys.hash_next.
 *
 * RTH_EQ and RTH_NEXT parenthesize their arguments and their expansion so
 * that they stay correct when the caller passes an expression or applies an
 * operator to the result (e.g. !RTH_EQ(a, b), RTH_NEXT(*pp)).
 */
#define RTH_FN(k)	u32_hash(k)
#define RTH_EQ(k1,k2)	((k1) == (k2))
#define RTH_KEY(p)	krt_table_id(p)
#define RTH_NEXT(p)	((p)->sys.hash_next)

/*
 * Name of the rehash function generated by HASH_DEFINE_REHASH_FN(RTH, ...).
 * RTH_PARAMS is deliberately left as a raw token list (note the leading
 * operators); presumably it is spliced textually by the hash macros, so it
 * must not be parenthesized -- confirm against the HASH_* macro definitions.
 */
#define RTH_REHASH	rth_rehash
#define RTH_PARAMS	/8, *2, 2, 2, 6, 20
769 HASH_DEFINE_REHASH_FN(RTH
, struct krt_proto
)
776 if (a
->cast
!= RTC_UNICAST
)
783 if (a
->iface
== NULL
)
786 case RTD_UNREACHABLE
:
797 nh_bufsize(struct mpnh
*nh
)
800 for (; nh
!= NULL
; nh
= nh
->next
)
801 rv
+= RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr
)));
806 nl_send_route(struct krt_proto
*p
, rte
*e
, struct ea_list
*eattrs
, int new)
814 char buf
[128 + KRT_METRICS_MAX
*8 + nh_bufsize(a
->nexthops
)];
817 DBG("nl_send_route(%I/%d,new=%d)\n", net
->n
.prefix
, net
->n
.pxlen
, new);
819 bzero(&r
.h
, sizeof(r
.h
));
820 bzero(&r
.r
, sizeof(r
.r
));
821 r
.h
.nlmsg_type
= new ? RTM_NEWROUTE
: RTM_DELROUTE
;
822 r
.h
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
823 r
.h
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
| (new ? NLM_F_CREATE
|NLM_F_EXCL
: 0);
825 r
.r
.rtm_family
= BIRD_AF
;
826 r
.r
.rtm_dst_len
= net
->n
.pxlen
;
827 r
.r
.rtm_protocol
= RTPROT_BIRD
;
828 r
.r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
829 nl_add_attr_ipa(&r
.h
, sizeof(r
), RTA_DST
, net
->n
.prefix
);
831 if (krt_table_id(p
) < 256)
832 r
.r
.rtm_table
= krt_table_id(p
);
834 nl_add_attr_u32(&r
.h
, sizeof(r
), RTA_TABLE
, krt_table_id(p
));
836 /* For route delete, we do not specify route attributes */
838 return nl_exchange(&r
.h
);
841 if (ea
= ea_find(eattrs
, EA_KRT_METRIC
))
842 nl_add_attr_u32(&r
.h
, sizeof(r
), RTA_PRIORITY
, ea
->u
.data
);
844 if (ea
= ea_find(eattrs
, EA_KRT_PREFSRC
))
845 nl_add_attr_ipa(&r
.h
, sizeof(r
), RTA_PREFSRC
, *(ip_addr
*)ea
->u
.ptr
->data
);
847 if (ea
= ea_find(eattrs
, EA_KRT_REALM
))
848 nl_add_attr_u32(&r
.h
, sizeof(r
), RTA_FLOW
, ea
->u
.data
);
851 u32 metrics
[KRT_METRICS_MAX
];
854 struct ea_walk_state ews
= { .eattrs
= eattrs
};
855 while (ea
= ea_walk(&ews
, EA_KRT_METRICS
, KRT_METRICS_MAX
))
857 int id
= ea
->id
- EA_KRT_METRICS
;
858 metrics
[0] |= 1 << id
;
859 metrics
[id
] = ea
->u
.data
;
863 nl_add_metrics(&r
.h
, sizeof(r
), metrics
, KRT_METRICS_MAX
);
866 /* a->iface != NULL checked in krt_capable() for router and device routes */
871 r
.r
.rtm_type
= RTN_UNICAST
;
872 nl_add_attr_u32(&r
.h
, sizeof(r
), RTA_OIF
, a
->iface
->index
);
873 nl_add_attr_ipa(&r
.h
, sizeof(r
), RTA_GATEWAY
, a
->gw
);
876 r
.r
.rtm_type
= RTN_UNICAST
;
877 nl_add_attr_u32(&r
.h
, sizeof(r
), RTA_OIF
, a
->iface
->index
);
880 r
.r
.rtm_type
= RTN_BLACKHOLE
;
882 case RTD_UNREACHABLE
:
883 r
.r
.rtm_type
= RTN_UNREACHABLE
;
886 r
.r
.rtm_type
= RTN_PROHIBIT
;
889 r
.r
.rtm_type
= RTN_UNICAST
;
890 nl_add_multipath(&r
.h
, sizeof(r
), a
->nexthops
);
893 bug("krt_capable inconsistent with nl_send_route");
896 return nl_exchange(&r
.h
);
900 krt_replace_rte(struct krt_proto
*p
, net
*n
, rte
*new, rte
*old
, struct ea_list
*eattrs
)
905 * NULL for eattr of the old route is a little hack, but we don't
906 * get proper eattrs for old in rt_notify() anyway. NULL means no
907 * extended route attributes and therefore matches if the kernel
908 * route has any of them.
912 nl_send_route(p
, old
, NULL
, 0);
915 err
= nl_send_route(p
, new, eattrs
, 1);
918 n
->n
.flags
|= KRF_SYNC_ERROR
;
920 n
->n
.flags
&= ~KRF_SYNC_ERROR
;
/* Emit a DBG() message prefixed with "KRT: Ignoring route - " and then
 * return from the enclosing function. The first argument must be a string
 * literal (it is concatenated with the prefix). Because of the bare
 * `return;` this is only usable inside functions returning void. */
924 #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
927 nl_parse_route(struct nlmsghdr
*h
, int scan
)
931 struct rtattr
*a
[BIRD_RTA_MAX
];
932 int new = h
->nlmsg_type
== RTM_NEWROUTE
;
934 ip_addr dst
= IPA_NONE
;
939 if (!(i
= nl_checkin(h
, sizeof(*i
))))
942 switch (i
->rtm_family
)
946 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want4
, a
, sizeof(a
)))
951 if (!nl_parse_attrs(RTM_RTA(i
), rtm_attr_want6
, a
, sizeof(a
)))
962 memcpy(&dst
, RTA_DATA(a
[RTA_DST
]), sizeof(dst
));
967 oif
= rta_get_u32(a
[RTA_OIF
]);
970 table
= rta_get_u32(a
[RTA_TABLE
]);
972 table
= i
->rtm_table
;
974 p
= HASH_FIND(nl_table_map
, RTH
, table
); /* Do we know this table? */
975 DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst
, i
->rtm_dst_len
, i
->rtm_type
, oif
, table
, i
->rtm_protocol
, p
? p
->p
.name
: "(none)");
977 SKIP("unknown table %d\n", table
);
984 if (i
->rtm_tos
!= 0) /* We don't support TOS */
985 SKIP("TOS %02x\n", i
->rtm_tos
);
989 SKIP("RTM_DELROUTE in scan\n");
991 int c
= ipa_classify_net(dst
);
992 if ((c
< 0) || !(c
& IADDR_HOST
) || ((c
& IADDR_SCOPE_MASK
) <= SCOPE_LINK
))
993 SKIP("strange class/scope\n");
995 // ignore rtm_scope, it is not a real scope
996 // if (i->rtm_scope != RT_SCOPE_UNIVERSE)
997 // SKIP("scope %u\n", i->rtm_scope);
999 switch (i
->rtm_protocol
)
1002 SKIP("proto unspec\n");
1004 case RTPROT_REDIRECT
:
1005 src
= KRT_SRC_REDIRECT
;
1009 src
= KRT_SRC_KERNEL
;
1020 src
= KRT_SRC_ALIEN
;
1023 net
*net
= net_get(p
->p
.table
, dst
, i
->rtm_dst_len
);
1026 .src
= p
->p
.main_source
,
1027 .source
= RTS_INHERIT
,
1028 .scope
= SCOPE_UNIVERSE
,
1032 switch (i
->rtm_type
)
1036 if (a
[RTA_MULTIPATH
] && (i
->rtm_family
== AF_INET
))
1038 ra
.dest
= RTD_MULTIPATH
;
1039 ra
.nexthops
= nl_parse_multipath(p
, a
[RTA_MULTIPATH
]);
1042 log(L_ERR
"KRT: Received strange multipath route %I/%d",
1043 net
->n
.prefix
, net
->n
.pxlen
);
1050 ra
.iface
= if_find_by_index(oif
);
1053 log(L_ERR
"KRT: Received route %I/%d with unknown ifindex %u",
1054 net
->n
.prefix
, net
->n
.pxlen
, oif
);
1061 ra
.dest
= RTD_ROUTER
;
1062 memcpy(&ra
.gw
, RTA_DATA(a
[RTA_GATEWAY
]), sizeof(ra
.gw
));
1066 /* Silently skip strange 6to4 routes */
1067 if (ipa_in_net(ra
.gw
, IPA_NONE
, 96))
1071 ng
= neigh_find2(&p
->p
, &ra
.gw
, ra
.iface
,
1072 (i
->rtm_flags
& RTNH_F_ONLINK
) ? NEF_ONLINK
: 0);
1073 if (!ng
|| (ng
->scope
== SCOPE_HOST
))
1075 log(L_ERR
"KRT: Received route %I/%d with strange next-hop %I",
1076 net
->n
.prefix
, net
->n
.pxlen
, ra
.gw
);
1082 ra
.dest
= RTD_DEVICE
;
1087 ra
.dest
= RTD_BLACKHOLE
;
1089 case RTN_UNREACHABLE
:
1090 ra
.dest
= RTD_UNREACHABLE
;
1093 ra
.dest
= RTD_PROHIBIT
;
1095 /* FIXME: What about RTN_THROW? */
1097 SKIP("type %d\n", i
->rtm_type
);
1101 rte
*e
= rte_get_temp(&ra
);
1104 e
->u
.krt
.proto
= i
->rtm_protocol
;
1107 e
->u
.krt
.metric
= 0;
1109 if (a
[RTA_PRIORITY
])
1110 e
->u
.krt
.metric
= rta_get_u32(a
[RTA_PRIORITY
]);
1115 memcpy(&ps
, RTA_DATA(a
[RTA_PREFSRC
]), sizeof(ps
));
1118 ea_list
*ea
= alloca(sizeof(ea_list
) + sizeof(eattr
));
1119 ea
->next
= ra
.eattrs
;
1121 ea
->flags
= EALF_SORTED
;
1123 ea
->attrs
[0].id
= EA_KRT_PREFSRC
;
1124 ea
->attrs
[0].flags
= 0;
1125 ea
->attrs
[0].type
= EAF_TYPE_IP_ADDRESS
;
1126 ea
->attrs
[0].u
.ptr
= alloca(sizeof(struct adata
) + sizeof(ps
));
1127 ea
->attrs
[0].u
.ptr
->length
= sizeof(ps
);
1128 memcpy(ea
->attrs
[0].u
.ptr
->data
, &ps
, sizeof(ps
));
1133 ea_list
*ea
= alloca(sizeof(ea_list
) + sizeof(eattr
));
1134 ea
->next
= ra
.eattrs
;
1136 ea
->flags
= EALF_SORTED
;
1138 ea
->attrs
[0].id
= EA_KRT_REALM
;
1139 ea
->attrs
[0].flags
= 0;
1140 ea
->attrs
[0].type
= EAF_TYPE_INT
;
1141 ea
->attrs
[0].u
.data
= rta_get_u32(a
[RTA_FLOW
]);
1146 u32 metrics
[KRT_METRICS_MAX
];
1147 ea_list
*ea
= alloca(sizeof(ea_list
) + KRT_METRICS_MAX
* sizeof(eattr
));
1150 if (nl_parse_metrics(a
[RTA_METRICS
], metrics
, ARRAY_SIZE(metrics
)) < 0)
1152 log(L_ERR
"KRT: Received route %I/%d with strange RTA_METRICS attribute",
1153 net
->n
.prefix
, net
->n
.pxlen
);
1157 for (t
= 1; t
< KRT_METRICS_MAX
; t
++)
1158 if (metrics
[0] & (1 << t
))
1160 ea
->attrs
[n
].id
= EA_CODE(EAP_KRT
, KRT_METRICS_OFFSET
+ t
);
1161 ea
->attrs
[n
].flags
= 0;
1162 ea
->attrs
[n
].type
= EAF_TYPE_INT
; /* FIXME: Some are EAF_TYPE_BITFIELD */
1163 ea
->attrs
[n
].u
.data
= metrics
[t
];
1169 ea
->next
= ra
.eattrs
;
1170 ea
->flags
= EALF_SORTED
;
1177 krt_got_route(p
, e
);
1179 krt_got_route_async(p
, e
, new);
1183 krt_do_scan(struct krt_proto
*p UNUSED
) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1187 nl_request_dump(BIRD_AF
, RTM_GETROUTE
);
1188 while (h
= nl_get_scan())
1189 if (h
->nlmsg_type
== RTM_NEWROUTE
|| h
->nlmsg_type
== RTM_DELROUTE
)
1190 nl_parse_route(h
, 1);
1192 log(L_DEBUG
"nl_scan_fire: Unknown packet received (type=%d)", h
->nlmsg_type
);
1196 * Asynchronous Netlink interface
1199 static sock
*nl_async_sk
; /* BIRD socket for asynchronous notifications */
1200 static byte
*nl_async_rx_buffer
; /* Receive buffer */
1203 nl_async_msg(struct nlmsghdr
*h
)
1205 switch (h
->nlmsg_type
)
1209 DBG("KRT: Received async route notification (%d)\n", h
->nlmsg_type
);
1210 nl_parse_route(h
, 0);
1214 DBG("KRT: Received async link notification (%d)\n", h
->nlmsg_type
);
1216 nl_parse_link(h
, 0);
1220 DBG("KRT: Received async address notification (%d)\n", h
->nlmsg_type
);
1222 nl_parse_addr(h
, 0);
1225 DBG("KRT: Received unknown async notification (%d)\n", h
->nlmsg_type
);
1230 nl_async_hook(sock
*sk
, int size UNUSED
)
1232 struct iovec iov
= { nl_async_rx_buffer
, NL_RX_SIZE
};
1233 struct sockaddr_nl sa
;
1234 struct msghdr m
= { (struct sockaddr
*) &sa
, sizeof(sa
), &iov
, 1, NULL
, 0, 0 };
1239 x
= recvmsg(sk
->fd
, &m
, 0);
1242 if (errno
== ENOBUFS
)
1245 * Netlink reports some packets have been thrown away.
1246 * One day we might react to it by requesting a route table
1247 * scan in the near future.
1249 return 1; /* More data are likely to be ready */
1251 else if (errno
!= EWOULDBLOCK
)
1252 log(L_ERR
"Netlink recvmsg: %m");
1255 if (sa
.nl_pid
) /* It isn't from the kernel */
1257 DBG("Non-kernel packet\n");
1260 h
= (void *) nl_async_rx_buffer
;
1262 if (m
.msg_flags
& MSG_TRUNC
)
1264 log(L_WARN
"Netlink got truncated asynchronous message");
1267 while (NLMSG_OK(h
, len
))
1270 h
= NLMSG_NEXT(h
, len
);
1273 log(L_WARN
"nl_async_hook: Found packet remnant of size %d", len
);
1281 struct sockaddr_nl sa
;
1287 DBG("KRT: Opening async netlink socket\n");
1289 fd
= socket(PF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
1292 log(L_ERR
"Unable to open asynchronous rtnetlink socket: %m");
1296 bzero(&sa
, sizeof(sa
));
1297 sa
.nl_family
= AF_NETLINK
;
1299 sa
.nl_groups
= RTMGRP_LINK
| RTMGRP_IPV6_IFADDR
| RTMGRP_IPV6_ROUTE
;
1301 sa
.nl_groups
= RTMGRP_LINK
| RTMGRP_IPV4_IFADDR
| RTMGRP_IPV4_ROUTE
;
1303 if (bind(fd
, (struct sockaddr
*) &sa
, sizeof(sa
)) < 0)
1305 log(L_ERR
"Unable to bind asynchronous rtnetlink socket: %m");
1310 nl_async_rx_buffer
= xmalloc(NL_RX_SIZE
);
1312 sk
= nl_async_sk
= sk_new(krt_pool
);
1313 sk
->type
= SK_MAGIC
;
1314 sk
->rx_hook
= nl_async_hook
;
1316 if (sk_open(sk
) < 0)
1317 bug("Netlink: sk_open failed");
1322 * Interface to the UNIX krt module
1326 krt_sys_io_init(void)
1328 HASH_INIT(nl_table_map
, krt_pool
, 6);
1332 krt_sys_start(struct krt_proto
*p
)
1334 struct krt_proto
*old
= HASH_FIND(nl_table_map
, RTH
, krt_table_id(p
));
1338 log(L_ERR
"%s: Kernel table %u already registered by %s",
1339 p
->p
.name
, krt_table_id(p
), old
->p
.name
);
1343 HASH_INSERT2(nl_table_map
, RTH
, krt_pool
, p
);
1352 krt_sys_shutdown(struct krt_proto
*p
)
1354 HASH_REMOVE2(nl_table_map
, RTH
, krt_pool
, p
);
1358 krt_sys_reconfigure(struct krt_proto
*p UNUSED
, struct krt_config
*n
, struct krt_config
*o
)
1360 return n
->sys
.table_id
== o
->sys
.table_id
;
1364 krt_sys_init_config(struct krt_config
*cf
)
1366 cf
->sys
.table_id
= RT_TABLE_MAIN
;
1370 krt_sys_copy_config(struct krt_config
*d
, struct krt_config
*s
)
1372 d
->sys
.table_id
= s
->sys
.table_id
;
1375 static const char *krt_metrics_names
[KRT_METRICS_MAX
] = {
1376 NULL
, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1377 "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1380 static const char *krt_features_names
[KRT_FEATURES_MAX
] = {
1381 "ecn", NULL
, NULL
, "allfrag"
1385 krt_sys_get_attr(eattr
*a
, byte
*buf
, int buflen UNUSED
)
1389 case EA_KRT_PREFSRC
:
1390 bsprintf(buf
, "prefsrc");
1394 bsprintf(buf
, "realm");
1398 buf
+= bsprintf(buf
, "lock:");
1399 ea_format_bitfield(a
, buf
, buflen
, krt_metrics_names
, 2, KRT_METRICS_MAX
);
1402 case EA_KRT_FEATURES
:
1403 buf
+= bsprintf(buf
, "features:");
1404 ea_format_bitfield(a
, buf
, buflen
, krt_features_names
, 0, KRT_FEATURES_MAX
);
1408 int id
= (int)EA_ID(a
->id
) - KRT_METRICS_OFFSET
;
1409 if (id
> 0 && id
< KRT_METRICS_MAX
)
1411 bsprintf(buf
, "%s", krt_metrics_names
[id
]);
1422 kif_sys_start(struct kif_proto
*p UNUSED
)
1429 kif_sys_shutdown(struct kif_proto
*p UNUSED
)