From: Ondrej Zajicek (work) Date: Tue, 8 Nov 2016 16:03:31 +0000 (+0100) Subject: Merge tag 'v1.6.2' into int-new X-Git-Tag: v2.0.0-pre0~30 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cc5b93f72db80abd1262a0a5e1d8400ceef54385;p=thirdparty%2Fbird.git Merge tag 'v1.6.2' into int-new --- cc5b93f72db80abd1262a0a5e1d8400ceef54385 diff --cc filter/filter.c index 7b3e550ff,2f5f00d81..3282bd50f --- a/filter/filter.c +++ b/filter/filter.c @@@ -1415,8 -1471,8 +1440,9 @@@ i_same(struct f_inst *f1, struct f_ins case P('A','p'): TWOARGS; break; case P('C','a'): TWOARGS; break; case P('a','f'): - case P('a','l'): ONEARG; break; + case P('a','l'): + case P('a','L'): ONEARG; break; +#if 0 case P('R','C'): TWOARGS; /* Does not really make sense - ROA check resuls may change anyway */ diff --cc lib/printf.c index 844f59694,4fd75c9b3..1632b5f3f --- a/lib/printf.c +++ b/lib/printf.c @@@ -169,8 -169,8 +169,8 @@@ int bvsnprintf(char *buf, int size, con case ' ': flags |= SPACE; goto repeat; case '#': flags |= SPECIAL; goto repeat; case '0': flags |= ZEROPAD; goto repeat; - } + } - + /* get field width */ field_width = -1; if (is_digit(*fmt)) diff --cc nest/route.h index b5885ee3f,2fcb189af..a536def7b --- a/nest/route.h +++ b/nest/route.h @@@ -277,20 -268,17 +277,20 @@@ void rt_commit(struct config *new, stru void rt_lock_table(rtable *); void rt_unlock_table(rtable *); void rt_setup(pool *, rtable *, char *, struct rtable_config *); -static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_find(&tab->fib, &addr, len); } -static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_get(&tab->fib, &addr, len); } +static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); } +static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); } +void *net_route(rtable *tab, const net_addr *n); +int net_roa_check(rtable *tab, const net_addr *n, u32 asn); rte *rte_find(net *net, struct rte_src *src); rte *rte_get_temp(struct rta *); -void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src); -static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); } +void rte_update2(struct channel *c, net_addr *n, rte *new, struct rte_src *src); +/* rte_update() moved to protocol.h to avoid dependency conflicts */ void rte_discard(rtable *tab, rte *old); -int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter); -rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); -void rt_refresh_begin(rtable *t, struct announce_hook *ah); -void rt_refresh_end(rtable *t, struct announce_hook *ah); +int rt_examine(rtable *t, net_addr *a, struct proto *p, struct filter *filter); - rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, struct ea_list **tmpa, int silent); ++rte *rt_export_merged(struct channel *c, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent); +void rt_refresh_begin(rtable *t, struct channel *c); +void rt_refresh_end(rtable *t, struct channel *c); +void rt_schedule_prune(rtable *t); void rte_dump(rte *); void rte_free(rte *); rte *rte_do_cow(rte *); diff --cc nest/rt-table.c index 9e9d4c7a9,d3aba085c..eb9dc3a50 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@@ -346,10 -227,7 +346,7 @@@ rte_mergable(rte *pri, rte *sec static void rte_trace(struct proto *p, rte *e, int dir, char *msg) { - byte via[IPA_MAX_TEXT_LENGTH+32]; - - rt_format_via(e, via); - log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, via); - log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, rt_format_via(e)); ++ log(L_TRACE "%s %c %s %N %s", p->name, dir, msg, e->net->n.addr, rt_format_via(e)); } static inline void @@@ -367,11 -245,11 +364,11 @@@ rte_trace_out(uint flag, struct proto * } static rte * - export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) -export_filter_(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) ++export_filter_(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { - struct proto *p = ah->proto; - struct filter *filter = ah->out_filter; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct filter *filter = c->out_filter; + struct proto_stats *stats = &c->stats; ea_list *tmpb = NULL; rte *rt; int v; @@@ -426,11 -304,17 +423,17 @@@ return NULL; } + static inline rte * -export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) ++export_filter(struct channel *c, rte *rt0, rte **rt_free, ea_list **tmpa, int silent) + { - return export_filter_(ah, rt0, rt_free, tmpa, rte_update_pool, silent); ++ return export_filter_(c, rt0, rt_free, tmpa, rte_update_pool, silent); + } + static void -do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) +do_rt_notify(struct channel *c, net *net, rte *new, rte *old, ea_list *tmpa, int refeed) { - struct proto *p = ah->proto; - struct proto_stats *stats = ah->stats; + struct proto *p = c->proto; + struct proto_stats *stats = &c->stats; /* @@@ -714,9 -600,9 +717,9 @@@ mpnh_merge_rta(struct mpnh *nhs, rta *a } rte * - rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, int silent) -rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) ++rt_export_merged(struct channel *c, net *net, rte **rt_free, ea_list **tmpa, linpool *pool, int silent) { - // struct proto *p = ah->proto; + // struct proto *p = c->proto; struct mpnh *nhs = NULL; rte *best0, *best, *rt0, *rt, *tmp; @@@ -726,7 -612,7 +729,7 @@@ if (!rte_is_valid(best0)) return NULL; - best = export_filter(c, best0, rt_free, tmpa, silent); - best = export_filter_(ah, best0, rt_free, tmpa, pool, silent); ++ best = export_filter_(c, best0, rt_free, tmpa, pool, silent); if (!best || !rte_is_reachable(best)) return best; @@@ -736,13 -622,13 +739,13 @@@ if (!rte_mergable(best0, rt0)) continue; - rt = export_filter(c, rt0, &tmp, NULL, 1); - rt = export_filter_(ah, rt0, &tmp, NULL, pool, 1); ++ rt = export_filter_(c, rt0, &tmp, NULL, pool, 1); if (!rt) continue; if (rte_is_reachable(rt)) - nhs = mpnh_merge_rta(nhs, rt->attrs, c->merge_limit); - nhs = mpnh_merge_rta(nhs, rt->attrs, pool, ah->proto->merge_limit); ++ nhs = mpnh_merge_rta(nhs, rt->attrs, pool, c->merge_limit); if (tmp) rte_free(tmp); @@@ -750,7 -636,7 +753,7 @@@ if (nhs) { - nhs = mpnh_merge_rta(nhs, best->attrs, c->merge_limit); - nhs = mpnh_merge_rta(nhs, best->attrs, pool, ah->proto->merge_limit); ++ nhs = mpnh_merge_rta(nhs, best->attrs, pool, c->merge_limit); if (nhs->next) { @@@ -805,7 -691,7 +808,7 @@@ rt_notify_merged(struct channel *c, ne /* Prepare new merged route */ if (new_best) - new_best = rt_export_merged(c, net, &new_best_free, &tmpa, 0); - new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, rte_update_pool, 0); ++ new_best = rt_export_merged(c, net, &new_best_free, &tmpa, rte_update_pool, 0); /* Prepare old merged route (without proper merged next hops) */ /* There are some issues with running filter on old route - see rt_notify_basic() */ @@@ -903,22 -795,28 +906,29 @@@ rte_validate(rte *e int c; net *n = e->net; - if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen)) - { - log(L_WARN "Ignoring bogus prefix %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + // (n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen)) + if (!net_validate(n->n.addr)) + { + log(L_WARN "Ignoring bogus prefix %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } - c = ipa_classify_net(n->n.prefix); + c = net_classify(n->n.addr); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) - { - log(L_WARN "Ignoring bogus route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); - return 0; - } + { + log(L_WARN "Ignoring bogus route %N received via %s", + n->n.addr, e->sender->proto->name); + return 0; + } + if ((e->attrs->dest == RTD_MULTIPATH) && !mpnh_is_sorted(e->attrs->nexthops)) + { - log(L_WARN "Ignoring unsorted multipath route %I/%d received via %s", - n->n.prefix, n->n.pxlen, e->sender->proto->name); ++ log(L_WARN "Ignoring unsorted multipath route %N received via %s", ++ n->n.addr, e->sender->proto->name); + return 0; + } + return 1; } @@@ -2431,6 -2379,9 +2441,9 @@@ rt_format_via(rte *e { rta *a = e->attrs; + /* Max text length w/o IP addr and interface name is 16 */ - static byte via[STD_ADDRESS_P_LENGTH+sizeof(a->iface->name)+16]; ++ static byte via[IPA_MAX_TEXT_LENGTH+sizeof(a->iface->name)+16]; + switch (a->dest) { case RTD_ROUTER: bsprintf(via, "via %I on %s", a->gw, a->iface->name); break; @@@ -2446,8 -2398,7 +2460,7 @@@ static void rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tmpa) { - byte via[IPA_MAX_TEXT_LENGTH+32]; - byte from[STD_ADDRESS_P_LENGTH+8]; + byte from[IPA_MAX_TEXT_LENGTH+8]; byte tm[TM_DATETIME_BUFFER_SIZE], info[256]; rta *a = e->attrs; int primary = (e->net->routes == e); @@@ -2514,10 -2473,10 +2526,10 @@@ rt_show_net(struct cli *c, net *n, stru tmpa = make_tmp_attrs(e, rte_update_pool); /* Special case for merged export */ - if ((d->export_mode == RSEM_EXPORT) && (d->export_protocol->accept_ra_types == RA_MERGED)) + if ((d->export_mode == RSEM_EXPORT) && (ec->ra_mode == RA_MERGED)) { rte *rt_free; - e = rt_export_merged(ec, n, &rt_free, &tmpa, 1); - e = rt_export_merged(a, n, &rt_free, &tmpa, rte_update_pool, 1); ++ e = rt_export_merged(ec, n, &rt_free, &tmpa, rte_update_pool, 1); pass = 1; if (!e) diff --cc proto/static/static.c index 28cb1e771,d54302a89..a8abdef0a --- a/proto/static/static.c +++ b/proto/static/static.c @@@ -88,10 -88,8 +87,8 @@@ static_install(struct proto *p, struct struct mpnh *nh = alloca(sizeof(struct mpnh)); nh->gw = r2->via; nh->iface = r2->neigh->iface; - nh->weight = r2->masklen; /* really */ + nh->weight = r2->weight; - nh->next = NULL; - *nhp = nh; - nhp = &(nh->next); + mpnh_insert(&nhs, nh); } /* There is at least one nexthop */ diff --cc sysdep/linux/netlink.c index 8146072b7,79dd14050..7af575a7a --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@@ -20,10 -19,9 +20,9 @@@ #include "nest/route.h" #include "nest/protocol.h" #include "nest/iface.h" - #include "lib/alloca.h" -#include "lib/timer.h" -#include "lib/unix.h" -#include "lib/krt.h" +#include "sysdep/unix/timer.h" +#include "sysdep/unix/unix.h" +#include "sysdep/unix/krt.h" #include "lib/socket.h" #include "lib/string.h" #include "lib/hash.h" @@@ -48,6 -50,49 +51,45 @@@ #endif -#ifdef IPV6 -#define krt_ecmp6(X) 1 -#else -#define krt_ecmp6(X) 0 -#endif ++#define krt_ecmp6(p) ((p)->af == AF_INET6) + + /* + * Structure nl_parse_state keeps state of received route processing. Ideally, + * we could just independently parse received Netlink messages and immediately + * propagate received routes to the rest of BIRD, but Linux kernel represents + * and announces IPv6 ECMP routes not as one route with multiple next hops (like + * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix. + * + * Therefore, BIRD keeps currently processed route in nl_parse_state structure + * and postpones its propagation until we expect it to be final; i.e., when + * non-matching route is received or when the scan ends. When another matching + * route is received, it is merged with the already processed route to form an + * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the + * postponing is done in both cases (for simplicity). All IPv4 routes are just + * considered non-matching. + * + * This is ignored for asynchronous notifications (every notification is handled + * as a separate route). It is not an issue for our routes, as we ignore such + * notifications anyways. But importing alien IPv6 ECMP routes does not work + * properly. + */ + + struct nl_parse_state + { + struct linpool *pool; + int scan; + int merge; + + net *net; + rta *attrs; + struct krt_proto *proto; + s8 new; + s8 krt_src; + u8 krt_type; + u8 krt_proto; + u32 krt_metric; + }; + /* * Synchronous Netlink interface */ @@@ -248,18 -300,22 +297,20 @@@ static struct nl_want_attrs ifla_attr_w }; - #define BIRD_IFA_MAX (IFA_ANYCAST+1) + #define BIRD_IFA_MAX (IFA_FLAGS+1) -#ifndef IPV6 static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) }, [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; -#else + static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = { [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) }, [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) }, + [IFA_FLAGS] = { 1, 1, sizeof(u32) }, }; -#endif #define BIRD_RTA_MAX (RTA_TABLE+1) @@@ -623,124 -658,52 +674,131 @@@ nl_parse_link(struct nlmsghdr *h, int s } static void -nl_parse_addr(struct nlmsghdr *h, int scan) +nl_parse_addr4(struct ifaddrmsg *i, int scan, int new) { - struct ifaddrmsg *i; struct rtattr *a[BIRD_IFA_MAX]; - int new = h->nlmsg_type == RTM_NEWADDR; - struct ifa ifa; struct iface *ifi; - int scope; + u32 ifa_flags; + int scope; - if (!(i = nl_checkin(h, sizeof(*i)))) + if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) return; - switch (i->ifa_family) + if (!a[IFA_LOCAL]) { -#ifndef IPV6 - case AF_INET: - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a))) - return; - if (!a[IFA_LOCAL]) - { - log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); - return; - } - break; -#else - case AF_INET6: - if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) - return; - break; -#endif - default: - return; + log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)"); + return; } - if (!a[IFA_ADDRESS]) { log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); return; } + ifi = if_find_by_index(i->ifa_index); + if (!ifi) + { + log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index); + return; + } + + if (a[IFA_FLAGS]) + ifa_flags = rta_get_u32(a[IFA_FLAGS]); + else + ifa_flags = i->ifa_flags; + + struct ifa ifa; + bzero(&ifa, sizeof(ifa)); + ifa.iface = ifi; - if (i->ifa_flags & IFA_F_SECONDARY) ++ if (ifa_flags & IFA_F_SECONDARY) + ifa.flags |= IA_SECONDARY; + + ifa.ip = rta_get_ipa(a[IFA_LOCAL]); + + if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH) + { + log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); + new = 0; + } + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH) + { + ifa.brd = rta_get_ipa(a[IFA_ADDRESS]); + net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen); + + /* It is either a host address or a peer address */ + if (ipa_equal(ifa.ip, ifa.brd)) + ifa.flags |= IA_HOST; + else + { + ifa.flags |= IA_PEER; + ifa.opposite = ifa.brd; + } + } + else + { + net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen); + net_normalize(&ifa.prefix); + + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1) + ifa.opposite = ipa_opposite_m1(ifa.ip); + + if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2) + ifa.opposite = ipa_opposite_m2(ifa.ip); + + if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST]) + { + ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]); + ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen))); + + if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd)) + ifa.brd = ipa_from_ip4(xbrd); + else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */ + { + log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name); + ifa.brd = ipa_from_ip4(ybrd); + } + } + } + + scope = ipa_classify(ifa.ip); + if (scope < 0) + { + log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name); + return; + } + ifa.scope = scope & IADDR_SCOPE_MASK; + + DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n", + ifi->index, ifi->name, + new ? "added" : "removed", + ifa.ip, ifa.flags, ifa.prefix, ifa.brd, ifa.opposite); + + if (new) + ifa_update(&ifa); + else + ifa_delete(&ifa); + + if (!scan) + if_end_partial_update(ifi); +} + +static void +nl_parse_addr6(struct ifaddrmsg *i, int scan, int new) +{ + struct rtattr *a[BIRD_IFA_MAX]; + struct iface *ifi; ++ u32 ifa_flags; + int scope; + + if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a))) + return; + + if (!a[IFA_ADDRESS]) + { + log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)"); + return; + } + ifi = if_find_by_index(i->ifa_index); if (!ifi) { @@@ -748,17 -711,22 +806,25 @@@ return; } ++ if (a[IFA_FLAGS]) ++ ifa_flags = rta_get_u32(a[IFA_FLAGS]); ++ else ++ ifa_flags = i->ifa_flags; ++ + struct ifa ifa; bzero(&ifa, sizeof(ifa)); ifa.iface = ifi; - if (i->ifa_flags & IFA_F_SECONDARY) + if (ifa_flags & IFA_F_SECONDARY) ifa.flags |= IA_SECONDARY; - /* IFA_LOCAL can be unset for IPv6 interfaces */ -#ifdef IPV6 + /* Ignore tentative addresses silently */ + if (ifa_flags & IFA_F_TENTATIVE) + return; -#endif + /* IFA_LOCAL can be unset for IPv6 interfaces */ - memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip)); - ipa_ntoh(ifa.ip); - ifa.pxlen = i->ifa_prefixlen; - if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS) + ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]); + + if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH) { log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen); new = 0; @@@ -921,49 -880,65 +987,69 @@@ nl_send_route(struct krt_proto *p, rte eattr *ea; net *net = e->net; rta *a = e->attrs; + int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops); + u32 priority = 0; struct { struct nlmsghdr h; struct rtmsg r; - char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)]; - } r; + char buf[0]; + } *r; + + int rsize = sizeof(*r) + bufsize; + r = alloca(rsize); - DBG("nl_send_route(%N,new=%d)\n", net->n.addr, new); - DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op); ++ DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op); - bzero(&r.h, sizeof(r.h)); - bzero(&r.r, sizeof(r.r)); - r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; - r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; + bzero(&r->h, sizeof(r->h)); + bzero(&r->r, sizeof(r->r)); - r->h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE; ++ r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE; + r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); - r->h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0); ++ r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK; - r.r.rtm_family = BIRD_AF; - r.r.rtm_dst_len = net->n.pxlen; - r.r.rtm_protocol = RTPROT_BIRD; - r.r.rtm_scope = RT_SCOPE_NOWHERE; - nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix); + r->r.rtm_family = p->af; + r->r.rtm_dst_len = net_pxlen(net->n.addr); + r->r.rtm_protocol = RTPROT_BIRD; + r->r.rtm_scope = RT_SCOPE_UNIVERSE; + nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr)); + /* + * Strange behavior for RTM_DELROUTE: + * 1) rtm_family is ignored in IPv6, works for IPv4 + * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6) + * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard + */ + if (krt_table_id(p) < 256) - r.r.rtm_table = krt_table_id(p); + r->r.rtm_table = krt_table_id(p); else - nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p)); + nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p)); - /* For route delete, we do not specify route attributes */ - if (!new) - return nl_exchange(&r->h); + if (a->source == RTS_DUMMY) + priority = e->u.krt.metric; + else if (KRT_CF->sys.metric) + priority = KRT_CF->sys.metric; + else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC))) + priority = ea->u.data; + if (priority) - nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, priority); ++ nl_add_attr_u32(&r->h, sizeof(r), RTA_PRIORITY, priority); - if (ea = ea_find(eattrs, EA_KRT_METRIC)) - nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, ea->u.data); + /* For route delete, we do not specify remaining route attributes */ + if (op == NL_OP_DELETE) + goto dest; + + /* Default scope is LINK for device routes, UNIVERSE otherwise */ + if (ea = ea_find(eattrs, EA_KRT_SCOPE)) - r.r.rtm_scope = ea->u.data; ++ r->r.rtm_scope = ea->u.data; + else - r.r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; ++ r->r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; if (ea = ea_find(eattrs, EA_KRT_PREFSRC)) - nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); + nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data); if (ea = ea_find(eattrs, EA_KRT_REALM)) - nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data); + nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data); u32 metrics[KRT_METRICS_MAX]; @@@ -978,40 -953,79 +1064,79 @@@ } if (metrics[0]) - nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX); + nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX); + dest: /* a->iface != NULL checked in krt_capable() for router and device routes */ - - switch (a->dest) + switch (dest) { case RTD_ROUTER: - r.r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index); - nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw); + r->r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r->h, rsize, RTA_OIF, a->iface->index); - nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, a->gw); ++ nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index); ++ nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, gw); break; case RTD_DEVICE: - r.r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index); + r->r.rtm_type = RTN_UNICAST; - nl_add_attr_u32(&r->h, rsize, RTA_OIF, a->iface->index); ++ nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index); break; case RTD_BLACKHOLE: - r.r.rtm_type = RTN_BLACKHOLE; + r->r.rtm_type = RTN_BLACKHOLE; break; case RTD_UNREACHABLE: - r.r.rtm_type = RTN_UNREACHABLE; + r->r.rtm_type = RTN_UNREACHABLE; break; case RTD_PROHIBIT: - r.r.rtm_type = RTN_PROHIBIT; + r->r.rtm_type = RTN_PROHIBIT; break; case RTD_MULTIPATH: - r.r.rtm_type = RTN_UNICAST; - nl_add_multipath(&r.h, sizeof(r), a->nexthops); + r->r.rtm_type = RTN_UNICAST; + nl_add_multipath(&r->h, rsize, a->nexthops); break; + case RTD_NONE: + break; default: bug("krt_capable inconsistent with nl_send_route"); } - return nl_exchange(&r->h); + /* Ignore missing for DELETE */ - return nl_exchange(&r.h, (op == NL_OP_DELETE)); ++ return nl_exchange(&r->h, (op == NL_OP_DELETE)); + } + + static inline int + nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) + { + rta *a = e->attrs; + int err = 0; + + if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH)) + { + struct mpnh *nh = a->nexthops; + + err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface); + if (err < 0) + return err; + + for (nh = nh->next; nh; nh = nh->next) + err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface); + + return err; + } + + return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface); + } + + static inline int + nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs) + { + int err = 0; + + /* For IPv6, we just repeatedly request DELETE until we get error */ + do + err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL); + while (krt_ecmp6(p) && !err); + + return err; } void @@@ -1049,9 -1137,11 +1248,11 @@@ nl_parse_route(struct nl_parse_state *s struct rtattr *a[BIRD_RTA_MAX]; int new = h->nlmsg_type == RTM_NEWROUTE; - ip_addr dst = IPA_NONE; + net_addr dst; u32 oif = ~0; - u32 table; + u32 table_id; + u32 priority = 0; + u32 def_scope = RT_SCOPE_UNIVERSE; int src; if (!(i = nl_checkin(h, sizeof(*i)))) @@@ -1059,28 -1149,25 +1260,28 @@@ switch (i->rtm_family) { -#ifndef IPV6 - case AF_INET: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) - return; - break; -#else - case AF_INET6: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) - return; - break; -#endif - default: + case AF_INET: + if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a))) return; - } - if (a[RTA_DST]) - { - memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst)); - ipa_ntoh(dst); + if (a[RTA_DST]) + net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len); + else + net_fill_ip4(&dst, IP4_NONE, 0); + break; + - case AF_INET6: - if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) - return; ++ case AF_INET6: ++ if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a))) ++ return; + + if (a[RTA_DST]) + net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len); + else + net_fill_ip6(&dst, IP6_NONE, 0); + break; + + default: + return; } if (a[RTA_OIF]) @@@ -1096,17 -1183,21 +1297,19 @@@ if (!p) SKIP("unknown table %d\n", table); - -#ifdef IPV6 if (a[RTA_IIF]) SKIP("IIF set\n"); -#else + if (i->rtm_tos != 0) /* We don't support TOS */ SKIP("TOS %02x\n", i->rtm_tos); -#endif - if (scan && !new) + if (s->scan && !new) SKIP("RTM_DELROUTE in scan\n"); + if (a[RTA_PRIORITY]) + priority = rta_get_u32(a[RTA_PRIORITY]); + - int c = ipa_classify_net(dst); + int c = net_classify(&dst); if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)) SKIP("strange class/scope\n"); @@@ -1138,14 -1225,16 +1337,16 @@@ src = KRT_SRC_ALIEN; } - net *net = net_get(p->p.table, dst, i->rtm_dst_len); + net *net = net_get(p->p.main_channel->table, &dst); - rta ra = { - .src= p->p.main_source, - .source = RTS_INHERIT, - .scope = SCOPE_UNIVERSE, - .cast = RTC_UNICAST - }; + if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type)) + nl_announce_route(s); + + rta *ra = lp_allocz(s->pool, sizeof(rta)); + ra->src = p->p.main_source; + ra->source = RTS_INHERIT; + ra->scope = SCOPE_UNIVERSE; + ra->cast = RTC_UNICAST; switch (i->rtm_type) { @@@ -1153,40 -1242,45 +1354,40 @@@ if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET)) { - ra.dest = RTD_MULTIPATH; - ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]); - if (!ra.nexthops) + ra->dest = RTD_MULTIPATH; + ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]); + if (!ra->nexthops) { - log(L_ERR "KRT: Received strange multipath route %I/%d", - net->n.prefix, net->n.pxlen); + log(L_ERR "KRT: Received strange multipath route %N", net->n.addr); return; } break; } - ra.iface = if_find_by_index(oif); - if (!ra.iface) + ra->iface = if_find_by_index(oif); + if (!ra->iface) { - log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u", - net->n.prefix, net->n.pxlen, oif); + log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif); return; } if (a[RTA_GATEWAY]) { - ra.dest = RTD_ROUTER; - ra.gw = rta_get_ipa(a[RTA_GATEWAY]); - neighbor *ng; + ra->dest = RTD_ROUTER; - memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw)); - ipa_ntoh(ra->gw); ++ ra->gw = rta_get_ipa(a[RTA_GATEWAY]); -#ifdef IPV6 /* Silently skip strange 6to4 routes */ - if (ipa_in_net(ra->gw, IPA_NONE, 96)) + const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96); - if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra.gw, (net_addr *) &sit)) ++ if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->gw, (net_addr *) &sit)) return; -#endif - ng = neigh_find2(&p->p, &ra->gw, ra->iface, - (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); - if (!ng || (ng->scope == SCOPE_HOST)) + neighbor *nbr; - nbr = neigh_find2(&p->p, &ra.gw, ra.iface, ++ nbr = neigh_find2(&p->p, &ra->gw, ra->iface, + (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0); + if (!nbr || (nbr->scope == SCOPE_HOST)) { - log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra.gw); - log(L_ERR "KRT: Received route %I/%d with strange next-hop %I", - net->n.prefix, net->n.pxlen, ra->gw); ++ log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr, ra->gw); return; } } @@@ -1224,11 -1321,13 +1428,11 @@@ if (a[RTA_PREFSRC]) { - ip_addr ps; - memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps)); - ipa_ntoh(ps); + ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr)); - ea->next = ra.eattrs; - ra.eattrs = ea; + ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); + ea->next = ra->eattrs; + ra->eattrs = ea; ea->flags = EALF_SORTED; ea->count = 1; ea->attrs[0].id = EA_KRT_PREFSRC; @@@ -1293,20 -1420,18 +1524,25 @@@ voi krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */ { struct nlmsghdr *h; + struct nl_parse_state s; - nl_parse_begin(&s, 1, krt_ecmp6(p)); - - nl_request_dump(BIRD_AF, RTM_GETROUTE); ++ nl_parse_begin(&s, 1, 0); + nl_request_dump(AF_INET, RTM_GETROUTE); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) - nl_parse_route(h, 1); + nl_parse_route(&s, h); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); ++ nl_parse_end(&s); ++ nl_parse_begin(&s, 1, 1); + nl_request_dump(AF_INET6, RTM_GETROUTE); + while (h = nl_get_scan()) + if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) - nl_parse_route(h, 1); ++ nl_parse_route(&s, h); + else + log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); + nl_parse_end(&s); } /* diff --cc sysdep/unix/io.c index 5ec728af0,3ceadd982..e90964c1c --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@@ -1886,12 -1853,20 +1886,26 @@@ sk_write(sock *s } } +int sk_is_ipv4(sock *s) +{ return s->af == AF_INET; } + +int sk_is_ipv6(sock *s) +{ return s->af == AF_INET6; } + + void + sk_err(sock *s, int revents) + { + int se = 0, sse = sizeof(se); + if ((s->type != SK_MAGIC) && (revents & POLLERR)) + if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0) + { + log(L_ERR "IO: Socket error: SO_ERROR: %m"); + se = 0; + } + + s->err_hook(s, se); + } + void sk_dump_all(void) { diff --cc sysdep/unix/krt.Y index 91317d97f,6fe39fa9b..33dc4a190 --- a/sysdep/unix/krt.Y +++ b/sysdep/unix/krt.Y @@@ -41,12 -31,16 +43,17 @@@ kern_proto_start: proto_start KERNEL ; CF_ADDTO(kern_proto, kern_proto_start proto_name '{') -CF_ADDTO(kern_proto, kern_proto proto_item ';') CF_ADDTO(kern_proto, kern_proto kern_item ';') + kern_mp_limit: + /* empty */ { $$ = KRT_DEFAULT_ECMP_LIMIT; } + | LIMIT expr { $$ = $2; if (($2 <= 0) || ($2 > 255)) cf_error("Merge paths limit must be in range 1-255"); } + ; + kern_item: - PERSIST bool { THIS_KRT->persist = $2; } + proto_item + | proto_channel { this_proto->net_type = $1->net_type; } + | PERSIST bool { THIS_KRT->persist = $2; } | SCAN TIME expr { /* Scan time of 0 means scan on startup only */ THIS_KRT->scan_time = $3; @@@ -60,8 -54,13 +67,13 @@@ } | DEVICE ROUTES bool { THIS_KRT->devroutes = $3; } | GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; } - | MERGE PATHS bool { krt_set_merge_paths(this_channel, $3, KRT_DEFAULT_ECMP_LIMIT); } - | MERGE PATHS bool LIMIT expr { krt_set_merge_paths(this_channel, $3, $5); } + | MERGE PATHS bool kern_mp_limit { - THIS_KRT->merge_paths = $3 ? $4 : 0; ++ krt_set_merge_paths(this_channel, $3, $4); + #ifndef KRT_ALLOW_MERGE_PATHS + if ($3) + cf_error("Path merging not supported on this platform"); + #endif + } ; /* Kernel interface protocol */ diff --cc sysdep/unix/krt.c index 6531bb289,ef98cb3a1..d4cb964ee --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@@ -599,12 -608,12 +599,12 @@@ krt_flush_routes(struct krt_proto *p static struct rte * krt_export_net(struct krt_proto *p, net *net, rte **rt_free, ea_list **tmpa) { - struct announce_hook *ah = p->p.main_ahook; - struct filter *filter = ah->out_filter; + struct channel *c = p->p.main_channel; + struct filter *filter = c->out_filter; rte *rt; - if (p->p.accept_ra_types == RA_MERGED) - return rt_export_merged(ah, net, rt_free, tmpa, krt_filter_lp, 1); + if (c->ra_mode == RA_MERGED) - return rt_export_merged(c, net, rt_free, tmpa, 1); ++ return rt_export_merged(c, net, rt_free, tmpa, krt_filter_lp, 1); rt = net->routes; *rt_free = NULL;